From 454b14c68f04aa8341806a2ed41824a522e91bf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Mon, 16 May 2011 23:29:40 +0200 Subject: [PATCH 01/11] icons for Polish recipes --- recipes/icons/osnews_pl.png | Bin 0 -> 1006 bytes recipes/icons/rmf24_opinie.png | Bin 0 -> 722 bytes recipes/icons/swiatkindle.png | Bin 0 -> 425 bytes 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 recipes/icons/osnews_pl.png create mode 100644 recipes/icons/rmf24_opinie.png create mode 100644 recipes/icons/swiatkindle.png diff --git a/recipes/icons/osnews_pl.png b/recipes/icons/osnews_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..157bbefa727e9e1fa700369009b21592d44ecc83 GIT binary patch literal 1006 zcmeAS@N?(olHy`uVBq!ia0vp^0w65F1|noMXEwRgbDb6jnWdSzjJF*K(&S!|It+mE5*9Zc$b=J~9WgAWf5-kGCf=;pFzYiYrQ%6;)#s*`7_ozBiy_frv?wQbw9 z)2T)$pLC1!%g61Wc2-Z{e6_Y|e(nE%Urds>$@UwU{VFlal$i7WZEnsKFLw6y^L0N0 zG-fcaw?+xrRE22V*!KF_ z=i1si#d@bFPtL!$X8rZg=jV7X=Vi7ifA{C-yS($a?`+XIK69R$l9H5kbkwQm?QQ$& z^DCJSAD($;W3sUD-MhQiX_)76tk+SW+}W~<$-QsIDxK?}H{YDBF3haFOJq)9Gco^$N4y?uXsZbHF_D=QbfzpguMz~jnz$e?QxhsSc$-eQMI z9sk0XYceXD{YrLS_Hx%g{+L}??$`gZxNv*6*U`rg?(OS}WQzoZ z9_MsER%CfDbZSQZ{cXD33>`^^2TWv_hcS1YN1K%6^~D;KFP4U~PMfxE zySC_pT&CBrckiCYTJvYe*3?-CHm9FHJ$tspS}~Ty8#@xhr?1oIy7=SA2@gH?JG-Wy zEGcC)HqI3i=`}`^M+z!^5~(kKMY~@c2iagq7dc zsMV~D7ovV!m6g@~Es8zxxyGI+c6%<$dqm`20NSqDlNdpQVoqT`g->FM|P#k;3^ZTi&5_i$yKYo#}aRUpMFc`SaU-9|+{%ulu`ebMg0e5>Fo`Wxov%1KPXz#ksj@GdHGj zI#oU0WD&blX-|9opR216X?1Pj=00Rs%Vkqp^mo@|lhyyFQ{z&Woj?)FK#IZ0z|c_Fz*N`JEX2sr%GA`#&_dh5$jZQ=QtgEwiiX_$l+3hBEEGJR2=mxK8t1*IFkADeVTSsYg?X>3u7 zEm$(S}zdbp1~lsA~}Hl z6${J7_p_Kd>@F=j;KsO8<6_67=M4-VOxNyftZ4SgaIV>PVd28Zrwl`$`>uIZ`s@8W zR^by3v1cz;Z98&mUfmr@VHOwTM8=M^c8vf%5%ayvYOk^+2YYKy<-fT7x8b(AOAGe# zKNAx^?>E^*dV!;fz7B&)u7GLAe}__=!xNvkpL~8y;_sBjZ;Q;g_uYGTaQD~5rzQc+ zoDyb#1jD-3R{9iViQV>-Q;*k~=we)w*6>!`f5V|Qg4gBxdy0i%lYRMlxe27{?l#Gf~o}jci%G845s7H%yIa+E@GKc6~v|xyGERH00)|WTsW(*04Hc Rayd`~gQu&X%Q~loCIBf=9ykC1 
literal 0 HcmV?d00001 diff --git a/recipes/icons/swiatkindle.png b/recipes/icons/swiatkindle.png new file mode 100644 index 0000000000000000000000000000000000000000..1fc505bfbfabd98cc547b7844357266c21e1502e GIT binary patch literal 425 zcmeAS@N?(olHy`uVBq!ia0vp^0wBx*Bp9q_EZ7UAn3BBRT^JZv^(q?yd7K3vk;OpT z1B~5HX4?T7uRUEHLnJPno;&Sz*g>N0;r~y2c*NJm?AY>vvunzwlv$Hs${0<~Oq&$q ztC1+g-_ad(mn*i?{$Jo7yFE2E2U@3it>;e8jlBAm%_3Q8a^+48JR4T+`P_Z_^~;zm0Xkxq!^403=MS+Omz*-LW~TpOo7N$+rY@mz+h^CIv Date: Tue, 17 May 2011 08:27:27 -0600 Subject: [PATCH 02/11] ... --- src/calibre/manual/faq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 1c6b65c770..d3784eda6f 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -22,7 +22,7 @@ It can convert every input format in the following list, to every output format. *Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ -*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, SNB, TCR, TXT, TXTZ +*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, HTMLZ, PDB, PML, RB, PDF, RTF, SNB, TCR, TXT, TXTZ .. 
note :: From dc6f033b466d63269fa2c7f856b49f4e57937c62 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 17 May 2011 09:30:51 -0600 Subject: [PATCH 03/11] Updated United Daily --- recipes/united_daily.recipe | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/recipes/united_daily.recipe b/recipes/united_daily.recipe index 6954a7e725..1013b3d2b6 100644 --- a/recipes/united_daily.recipe +++ b/recipes/united_daily.recipe @@ -14,6 +14,7 @@ class UnitedDaily(BasicNewsRecipe): (u'生活', u'http://udn.com/udnrss/life.xml'), (u'綜合', u'http://udn.com/udnrss/education.xml'), (u'意見評論', u'http://udn.com/udnrss/opinion.xml'), + (u'校園博覽會', u'http://mag.udn.com/udnrss/campus_rss.xml'), (u'大台北', u'http://udn.com/udnrss/local_taipei.xml'), (u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'), (u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'), @@ -21,15 +22,21 @@ class UnitedDaily(BasicNewsRecipe): (u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'), (u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'), (u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'), + (u'台灣人物', u'http://mag.udn.com/udnrss/people_rss.xml'), (u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'), (u'國際焦點', u'http://udn.com/udnrss/international.xml'), (u'台商經貿', u'http://udn.com/udnrss/financechina.xml'), (u'國際財經', u'http://udn.com/udnrss/financeworld.xml'), + (u'全球觀察', u'http://mag.udn.com/udnrss/world_rss.xml'), (u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'), (u'股市要聞', u'http://udn.com/udnrss/stock.xml'), (u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'), (u'稅務法務', u'http://udn.com/udnrss/tax.xml'), (u'房市情報', u'http://udn.com/udnrss/houses.xml'), + (u'個人理財', u'http://mag.udn.com/udnrss/wealth_rss.xml'), + (u'研究報告', u'http://mag.udn.com/udnrss/report_rss.xml'), + (u'基金', u'http://mag.udn.com/udnrss/fund_rss.xml'), + (u'理財會客室', u'http://mag.udn.com/udnrss/m_forum_rss.xml'), (u'棒球', u'http://udn.com/udnrss/baseball.xml'), (u'籃球', 
u'http://udn.com/udnrss/basketball.xml'), (u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'), @@ -40,19 +47,24 @@ class UnitedDaily(BasicNewsRecipe): (u'電影世界', u'http://udn.com/udnrss/movie.xml'), (u'流行音樂', u'http://udn.com/udnrss/music.xml'), (u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'), + (u'消費流行', u'http://mag.udn.com/udnrss/happylife_rss.xml'), (u'食樂指南', u'http://udn.com/udnrss/food.xml'), + (u'數位資訊', u'http://mag.udn.com/udnrss/digital_rss.xml'), (u'折扣好康', u'http://udn.com/udnrss/shopping.xml'), + (u'發燒車訊', u'http://mag.udn.com/udnrss/car_rss.xml'), (u'醫藥新聞', u'http://udn.com/udnrss/health.xml'), (u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'), (u'談星論命', u'http://udn.com/udnrss/astrology.xml'), (u'文化副刊', u'http://udn.com/udnrss/reading.xml'), + (u'旅遊休閒', u'http://travel.udn.com/udnrss/travel_rss.xml'), + (u'健康醫藥', u'http://mag.udn.com/udnrss/life_rss.xml'), ] - extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;}''' + extra_css = '''div[id='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] {font-size:200%; font-weight:bold;} td[class='story_title'] td[class='story_title']>div {font-size:200%; font-weight:bold;}''' __author__ = 'Eddie Lau' - __version__ = '1.0' - language = 'zh' + __version__ = '1.1' + language = 'zh-TW' publisher = 'United Daily News Group' description = 'United Daily (Taiwan)' category = 'News, Chinese, Taiwan' @@ -63,5 +75,12 @@ class UnitedDaily(BasicNewsRecipe): conversion_options = {'linearize_tables':True} masthead_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif' cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif' - keep_only_tags = [dict(name='div', attrs={'id':['story_title','story_author', 'story']})] + keep_only_tags = [dict(name='td', attrs={'class':['story_title']}), + dict(name='div', attrs={'id':['story_title']}), + dict(name='td', attrs={'class':['story_author']}), + dict(name='div', attrs={'id':['story_author']}), + dict(name='td', attrs={'class':['story']}), + 
dict(name='div', attrs={'id':['story']}), + ] remove_tags = [dict(name='div', attrs={'id':['mvouter']})] + From 253e49141a1739423fdf793c9098359a78c116f2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 17 May 2011 09:35:45 -0600 Subject: [PATCH 04/11] Men's Health by Anonymous --- recipes/mens_health.recipe | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 recipes/mens_health.recipe diff --git a/recipes/mens_health.recipe b/recipes/mens_health.recipe new file mode 100644 index 0000000000..4e69db8a7c --- /dev/null +++ b/recipes/mens_health.recipe @@ -0,0 +1,10 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1305636254(BasicNewsRecipe): + title = u'Mens Health (US)' + language = 'en' + __author__ = 'Anonymous' + oldest_article = 14 + max_articles_per_feed = 100 + + feeds = [(u'News', u'http://blogs.menshealth.com/health-headlines/feed')] From a0942198acb97419569a83bd10ce4462cfaf8155 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 17 May 2011 13:06:56 -0600 Subject: [PATCH 05/11] Good to Know by Anonymous --- recipes/good_to_know.recipe | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 recipes/good_to_know.recipe diff --git a/recipes/good_to_know.recipe b/recipes/good_to_know.recipe new file mode 100644 index 0000000000..cf374128ce --- /dev/null +++ b/recipes/good_to_know.recipe @@ -0,0 +1,32 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1305547242(BasicNewsRecipe): + title = u'Good to Know (uk)' + oldest_article = 14 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + remove_javascript = True + __author__ = 'Anonymous' + language = 'en_GB' + remove_tags = [dict(name='div', attrs={'class':'articles_footer', 'class':'printoptions'})] + + def print_version(self, url): + return url + '/print/1' + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + 
tstr = alink.string + alink.replaceWith(tstr) + return soup + + feeds = [ (u'Family Conception Advice', u'http://www.goodtoknow.co.uk/feeds/family.rss'), + (u'Family Health Advice', u'http://www.goodtoknow.co.uk/feeds/health.rss'), + (u'Diet Advice', u'http://www.goodtoknow.co.uk/feeds/diet.rss'), + (u'Food Advice', u'http://www.goodtoknow.co.uk/feeds/food.rss'), + (u'Sex Advice', u'http://www.goodtoknow.co.uk/feeds/sex.rss'), + (u'Easy Exercise', u'http://www.goodtoknow.co.uk/feeds/easyexercise.rss'), + (u'Recipes', u'http://www.goodtoknow.co.uk/feeds/recipes.rss'), + (u'Food Quick-tips', u'http://www.goodtoknow.co.uk/feeds/foodquicktips.rss'), + ] From 9908fc66325028ea80b958d6d8fe0cc0db990984 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 17 May 2011 14:19:06 -0600 Subject: [PATCH 06/11] Glamour by Anonymous --- recipes/glamour.recipe | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 recipes/glamour.recipe diff --git a/recipes/glamour.recipe b/recipes/glamour.recipe new file mode 100644 index 0000000000..40e6b6e88b --- /dev/null +++ b/recipes/glamour.recipe @@ -0,0 +1,38 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1305547242(BasicNewsRecipe): + title = u'Glamour (US)' + oldest_article = 21 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + language = 'en' + remove_javascript = True + __author__ = 'Anonymous' + remove_tags = [dict(name='div', attrs={'class':'articles_footer', 'class':'printoptions'})] + + def print_version(self, url): + return url + '?printable=true' + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup + + feeds = [ (u'All Fashion', u'http://feeds.glamour.com/glamour/all_fashion'), + (u'All Beauty', u'http://feeds.glamour.com/glamour/all_beauty'), + (u'All Sex, Love & Life', 
u'http://feeds.glamour.com/glamour/sex_love_life'), + (u'All Health & Fitness', u'http://feeds.glamour.com/glamour/health_fitness'), + (u'Shopping', u'http://feeds.glamour.com/glamour/shopping'), + (u'Slaves to Fashion blog', u'http://feeds.glamour.com/glamour/slavestofashion'), + (u'The Girls in the Beauty Department', u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'), + (u'Smitten blog', u'http://feeds.glamour.com/glamour/smitten'), + (u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'), + (u'Single-ish blog', u'http://feeds.glamour.com/glamour/glamoursingle-ish'), + (u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'), + (u'Vitamin G blog', u'http://feeds.glamour.com/glamour/vitamin-g'), + (u'Margarita Shapes Up blog', u'http://feeds.glamour.com/glamour/margaritashapesup'), + (u'Little Miss Fortune blog', u'http://feeds.glamour.com/glamour/little-miss-fortune'), + ] From 1f31873432f6eccaf44931dd76f4a9225f56aba1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 17 May 2011 14:31:54 -0600 Subject: [PATCH 07/11] Add a tweak that controls what words are treated as suffixes when generating an author sort string from an author name. 
Also Fix #782551 (authorsort error on brackets) --- resources/default_tweaks.py | 7 +++- src/calibre/__init__.py | 18 +++++++++++ src/calibre/ebooks/metadata/__init__.py | 43 +++++++++++++++++-------- 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/resources/default_tweaks.py b/resources/default_tweaks.py index e91b4a62d5..691a82fc36 100644 --- a/resources/default_tweaks.py +++ b/resources/default_tweaks.py @@ -41,14 +41,19 @@ authors_completer_append_separator = False #: Author sort name algorithm # The algorithm used to copy author to author_sort # Possible values are: -# invert: use "fn ln" -> "ln, fn" (the default algorithm) +# invert: use "fn ln" -> "ln, fn" # copy : copy author to author_sort without modification # comma : use 'copy' if there is a ',' in the name, otherwise use 'invert' # nocomma : "fn ln" -> "ln fn" (without the comma) # When this tweak is changed, the author_sort values stored with each author # must be recomputed by right-clicking on an author in the left-hand tags pane, # selecting 'manage authors', and pressing 'Recalculate all author sort values'. +# The author name suffixes are words that are ignored when they occur at the +# end of an author name. The case of the suffix is ignored and trailing +# periods are automatically handled. 
author_sort_copy_method = 'comma' +author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd', + 'MD', 'M.D', 'I', 'II', 'III', 'IV') #: Use author sort in Tag Browser # Set which author field to display in the tags pane (the list of authors, diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index bc99947345..b82ea984ec 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -630,6 +630,24 @@ def human_readable(size): size = size[:-2] return size + " " + suffix +def remove_bracketed_text(src, + brackets={u'(':u')', u'[':u']', u'{':u'}'}): + from collections import Counter + counts = Counter() + buf = [] + src = force_unicode(src) + rmap = dict([(v, k) for k, v in brackets.iteritems()]) + for char in src: + if char in brackets: + counts[char] += 1 + elif char in rmap: + idx = rmap[char] + if counts[idx] > 0: + counts[idx] -= 1 + elif sum(counts.itervalues()) < 1: + buf.append(char) + return u''.join(buf) + if isosx: import glob, shutil fdir = os.path.expanduser('~/.fonts') diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index 9c7838cb2c..2c26d011b7 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -10,7 +10,7 @@ import os, sys, re from urllib import unquote, quote from urlparse import urlparse -from calibre import relpath, guess_type +from calibre import relpath, guess_type, remove_bracketed_text from calibre.utils.config import tweaks @@ -27,20 +27,37 @@ def authors_to_string(authors): else: return '' -_bracket_pat = re.compile(r'[\[({].*?[})\]]') -def author_to_author_sort(author): +def author_to_author_sort(author, method=None): if not author: - return '' - method = tweaks['author_sort_copy_method'] - if method == 'copy' or (method == 'comma' and ',' in author): + return u'' + sauthor = remove_bracketed_text(author).strip() + tokens = sauthor.split() + if len(tokens) < 2: return author - author = _bracket_pat.sub('', author).strip() - tokens 
= author.split() - if tokens and tokens[-1] not in ('Inc.', 'Inc'): - tokens = tokens[-1:] + tokens[:-1] - if len(tokens) > 1 and method != 'nocomma': - tokens[0] += ',' - return ' '.join(tokens) + if method is None: + method = tweaks['author_sort_copy_method'] + if method == u'copy': + return author + suffixes = set([x.lower() for x in tweaks['author_name_suffixes']]) + suffixes |= set([x+u'.' for x in suffixes]) + + last = tokens[-1].lower() + suffix = None + if last in suffixes: + suffix = tokens[-1] + tokens = tokens[:-1] + + if method == u'comma' and u',' in u''.join(tokens): + return author + + atokens = tokens[-1:] + tokens[:-1] + if suffix: + atokens.append(suffix) + + if method != u'nocomma' and len(atokens) > 1: + atokens[0] += u',' + + return u' '.join(atokens) def authors_to_sort_string(authors): return ' & '.join(map(author_to_author_sort, authors)) From faf5ba7d7c80b248621bd0b0a4f7f0f41d12f27d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 17 May 2011 17:31:49 -0600 Subject: [PATCH 08/11] Add API to run arbitrary functions in worker processes --- src/calibre/utils/ipc/worker.py | 54 +++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/calibre/utils/ipc/worker.py b/src/calibre/utils/ipc/worker.py index a891d09f3d..88e571673f 100644 --- a/src/calibre/utils/ipc/worker.py +++ b/src/calibre/utils/ipc/worker.py @@ -50,6 +50,12 @@ PARALLEL_FUNCS = { 'save_book' : ('calibre.ebooks.metadata.worker', 'save_book', 'notification'), + + 'arbitrary' : + ('calibre.utils.ipc.worker', 'arbitrary', None), + + 'arbitrary_n' : + ('calibre.utils.ipc.worker', 'arbitrary', 'notification'), } class Progress(Thread): @@ -73,7 +79,55 @@ class Progress(Thread): except: break +def arbitrary(module_name, func_name, args, kwargs={}): + ''' + An entry point that allows arbitrary functions to be run in a parallel + process. useful for plugin developers that want to run jobs in a parallel + process. 
+ To use this entry point, simply create a ParallelJob with the module and + function names for the real entry point. + + Remember that args and kwargs must be serialized so only use basic types + for them. + + To use this, you will do something like + + from calibre.gui2 import Dispatcher + gui.job_manager.run_job(Dispatcher(job_done), 'arbitrary', + args=('calibre_plugins.myplugin.worker', 'do_work', + ('arg1', 'arg2', 'arg3')), + description='Change the world') + + The function job_done will be called on completion, see the code in + gui2.actions.catalog for an example of using run_job and Dispatcher. + + :param module_name: The fully qualified name of the module that contains + the actual function to be run. For example: + calibre_plugins.myplugin.worker + :param func_name: The name of the function to be run. + :param args: A list (or tuple) of arguments that will be passed to the + function ``func_name`` + :param kwargs: A dictionary of keyword arguments to pass to func_name + ''' + module = importlib.import_module(module_name) + func = getattr(module, func_name) + return func(*args, **kwargs) + +def arbitrary_n(module_name, func_name, args, kwargs={}, + notification=lambda x, y: y): + ''' + Same as :func:`arbitrary` above, except that func_name must support a + keyword argument "notification". This will be a function that accepts two + arguments. func_name should call it periodically with progress information. + The first argument is a float between 0 and 1 that represents percent + completed and the second is a string with a message (it can be an empty + string). 
+ ''' + module = importlib.import_module(module_name) + func = getattr(module, func_name) + kwargs['notification'] = notification + return func(*args, **kwargs) def get_func(name): module, func, notification = PARALLEL_FUNCS[name] From 93f8e4f7c58b5da03b596be5a3f46dc7d3073e2b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 18 May 2011 08:36:49 -0600 Subject: [PATCH 09/11] Update Dvhn --- recipes/dvhn.recipe | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/recipes/dvhn.recipe b/recipes/dvhn.recipe index 4c093aa9d2..d0330990fc 100644 --- a/recipes/dvhn.recipe +++ b/recipes/dvhn.recipe @@ -1,19 +1,21 @@ +import re from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1302341394(BasicNewsRecipe): title = u'DvhN' - oldest_article = 1 + __author__ = 'Reijndert' + oldest_article = 7 max_articles_per_feed = 200 - __author__ = 'Reijndert' no_stylesheets = True - cover_url = 'http://www.dvhn.nl/template/Dagblad_v2.0/gfx/logo_DvhN.gif' + cover_url = 'http://members.home.nl/apm.de.haas/calibre/DvhN.jpg' language = 'nl' country = 'NL' version = 1 publisher = u'Dagblad van het Noorden' category = u'Nieuws' description = u'Nieuws uit Noord Nederland' + timefmt = ' %Y-%m-%d (%a)' keep_only_tags = [dict(name='div', attrs={'id':'fullPicture'}) @@ -21,11 +23,26 @@ class AdvancedUserRecipe1302341394(BasicNewsRecipe): ] remove_tags = [ - dict(name=['object','link','iframe','base']) - ,dict(name='span',attrs={'class':'copyright'}) + dict(name='span',attrs={'class':'location'}) ] - feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss'), (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss'), (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss'), (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss'), (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss'), (u'Sport', 
u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss'), (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss'), (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')] + preprocess_regexps = [ + (re.compile(r''), lambda h1: '') + ,(re.compile(r''), lambda h2: '') + ,(re.compile(r'Word vriend van Dagblad van het Noorden op Facebook'), lambda h3: '') + ,(re.compile(r'Volg Dagblad van het Noorden op Twitter'), lambda h3: '') + ] + + + feeds = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss') + , (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss') + , (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss') + , (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss') + , (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss') + , (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss') + , (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss') + , (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours') + ] extra_css = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif;} From d882c28144e28d7cbe78addb95e3c3402e1c7ada Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 18 May 2011 09:15:17 -0600 Subject: [PATCH 10/11] Updated Newsweek --- recipes/newsweek.recipe | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/recipes/newsweek.recipe b/recipes/newsweek.recipe index 97abd69aac..a31706e257 100644 --- a/recipes/newsweek.recipe +++ b/recipes/newsweek.recipe @@ -11,6 +11,20 @@ class Newsweek(BasicNewsRecipe): BASE_URL = 'http://www.newsweek.com' + topics = { + 'Culture' : '/tag/culture.html', + 'Business' : '/tag/business.html', + 'Society' : '/tag/society.html', + 'Science' : '/tag/science.html', + 'Education' : '/tag/education.html', + 'Politics' : '/tag/politics.html', + 
'Health' : '/tag/health.html', + 'World' : '/tag/world.html', + 'Nation' : '/tag/nation.html', + 'Technology' : '/tag/technology.html', + 'Game Changers' : '/tag/game-changers.html', + } + keep_only_tags = dict(name='article', attrs={'class':'article-text'}) remove_tags = [dict(attrs={'data-dartad':True})] remove_attributes = ['property'] @@ -21,14 +35,10 @@ class Newsweek(BasicNewsRecipe): return soup def newsweek_sections(self): - return [ - ('Nation', 'http://www.newsweek.com/tag/nation.html'), - ('Society', 'http://www.newsweek.com/tag/society.html'), - ('Culture', 'http://www.newsweek.com/tag/culture.html'), - ('World', 'http://www.newsweek.com/tag/world.html'), - ('Politics', 'http://www.newsweek.com/tag/politics.html'), - ('Business', 'http://www.newsweek.com/tag/business.html'), - ] + for topic_name, topic_url in self.topics.iteritems(): + yield (topic_name, + self.BASE_URL+topic_url) + def newsweek_parse_section_page(self, soup): for article in soup.findAll('article', about=True, From 5317f8bb9c0acd80576ca577ffc02d33fb138c1e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 18 May 2011 14:16:41 -0600 Subject: [PATCH 11/11] Various German news sources by schuster --- recipes/borse_online.recipe | 33 ++++++++++++++++++++ recipes/capital_de.recipe | 61 +++++++++++++++++++++++++++++++++++++ recipes/impulse_de.recipe | 32 +++++++++++++++++++ 3 files changed, 126 insertions(+) create mode 100644 recipes/borse_online.recipe create mode 100644 recipes/capital_de.recipe create mode 100644 recipes/impulse_de.recipe diff --git a/recipes/borse_online.recipe b/recipes/borse_online.recipe new file mode 100644 index 0000000000..c192ce2b8d --- /dev/null +++ b/recipes/borse_online.recipe @@ -0,0 +1,33 @@ +from calibre.web.feeds.recipes import BasicNewsRecipe +class AdvancedUserRecipe1303841067(BasicNewsRecipe): + + title = u'Börse-online' + __author__ = 'schuster' + oldest_article = 1 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False 
+ language = 'de' + remove_javascript = True + cover_url = 'http://www.dpv.de/images/1995/source.gif' + masthead_url = 'http://www.zeitschriften-cover.de/cover/boerse-online-cover-januar-2010-x1387.jpg' + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + img {min-width:300px; max-width:600px; min-height:300px; max-height:800px} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + remove_tags_bevor = [dict(name='h3')] + remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})] + remove_tags = [dict(attrs={'class':['moduleTopNav', 'moduleHeaderNav', 'text', 'blau', 'poll1150']}), + dict(id=['newsletterlayer', 'newsletterlayerClose', 'newsletterlayer_body', 'newsletterarray_error', 'newsletterlayer_emailadress', 'newsletterlayer_submit', 'kommentar']), + dict(name=['h2', 'Gesamtranking', 'h3',''])] + + def print_version(self, url): + return url.replace('.html#nv=rss', '.html?mode=print') + + + + feeds = [(u'Börsennachrichten', u'http://www.boerse-online.de/rss/')] + diff --git a/recipes/capital_de.recipe b/recipes/capital_de.recipe new file mode 100644 index 0000000000..6826049bc9 --- /dev/null +++ b/recipes/capital_de.recipe @@ -0,0 +1,61 @@ +from calibre.web.feeds.news import BasicNewsRecipe +class AdvancedUserRecipe1305470859(BasicNewsRecipe): + title = u'Capital.de' + language = 'de' + __author__ = 'schuster' + oldest_article =7 + max_articles_per_feed = 35 + no_stylesheets = True + remove_javascript = True + use_embedded_content = False + masthead_url = 'http://www.wirtschaftsmedien-shop.de/media/stores/wirtschaftsmedien/capital/teaser_large_abo.jpg' + cover_url = 'http://d1kb9jvg6ylufe.cloudfront.net/WebsiteCMS/de/unternehmen/linktipps/mainColumn/08/image/DE_Capital_bis20mm_SW.jpg' + + extra_css = ''' + 
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + img {min-width:300px; max-width:600px; min-height:300px; max-height:800px} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + def print_version(self, url): + return url.replace ('nv=rss#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', 'mode=print') + remove_tags_bevor = [dict(name='td', attrs={'class':'textcell'})] + remove_tags_after = [dict(name='div', attrs={'class':'artikelsplit'})] + + feeds = [ (u'Wirtschaftsmagazin', u'http://www.capital.de/rss/'), + (u'Unternehmen', u'http://www.capital.de/rss/unternehmen'), + (u'Finanz & Geldanlage', u'http://www.capital.de/rss/finanzen/geldanlage')] + + def append_page(self, soup, appendtag, position): + pager = soup.find('div',attrs={'class':'artikelsplit'}) + if pager: + nexturl = self.INDEX + pager.a['href'] + soup2 = self.index_to_soup(nexturl) + texttag = soup2.find('div', attrs={'class':'printable'}) + for it in texttag.findAll(style=True): + del it['style'] + newpos = len(texttag.contents) + self.append_page(soup2,texttag,newpos) + texttag.extract() + appendtag.insert(position,texttag) + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll('div', attrs={'class':'artikelsplit'}): + item.extract() + self.append_page(soup, soup.body, 3) + pager = soup.find('div',attrs={'class':'artikelsplit'}) + if pager: + pager.extract() + return self.adeify_images(soup) + + + + remove_tags = [dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'artikelsplit']}), + dict(id=['topNav', 'mainNav', 'subNav', 'socialmedia', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere']), + dict(span=['ratingtext', 'Gesamtranking', 'h3','']), + 
dict(rel=['canonical'])] + diff --git a/recipes/impulse_de.recipe b/recipes/impulse_de.recipe new file mode 100644 index 0000000000..d38c0aa6a6 --- /dev/null +++ b/recipes/impulse_de.recipe @@ -0,0 +1,32 @@ +from calibre.web.feeds.news import BasicNewsRecipe +class AdvancedUserRecipe1305470859(BasicNewsRecipe): + title = u'Impulse.de' + language = 'de' + __author__ = 'schuster' + oldest_article =14 + max_articles_per_feed = 100 + no_stylesheets = True + remove_javascript = True + use_embedded_content = False + cover_url = 'http://www.bvk.de/files/image/bilder/Logo%20Impulse.jpg' + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} + h4{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + img {min-width:300px; max-width:600px; min-height:300px; max-height:800px} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + def print_version(self, url): + return url.replace ('#utm_source=rss2&utm_medium=rss_feed&utm_campaign=/', '?mode=print') + remove_tags_bevor = [dict(name='h1', attrs={'class':'h2'})] + remove_tags_after = [dict(name='div', attrs={'class':'artikelfuss'})] + + feeds = [ (u'impulstest', u'http://www.impulse.de/rss/')] + + + remove_tags = [dict(attrs={'class':['navSeitenAlle', 'kommentieren', 'teaserheader', 'teasercontent', 'info', 'zwischenhead', 'kasten_artikel']}), + dict(id=['metaNav', 'impKopf', 'impTopNav', 'impSubNav', 'footerRahmen', 'gatrixx_marktinformationen', 'pager', 'weitere', 'socialmedia', 'rating_open']), + dict(span=['ratingtext', 'Gesamtranking', 'h3','']), + dict(rel=['canonical'])] +