From 570eb48fb152c3cb353c965bcf03aaea2fb61839 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Oct 2010 09:28:54 -0600 Subject: [PATCH 1/5] IMproved versions of Sueddeutsche and HNA --- resources/recipes/hna.recipe | 22 +++++++++++++++++----- resources/recipes/sueddeutsche.recipe | 14 +++++++------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/resources/recipes/hna.recipe b/resources/recipes/hna.recipe index e04837bd17..6e843800ee 100644 --- a/resources/recipes/hna.recipe +++ b/resources/recipes/hna.recipe @@ -30,21 +30,33 @@ class hnaDe(BasicNewsRecipe): dict(id='superbanner'), dict(id='navigation'), dict(id='skyscraper'), + dict(id='idNavigationWrap'), + dict(id='idHeaderSearchForm'), + dict(id='idLoginBarWrap'), + dict(id='idAccountButtons'), + dict(id='idHeadButtons'), + dict(id='idBoxesWrap'), dict(id=''), dict(name='span'), dict(name='ul', attrs={'class':'linklist'}), dict(name='a', attrs={'href':'#'}), dict(name='div', attrs={'class':'hlist'}), + dict(name='li', attrs={'class':'idButton idIsLoginGroup idHeaderRegister '}), + dict(name='li', attrs={'class':'idVideoBar idFirst'}), + dict(name='li', attrs={'class':'idSetStartPageLink idLast'}), + dict(name='li', attrs={'class':'idKinderNetzBar idLast'}), + dict(name='li', attrs={'class':'idFotoBar '}), dict(name='div', attrs={'class':'subc noprint'}), + dict(name='div', attrs={'class':'idBreadcrumb'}), + dict(name='div', attrs={'class':'idLay idAdvertising idClStandard '}), + dict(name='span', attrs={'class':'idHeadLineIntro'}), dict(name='p', attrs={'class':'breadcrumb'}), dict(name='a', attrs={'style':'cursor:hand'}), - dict(name='p', attrs={'class':'h5'})] + dict(name='p', attrs={'class':'h5'}), + dict(name='p', attrs={'class':'idMoreEnd'})] #remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})] - remove_tags_after = [dict(name='a', attrs={'href':'#'})] + remove_tags_after = [dict(name='p', attrs={'class':'idMoreEnd'})] feeds = [ ('hna_soehre', 
'http://feeds2.feedburner.com/hna/soehre'), ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ] - - - diff --git a/resources/recipes/sueddeutsche.recipe b/resources/recipes/sueddeutsche.recipe index 9a5b00fc6c..d898376003 100644 --- a/resources/recipes/sueddeutsche.recipe +++ b/resources/recipes/sueddeutsche.recipe @@ -28,7 +28,7 @@ class Sueddeutsche(BasicNewsRecipe): "SKY_AD","NT1_AD","navbar1","sdesiteheader"]}), dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg", - "pages closed","basebox right narrow"]}), + "pages closed","basebox right narrow","headslot galleried"]}), dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2", "item","videoBigButton","articlefooter full-column", @@ -38,10 +38,11 @@ class Sueddeutsche(BasicNewsRecipe): dict(name='div', attrs={'style':["position:relative;"]}), dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}), dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}), - dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav"]}), + dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","actions"]}), dict(name='td', attrs={'class':["artikelDruckenRight"]}), dict(name='p', text = "ANZEIGE") ] + remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})] extra_css = ''' h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;} @@ -70,9 +71,8 @@ class Sueddeutsche(BasicNewsRecipe): (u'Reise', u'http://suche.sueddeutsche.de/query/reise/nav/%C2%A7ressort%3AReise/sort/-docdatetime?output=rss') ] + + def print_version(self, url): - return url.replace('/text/', '/text/print.html') - - - - + main, sep, id = url.rpartition('/') + return main + '/2.220/' + id From a5e10f2feed9564a20685c641be2e0aa3acb7633 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Oct 2010 10:05:46 -0600 Subject: 
[PATCH 2/5] Anand Tech by Oliver Niesner --- resources/images/news/anandtech.png | Bin 0 -> 664 bytes resources/recipes/anandtech.recipe | 32 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 resources/images/news/anandtech.png create mode 100644 resources/recipes/anandtech.recipe diff --git a/resources/images/news/anandtech.png b/resources/images/news/anandtech.png new file mode 100644 index 0000000000000000000000000000000000000000..19270d99a96779c4f2309a5185cd32aad2295b6c GIT binary patch literal 664 zcmV;J0%!e+P)wEnB@2SW->E^7(DC*27q%w4IAS8KtJKO z2mt9+irbg_$ZOg!U>F#bgv!7T;2PlDX!Zx|iFJ2lSfHCWqU<91dztv$ybtIEWD)4u zXe>r+tc#oh*}S=yk*?#2l2nxRh(J{-v!|kh(4i<=!CIGIrHi+K zYyxNni>BC#HC;)7F{}K zlBtm=qzpGH!-ZjA!6Z{)%AeW{yaDBM}(Ia&|NYko)|+fmwB}ax+MT6N>E2> z*#m%y^dE3h$#JJSjmOLq_xKr7&T@ZNX65JObrY)7&+vzv5Cjpac!i_o2DchAIBlJI zXAQlz*LmPRh5Bx6(}K07@1*8Fp_}%<_Xbva{ds&8NQ-4$sp(_J8HO*@6p~A&jzr@1 za}2}6Fsv03xR=olk`5n(feTEyn(^ovk{`xPngOyuX34}CFf4mm5-I~PKFYhwpLX;& yOqA{Sx~)8g#3ISbQLybT5O& Date: Wed, 6 Oct 2010 10:07:31 -0600 Subject: [PATCH 3/5] Rusia Hoy by DM --- resources/images/news/rusiahoy.png | Bin 0 -> 492 bytes resources/recipes/rusiahoy.recipe | 47 +++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 resources/images/news/rusiahoy.png create mode 100644 resources/recipes/rusiahoy.recipe diff --git a/resources/images/news/rusiahoy.png b/resources/images/news/rusiahoy.png new file mode 100644 index 0000000000000000000000000000000000000000..6fbdefa6a5ca3770c206656e762d68b256da7400 GIT binary patch literal 492 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87#Kx8T^vI!PA{Fj(2vGAs!Wu z3MQUiO?C`S{!JzUs#*D8m}v8Q{<2?x^Z5V#c&o@Z|9wN?J9#%Qr-sP2DsLMr zcI~;f#38SzWun|qTWxdA0{351OWVAw7CcdUdiyHljAyKA?-p^eX15#*I|JmLzi$4n z^!ELq^6Q^PUCK>ZwFVekswJ)wB`Jv|saDBFsX&Us$iUD*7l;hZLJSS849u(y&46qx w1A~TZqLwHca`RI%(<%`fOhXLKt&A Date: Wed, 6 Oct 2010 10:10:12 -0600 Subject: [PATCH 4/5] 
# CACM (Communications of the Association for Computing Machinery) news
# recipe, contributed by jonmisurda.
import datetime
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1286242553(BasicNewsRecipe):
    # Recipe metadata.
    title = u'CACM'
    __author__ = 'jonmisurda'
    language = 'en'

    # Credentials are read from self.username / self.password in get_browser.
    needs_subscription = True

    # Feed selection limits.
    oldest_article = 7
    max_articles_per_feed = 100
    feeds = [(u'CACM', u'http://cacm.acm.org/magazine.rss')]

    # Page clean-up: <div> elements with any of these classes are removed.
    no_stylesheets = True
    remove_tags = [
        {
            'name': 'div',
            'attrs': {
                'class': [
                    'FeatureBox',
                    'ArticleComments',
                    'SideColumn',
                    'LeftColumn',
                    'RightColumn',
                    'SiteSearch',
                    'MainNavBar',
                    'more',
                    'SubMenu',
                    'inner',
                ],
            },
        },
    ]

    # Filled with (year, month) by get_cover_url.
    cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d'

    def get_browser(self):
        """Return a browser, logged in to cacm.acm.org when credentials exist.

        When either ``self.username`` or ``self.password`` is ``None`` the
        browser is returned without attempting a login.
        """
        # NOTE(review): get_browser is invoked on the base class without an
        # instance, exactly as the recipe was submitted -- confirm this matches
        # the get_browser signature of the calibre version being targeted.
        browser = BasicNewsRecipe.get_browser()
        if self.username is None or self.password is None:
            return browser

        browser.open('https://cacm.acm.org/login')
        # The login fields live in the form at index 1 of the login page.
        browser.select_form(nr=1)
        browser['current_member[user]'] = self.username
        browser['current_member[passwd]'] = self.password
        browser.submit()
        return browser

    def get_cover_url(self):
        """Return the cover image URL for the current month, or ``None``.

        Fetches the magazine page for the current year and month and returns
        the ``src`` of the ``<img>`` whose ``alt`` is 'magazine cover image';
        returns ``None`` when no such image is found.
        """
        today = datetime.datetime.now()
        issue_page = self.index_to_soup(
            self.cover_url_pattern % (today.year, today.month))
        cover_img = issue_page.find(
            'img', attrs={'alt': 'magazine cover image'})
        return cover_img['src'] if cover_img else None
# gsp.ro news recipe, contributed by bucsie.
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1286351181(BasicNewsRecipe):
    # Recipe metadata.
    title = u'gsp.ro'
    __author__ = 'bucsie'
    language = 'ro'
    cover_url = 'http://www.gsp.ro/images/sigla_rosu.jpg'

    # Feed selection limits.
    oldest_article = 2
    max_articles_per_feed = 100
    feeds = [
        (u'toate stirile',
         u'http://www.gsp.ro/index.php?section=section&screen=rss'),
    ]

    # Page clean-up: drop these <div> elements (matched by class or by id).
    remove_tags = [
        {
            'name': 'div',
            'attrs': {
                'class': [
                    'related_articles',
                    'articol_noteaza straight_line dotted_line_top',
                    'comentarii',
                    'mai_multe_articole',
                ],
            },
        },
        {'name': 'div', 'attrs': {'id': 'icons'}},
    ]
    # A single tag specification (a dict, not a list), as submitted.
    remove_tags_after = {
        'name': 'div',
        'attrs': {'id': 'adoceanintactrovccmgpmnyt'},
    }

    def print_version(self, url):
        """Return the print-view URL for the article at ``url``.

        The result is 'http://www1.gsp.ro/print/' followed by everything
        after the last '/' in ``url``. Raises ``ValueError`` (from
        ``str.rindex``) when ``url`` contains no '/'.
        """
        last_slash = url.rindex('/')
        article_part = url[last_slash + 1:]
        return 'http://www1.gsp.ro/print/' + article_part