From c7b434dd075b1d641aa0d302d8938d21abe911b7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 21 Jan 2015 08:54:21 +0530 Subject: [PATCH] Update Haaretz Fixes #1412859 [Updated recipe for Haaretz in English](https://bugs.launchpad.net/calibre/+bug/1412859) --- recipes/haaretz_en.recipe | 41 +++++++++++++++-------------------- recipes/icons/haaretz_en.png | Bin 712 -> 277 bytes 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/recipes/haaretz_en.recipe b/recipes/haaretz_en.recipe index 0856621d38..b40f8c74b2 100644 --- a/recipes/haaretz_en.recipe +++ b/recipes/haaretz_en.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010-2012, Darko Miletic ' +__copyright__ = '2010-2015, Darko Miletic ' ''' www.haaretz.com ''' @@ -31,8 +31,6 @@ class Haaretz_en(BasicNewsRecipe): .authorBar {font-size: small} """ - preprocess_regexps = [(re.compile(r'.*?', re.DOTALL|re.IGNORECASE),lambda match: '')] - conversion_options = { 'comment' : description , 'tags' : category @@ -40,28 +38,29 @@ class Haaretz_en(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [dict(attrs={'id':'threecolumns'})] + keep_only_tags = [dict(name='div', attrs={'id':'content'})] remove_attributes = ['width','height'] remove_tags = [ dict(name=['iframe','link','object','embed']) - ,dict(name='div',attrs={'class':'rightcol'}) + ,dict(name='div',attrs={'class':['rightcol', 'fblike']}) + ,dict(name='div',attrs={'id':'article_sso_form'}) ] feeds = [ - (u'Headlines' , 'http://feeds.feedburner.com/haaretz/LBao' ) - ,(u'Opinion' , 'http://feeds.feedburner.com/haaretz/opinions' ) - ,(u'Defence and diplomacy' , 'http://feeds.feedburner.com/DefenseAndDiplomacy' ) - ,(u'National' , 'http://feeds.feedburner.com/haaretz/National' ) - ,(u'International' , 'http://feeds.feedburner.com/InternationalRss' ) - ,(u'Jewish World' , 'http://feeds.feedburner.com/JewishWorldRss' ) - ,(u'Business' , 'http://feeds.feedburner.com/BusinessPrintRss' ) - ,(u'Real Estate' , 'http://feeds.feedburner.com/RealEstatePrintRss' ) - ,(u'Features' , 'http://feeds.feedburner.com/FeaturesPrintRss' ) - ,(u'Arts & Leisure' , 'http://feeds.feedburner.com/ArtsAndLeisureRss' ) - ,(u'Books' , 'http://www.haaretz.com/cmlink/books-rss-1.264947?localLinksEnabled=false') - ,(u'Food & Wine' , 'http://feeds.feedburner.com/FoodAndWinePrintRss' ) - ,(u'Sports' , 'http://feeds.feedburner.com/haaretz/Sport' ) + (u'Headlines' , 'http://www.haaretz.com/cmlink/1.263335') + ,(u'Opinion' , 'http://www.haaretz.com/cmlink/1.628752') + ,(u'Defence and diplomacy' , 'http://www.haaretz.com/cmlink/1.628763') + ,(u'National' , 'http://www.haaretz.com/cmlink/1.628764') + ,(u'International' , 'http://www.haaretz.com/cmlink/1.628765') + ,(u'Jewish World' , 'http://www.haaretz.com/cmlink/1.628766') + ,(u'Business' , 'http://www.haaretz.com/cmlink/1.628767') + ,(u'Real Estate' , 'http://www.haaretz.com/cmlink/1.628768') + ,(u'Features' , 'http://www.haaretz.com/cmlink/1.628769') + ,(u'Arts & Leisure' , 'http://www.haaretz.com/cmlink/1.628771') + ,(u'Books' , 'http://www.haaretz.com/cmlink/1.628772') + ,(u'Food & Wine' , 'http://www.haaretz.com/cmlink/1.628773') + ,(u'Sports' , 'http://www.haaretz.com/cmlink/1.628774') ] def get_browser(self): @@ -78,13 +77,7 @@ class Haaretz_en(BasicNewsRecipe): br.open('https://sso.haaretz.com/sso/sso/signIn',data) return br - def get_article_url(self, article): - url = BasicNewsRecipe.get_article_url(self, article) - return self.browser.open_novisit(url).geturl() - def print_version(self, url): article = url.rpartition('/')[2] return 'http://www.haaretz.com/misc/article-print-page/' + article - def preprocess_raw_html(self, raw, url): - return ''+raw[raw.find(''):] diff --git a/recipes/icons/haaretz_en.png b/recipes/icons/haaretz_en.png index 56b41baf99edab6643f02ab03cde981fe24a7bfa..1e9728be31949b2e3f1cc66218b3b1b472daca24 100644 GIT binary patch delta 259 zcmX@XI+aPWGr-TCmrII^fq{Y7)59eQNDF{42OE%-|NK93qM}Saw=e?(w=g5atT_@u zQT7r~Usv|~j3NS@;>{BZ?f``rdAc};Se#xv*^!G`k;gTD%@?-4|8L$p>yU4tYrJ3D zUBy)S3YVkPBmYkwYMnEa4=nt^Vy3{nU0ILQxO?>!z9(y6>XaABnYv$^?(tDq<4ayq zkd>rS8!iu=8)ZF~UwL@TYM$e-(;Sm_#Z=90<*~Wl;rWa4&nwop z`3FPy_k{htx2`BZ`0p<6h*NvTRx6!-%vv$ELUiNJ_kS4ov{%de-|9Z4TJ*Fl~w5|VrIa$5m{!8-8 z$%iEuEpT-&{UlU0=ruyIh(ho>$M? z{qV)df=zeJD=HuE++JF!A( zb5_O9O%-ng85{nd`Iemi%{uwc%*84D4%KbgRI!)k?}nP&GvCV^&YPI*@#+zmngz#lZa;_il+ve>$Dg!>8x1Vf?Z!c;eq_+=@NI zA+O&ZOx-EYDLkQLNqzP;HcRIQodaKTc9%66Ij-&Wy}0wD*@=r)Dhh`t?*08dkFWUv z8)y2>Zjl|r7kRs0{Q1rBOe`taIQc=pvHAhivI}ottjJM}TK(7U7t=4b-3Ae5DxGHx z7kr8jNKt4M>Zw<>{rU6Rzvt&ZeAxN(^Ml|8um1NMndSP-;@a?D{(p~R!NlCJvesJB zd$+B-v1TuKNLrK|+vBtEc7=66t6I3|=BvO%_g*}6aS5KL$zpL*XL}m!fdf8^yNYj3 zOzt}p*k|=PC3l(O9?>;tP535R<{n?5T##~i=H#uHdu3GI$_GL}a3*Z{S^86X_q#u=TsFeyIv+!~15<)(iEBhjN@7W> zRdP`(kYX@0Ff`CLFax3xBO@zAQ!5igAlu5o;PASKS5Y+N=BH$)RibDxwlXxbGO&PX Un3H{B4^RVxr>mdKI;Vst0H=vF&j0`b