From 99892dc98fb6dac8f1f0fcdac64917162db3efee Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 25 Feb 2012 10:06:12 +0530
Subject: [PATCH] Update Mediapart and rue89

---
 recipes/icons/mediapart.png | Bin 0 -> 382 bytes
 recipes/icons/rue89.png     | Bin 0 -> 1261 bytes
 recipes/mediapart.recipe    |  26 ++++++++------
 recipes/rue89.recipe        |  66 ++++++++++++++++++++----------------
 4 files changed, 53 insertions(+), 39 deletions(-)
 create mode 100644 recipes/icons/mediapart.png
 create mode 100644 recipes/icons/rue89.png
diff --git a/recipes/icons/mediapart.png b/recipes/icons/mediapart.png
new file mode 100644
index 0000000000000000000000000000000000000000..ab489d3db7f7ccef0d7ea8b9b06ebe8f782ce6be
GIT binary patch
literal 382
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!73?$#)eFPFP2=EDU{r~^nyLUhWNZkd}ckk{7
zGI#Ip0E_-Vb7s}<-Q^&;yJyavnR)ljs!Wq-=Ye_|lf2zsI6vN8`Uc41Ebxdd2GSm2
z>~=ES4#;Trba4#fxSpI4VpPU!7$%fZ5NMpgUc;{_@j^^u?B3eq>ndz#U%a}zdwc%e
zBNHy~ljhLsGrY5!&2ZcP)C+fc*``aVIlJgchB>>G8D_XNzgX+bGg(n-8qZH}$C4}N
zZ0h&l?k*3H4@{h4nSF8J^(gIhBZJC<ylwL?EIME?x%k=HoyO`03=ACe-Hf<bx`Y57
zq*~${QIe8al4_M)lnSI6j0_BobPbGjjZ8uejjfC<tqjby4GgUe3~ripEkn_eo1c=I
iR*BGH9Aao;Wn^Y$Vgb=$7<>T~pbVa_elF{r5}E*np_Equ

literal 0
HcmV?d00001

diff --git a/recipes/icons/rue89.png b/recipes/icons/rue89.png
new file mode 100644
index 0000000000000000000000000000000000000000..55c52bc488abb05186f106e169556affcf67bf59
GIT binary patch
literal 1261
zcmY+^dr*>D6aer~Gcdzf;$&ql+QTRHfr3b>L~3ekX!xMiTr?BQ2*V-ERqQco`4~$!
z)<-Gjn9E{HTH9vnu9hjGm9>~8SgwlN3M?bKWx2Qh+uzJNXXehFnKNh3+#*`!ZcB@`
z77zqkhK2-2gKG0NY!D#tP1)!S3WB+3ci_@DE8^$hf|nJ?Ll`GO4fz(qy+`~Z$jl=&
za3?*ar!6KReBVLz7uC$^0SIeO-NQr*g`pIxgF_hgX#B-xEq0`#avMa1g9De=MX@X>
zFgwd^t*CEG<fdyx1eQ0kJRk~HB8Usyb5N}wJN~y;q+92>qgR%rmGNeM=c~0Um&FAR
z_vX)CT@bY6RQF=rk%b1M5o<6QOg%<*&&N0qsw&Cb?SrDw<LY`MYS25PHeu^k5BnJV
zHE-Xz$)uh#$kx`D=Kh#jRYj2y0fcZur_;g5bK9Gnn>%x=J6iOMzR3<Z06wd<xVRYV
z>5&1|F0#v;Lirf;Gnm%4qPf08VSt}J6~Cr)?0)`|%8&n)<A{V&*9Rt-RaPm0f-K*k
z!nc526WL%=Hu<g#X$=i>AB-I9l!A#?rXNdUFc>E80=m24ea`fjf8t{6n(my(f)Q83
z#ZyyLEXrlki=HyyV+^Ho$~&2%Ke;3C$9zK?3H2)D3gGA?4|Fac&FQzNvCGF^W&CC}
zbT?B;Os>42uoWqs&~sXza-479yRppX+{nmCV(BI3=}!U=KIY`ZBrRicupwc#eM`4`
z-rAhXX0x{?SB85^+gn>(&FEsVOhf0C+q&~gKmWWj#3<=8v*qc(xf@G&b#r>$(2w#D
z-#2^rhw0GZ%T=hP4y)>GRwxt-x+`H-B0^pojYcB$tiQkCRQiluJCoO;9ylOj6?Kc_
z_)UlLS{3IlCldG)o?n2NR!L6*3s%!AcmjYTwv$LCC^aWz`cG^pEQ}A*kIPnXgG433
z9nOc7OV>6)50Hi2L`8OuMs_vbd+p|rAKN4nNr^^zuVL?Xzm3CcYco@mQmt0~Xnm3H
zSE`dBtiI67I;ADP*!8-4czBpB(*iEcN(`AC{hC>>sW!Jv$;`~Ogd>b2FL3@FEKKC`
zOF(=5-Om&983o_<qR7?s%yR%rc;tveoaM7(-35J&JRXndGhpa&ZuJE7bV{eOha^`v
z-BwJFVcULr^5jWyGMk;1m0>}9T~}A<R7>wmRdpb_Kk#_$GDBk1??8aH6XvrURYbwq
zKu1q!XJ^|lEc!ZHT`*B@MO!SVpx?AaSI}J1rs#UHSiJHU{k>iuL5b0-9-ctjoU5s+
zS<k=K*x0y2_vsC}bk|>`)#<hSC9&cdQM4O4!y^A&A%N*iQvyX&Mu%GK(l?$46o&5`
z^e(v-o5J}nAlSLwJc_6Un+dCs8|UWcY{w0~srb<D;GH*Fp0k>em>7XZF`yY8eYrU(
zszMw$<0Yx`lu0+~845W#IL<=x(vimQ*;7|#<Ew)FPk{SXPUI%~pt@+U4=z7P^<3<=
zn>eul%k=7bMXWn6fG}sr8-kPcFBT@j|FM}H6wgilo_mO#l643a2oK}%SQwARd(&}V
zWG@1lK=godWElRi3MT!pA(NfTO3V4$fWyKBES^Be5y@UYWSHOq<3R%+G3f#tpwOVm
Kz$R+)ng0OQj8T68

literal 0
HcmV?d00001

diff --git a/recipes/mediapart.recipe b/recipes/mediapart.recipe
index a5bc4e96f9..0c9bbb4b01 100644
--- a/recipes/mediapart.recipe
+++ b/recipes/mediapart.recipe
@@ -1,11 +1,13 @@
 __license__   = 'GPL v3'
-__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, 2011, Louis Gesbert <meta at antislash dot info>'
+__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010-2012, Louis Gesbert <meta at antislash dot info>'
 '''
 Mediapart
 '''
 
+__author__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010-2012, Louis Gesbert <meta at antislash dot info>'
+
 import re
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Mediapart(BasicNewsRecipe):
@@ -15,8 +17,9 @@ class Mediapart(BasicNewsRecipe):
     oldest_article = 7
     language = 'fr'
     needs_subscription = True
-
     max_articles_per_feed = 50
+
+    use_embedded_content = False
     no_stylesheets = True
 
     cover_url = 'http://static.mediapart.fr/files/pave_mediapart.jpg'
@@ -27,14 +30,9 @@ class Mediapart(BasicNewsRecipe):
 
 # -- print-version
 
-    preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
-        [
-            (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
-            (r'\'', lambda match: '&rsquo;')
-        ]
-    ]
+    conversion_options = { 'smarten_punctuation' : True }
 
-    remove_tags    = [ dict(name='div', attrs={'class':'print-source_url'}) ]
+    remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}) ]
 
     def print_version(self, url):
         raw = self.browser.open(url).read()
@@ -55,3 +53,11 @@ class Mediapart(BasicNewsRecipe):
             br['pass'] = self.password
             br.submit()
         return br
+
+    def preprocess_html(self, soup):
+        for title in soup.findAll('p', {'class':'titre_page'}):
+            title.name = 'h3'
+        for legend in soup.findAll('span', {'class':'legend'}):
+            legend.insert(0, Tag(soup, 'br', []))
+            legend.name = 'small'
+        return soup
diff --git a/recipes/rue89.recipe b/recipes/rue89.recipe
index 51cf8f6b98..c49712dc32 100644
--- a/recipes/rue89.recipe
+++ b/recipes/rue89.recipe
@@ -1,10 +1,10 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Louis Gesbert <meta at antislash dot info>'
+__copyright__ = '2010-2012, Louis Gesbert <meta at antislash dot info>'
 '''
 Rue89
 '''
 
-__author__ = '2010, Louis Gesbert <meta at antislash dot info>'
+__author__ = '2010-2012, Louis Gesbert <meta at antislash dot info>'
 
 import re
 from calibre.ebooks.BeautifulSoup import Tag
@@ -17,37 +17,45 @@ class Rue89(BasicNewsRecipe):
     title = u'Rue89'
     language = 'fr'
     oldest_article = 7
-    max_articles_per_feed = 50
+    max_articles_per_feed = 12
 
-    feeds = [(u'La Une', u'http://www.rue89.com/homepage/feed')]
+    use_embedded_content = False
+
+    # From http://www.rue89.com/les-flux-rss-de-rue89
+    feeds = [
+        (u'La Une',    u'http://www.rue89.com/feed'),
+        (u'Rue69',     u'http://www.rue89.com/rue69/feed'),
+        (u'Eco',       u'http://www.rue89.com/rue89-eco/feed'),
+        (u'Planète',   u'http://www.rue89.com/rue89-planete/feed'),
+        (u'Sport',     u'http://www.rue89.com/rue89-sport/feed'),
+        (u'Culture',   u'http://www.rue89.com/culture/feed'),
+        (u'Hi-tech',   u'http://www.rue89.com/hi-tech/feed'),
+        (u'Media',     u'http://www.rue89.com/medias/feed'),
+        (u'Monde',     u'http://www.rue89.com/monde/feed'),
+        (u'Politique', u'http://www.rue89.com/politique/feed'),
+        (u'Societe',   u'http://www.rue89.com/societe/feed'),
+    ]
+
+    # Follow redirection from feedsportal.com
+    def get_article_url(self,article):
+        return self.browser.open_novisit(article.link).geturl()
+
+    def print_version(self, url):
+        return url + '?imprimer=1'
 
     no_stylesheets = True
 
-    preprocess_regexps = [
-        (re.compile(r'<(/?)h2>', re.IGNORECASE|re.DOTALL),
-         lambda match : '<'+match.group(1)+'h3>'),
-        (re.compile(r'<div class="print-title">([^>]+)</div>', re.IGNORECASE|re.DOTALL),
-         lambda match : '<h2>'+match.group(1)+'</h2>'),
-        (re.compile(r'<img[^>]+src="[^"]*/numeros/(\d+)[^0-9.">]*.gif"[^>]*/>', re.IGNORECASE|re.DOTALL),
-         lambda match : '<span style="font-family: Sans-serif; color: red; font-size:24pt; padding=2pt;">'+match.group(1)+'</span>'),
-        (re.compile(r'\''), lambda match: '&rsquo;'),
-        ]
+    conversion_options = { 'smarten_punctuation' : True }
 
-    def preprocess_html(self,soup):
-        body = Tag(soup, 'body')
-        title = soup.find('h1', {'class':'title'})
-        content = soup.find('div', {'class':'content'})
-        soup.body.replaceWith(body)
-        body.insert(0, title)
-        body.insert(1, content)
-        return soup
+    keep_only_tags = [
+        dict(name='div', attrs={'id':'article'}),
+    ]
 
-    remove_tags = [ #dict(name='div', attrs={'class':'print-source_url'}),
-                    #dict(name='div', attrs={'class':'print-links'}),
-                    #dict(name='img', attrs={'class':'print-logo'}),
-                    dict(name='div', attrs={'class':'content_top'}),
-                    dict(name='div', attrs={'id':'sidebar-left'}), ]
+    remove_tags_after = [
+        dict(name='div', attrs={'id':'plus_loin'}),
+    ]
 
-# -- print-version has poor quality on this website, better do the conversion ourselves
-#    def print_version(self, url):
-#        return re.sub('^.*-([0-9]+)$', 'http://www.rue89.com/print/\\1',url)
+    remove_tags = [
+        dict(name='div', attrs={'id':'article_tools'}),
+        dict(name='div', attrs={'id':'plus_loin'}),
+    ]