This commit is contained in:
Kovid Goyal 2013-08-08 21:32:35 +05:30
parent 36cace1b48
commit 9996f3b88a

View File

@ -2,7 +2,7 @@
__author__ = 'Sylvain Durand <sylvain.durand@ponts.org>' __author__ = 'Sylvain Durand <sylvain.durand@ponts.org>'
__license__ = 'GPL v3' __license__ = 'GPL v3'
import time import time, re
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -79,18 +79,18 @@ class LeMonde(BasicNewsRecipe):
art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>') art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>')
if art.find(['lgd']) and art.find(['lgd']).string: if art.find(['lgd']) and art.find(['lgd']).string:
art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>') art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>')
def guillemets(match):
    """Replacement callback turning straight double quotes into guillemets.

    Meant to be passed to ``re.sub`` with a pattern that has two groups:
    group 1 is the character just before the opening quote (empty at the
    start of the text) and group 2 is the quoted text.

    Returns the matched text unchanged when the preceding character is
    ``=``; otherwise returns group 1 followed by the quoted text wrapped
    in French guillemets with non-breaking spaces.
    """
    before = match.group(1)
    # A quote right after "=" is presumably an HTML attribute value
    # (e.g. src="..."), so it is left exactly as matched.
    if before == u"=":
        return match.group(0)
    quoted = match.group(2)
    return u'%s«&nbsp;%s&nbsp;»' % (before, quoted)
article = "<html><head></head><body>"+unicode(art)+"</body></html>" article = "<html><head></head><body>"+unicode(art)+"</body></html>"
article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ') article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>') article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>')
article = article.replace("'" , u'\u2019') article = article.replace("'" , u'\u2019')
article = re.sub('(.|^)"([^"]+)"', guillemets, article) article = re.sub('(.|^)"([^"]+)"', guillemets, article)
f = PersistentTemporaryFile() f = PersistentTemporaryFile()
f.write(article) f.write(article)
articles.append({'title':art.ttr.string,'url':"file:///"+f.name}) articles.append({'title':art.ttr.string,'url':"file:///"+f.name})
@ -99,9 +99,9 @@ class LeMonde(BasicNewsRecipe):
self.log("Vos identifiants sont incorrects, ou votre abonnement LeMonde.fr ne vous permet pas de télécharger le journal.") self.log("Vos identifiants sont incorrects, ou votre abonnement LeMonde.fr ne vous permet pas de télécharger le journal.")
return sections return sections
def preprocess_html(self, soup):
    """Drop the last child node of every element whose id is ``lgd``.

    ``soup`` is the parsed article; it is modified in place and the same
    object is returned.
    """
    for caption in soup.findAll(id="lgd"):
        # An element without children has nothing to remove; indexing
        # [-1] on it would raise IndexError.
        if caption.contents:
            caption.contents[-1].extract()
    return soup