This commit is contained in:
Kovid Goyal 2013-08-08 21:32:35 +05:30
parent 36cace1b48
commit 9996f3b88a

View File

@ -2,7 +2,7 @@
__author__ = 'Sylvain Durand <sylvain.durand@ponts.org>'
__license__ = 'GPL v3'
import time
import time, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@ -79,18 +79,18 @@ class LeMonde(BasicNewsRecipe):
art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>')
if art.find(['lgd']) and art.find(['lgd']).string:
art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>')
def guillemets(match):
    """Build the replacement text for one straight-quoted span.

    Intended as the replacement callback of a regex substitution whose
    pattern captures two groups: group 1 is the single character found
    just before the opening double quote (empty when the quote starts
    the string) and group 2 is the text between the quotes.

    :param match: regex match object exposing groups 0, 1 and 2.
    :return: the matched text unchanged when the preceding character is
        ``=``; otherwise the preceding character followed by the quoted
        text wrapped in French guillemets with ``&nbsp;`` entities.
    """
    before, quoted = match.group(1), match.group(2)
    if before != u"=":
        return u'%s«&nbsp;%s&nbsp;»' % (before, quoted)
    # A quote directly after "=" is presumably an HTML attribute value,
    # so the original text is handed back untouched.
    return match.group(0)
article = "<html><head></head><body>"+unicode(art)+"</body></html>"
article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>')
article = article.replace("'" , u'\u2019')
article = re.sub('(.|^)"([^"]+)"', guillemets, article)
f = PersistentTemporaryFile()
f.write(article)
articles.append({'title':art.ttr.string,'url':"file:///"+f.name})
@ -99,9 +99,9 @@ class LeMonde(BasicNewsRecipe):
self.log("Vos identifiants sont incorrects, ou votre abonnement LeMonde.fr ne vous permet pas de télécharger le journal.")
return sections
def preprocess_html(self, soup):
    """Detach the last child of every element whose id is "lgd".

    NOTE(review): the "lgd" elements look like the caption blocks this
    recipe inserts while assembling each article; the reason their last
    child is dropped is not visible in this part of the file.

    :param soup: parsed document; only its ``findAll`` method is used.
    :return: the same ``soup`` object that was passed in.
    """
    for lgd in soup.findAll(id="lgd"):
        # An element with no children has nothing to detach; indexing
        # ``contents[-1]`` on it would raise IndexError and abort the
        # processing of the whole page.
        if lgd.contents:
            lgd.contents[-1].extract()
    return soup