Typography, error message and description

- French typography (apostrophes, guillemets);
- error message when a free account is used (free accounts cannot download the newspaper);
- description.
This commit is contained in:
sylvaindurand 2013-08-05 16:04:04 +02:00
parent 10b4b9d39c
commit 3cf7988c25

View File

@ -13,7 +13,7 @@ class LeMonde(BasicNewsRecipe):
title = u'Le Monde: Édition abonnés' title = u'Le Monde: Édition abonnés'
__author__ = 'Sylvain Durand' __author__ = 'Sylvain Durand'
description = u'Disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.' description = u'La version papier du quotidien Le Monde, disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.'
language = 'fr' language = 'fr'
encoding = 'utf8' encoding = 'utf8'
@ -65,28 +65,41 @@ class LeMonde(BasicNewsRecipe):
url = time.strftime(self.journal_url,self.date) url = time.strftime(self.journal_url,self.date)
soup = self.index_to_soup(url).sommaire soup = self.index_to_soup(url).sommaire
sections = [] sections = []
for sec in soup.findAll("section"): try:
articles = [] for sec in soup.findAll("section"):
if sec['cahier'] != "Le Monde": articles = []
for col in sec.findAll("fnts"): if sec['cahier'] != "Le Monde":
col.extract() for col in sec.findAll("fnts"):
if sec['cahier']=="Le Monde Magazine": col.extract()
continue if sec['cahier']=="Le Monde Magazine":
for art in sec.findAll("art"): continue
if art.txt.string and art.ttr.string: for art in sec.findAll("art"):
if art.find(['url']): if art.txt.string and art.ttr.string:
art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>') if art.find(['url']):
if art.find(['lgd']) and art.find(['lgd']).string: art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>')
art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>') if art.find(['lgd']) and art.find(['lgd']).string:
article = "<html><head></head><body>"+unicode(art)+"</body></html>" art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>')
article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>') def guillemets(match):
f = PersistentTemporaryFile() if match.group(1) == u"=":
f.write(article) return match.group(0)
articles.append({'title':art.ttr.string,'url':"file:///"+f.name}) return u'%s«&nbsp;%s&nbsp;»' % (match.group(1), match.group(2))
sections.append((sec['nom'], articles))
article = "<html><head></head><body>"+unicode(art)+"</body></html>"
article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>')
article = article.replace("'" , u'\u2019')
article = re.sub('(.|^)"([^"]+)"', guillemets, article)
f = PersistentTemporaryFile()
f.write(article)
articles.append({'title':art.ttr.string,'url':"file:///"+f.name})
sections.append((sec['nom'], articles))
except AttributeError:
self.log("Vos identifiants sont incorrects, ou votre abonnement LeMonde.fr ne vous permet pas de télécharger le journal.")
return sections return sections
def preprocess_html(self, soup): def preprocess_html(self, soup):
for lgd in soup.findAll(id="lgd"): for lgd in soup.findAll(id="lgd"):
lgd.contents[-1].extract() lgd.contents[-1].extract()