From 3cf7988c256c4547111e76cd2df9b02ce19f4010 Mon Sep 17 00:00:00 2001 From: sylvaindurand Date: Mon, 5 Aug 2013 16:04:04 +0200 Subject: [PATCH] Typography, error message and description - french typography (apostrophes, guillemets); - error message if using a free account (which can't download the newspaper); - description. --- recipes/le_monde_sub.recipe | 55 +++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/recipes/le_monde_sub.recipe b/recipes/le_monde_sub.recipe index dc9fa9d36f..e55d71e0a7 100644 --- a/recipes/le_monde_sub.recipe +++ b/recipes/le_monde_sub.recipe @@ -13,7 +13,7 @@ class LeMonde(BasicNewsRecipe): title = u'Le Monde: Édition abonnés' __author__ = 'Sylvain Durand' - description = u'Disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.' + description = u'La version papier du quotidien Le Monde, disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.' language = 'fr' encoding = 'utf8' @@ -65,28 +65,41 @@ class LeMonde(BasicNewsRecipe): url = time.strftime(self.journal_url,self.date) soup = self.index_to_soup(url).sommaire sections = [] - for sec in soup.findAll("section"): - articles = [] - if sec['cahier'] != "Le Monde": - for col in sec.findAll("fnts"): - col.extract() - if sec['cahier']=="Le Monde Magazine": - continue - for art in sec.findAll("art"): - if art.txt.string and art.ttr.string: - if art.find(['url']): - art.insert(6,'
') - if art.find(['lgd']) and art.find(['lgd']).string: - art.insert(7,'
'+art.find(['lgd']).string+'
') - article = ""+unicode(art)+"" - article = article.replace('','').replace(' oC ','°C ') - article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>') - f = PersistentTemporaryFile() - f.write(article) - articles.append({'title':art.ttr.string,'url':"file:///"+f.name}) - sections.append((sec['nom'], articles)) + try: + for sec in soup.findAll("section"): + articles = [] + if sec['cahier'] != "Le Monde": + for col in sec.findAll("fnts"): + col.extract() + if sec['cahier']=="Le Monde Magazine": + continue + for art in sec.findAll("art"): + if art.txt.string and art.ttr.string: + if art.find(['url']): + art.insert(6,'
') + if art.find(['lgd']) and art.find(['lgd']).string: + art.insert(7,'
'+art.find(['lgd']).string+'
') + + def guillemets(match): + if match.group(1) == u"=": + return match.group(0) + return u'%s« %s »' % (match.group(1), match.group(2)) + + article = ""+unicode(art)+"" + article = article.replace('','').replace(' oC ','°C ') + article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>') + article = article.replace("'" , u'\u2019') + article = re.sub('(.|^)"([^"]+)"', guillemets, article) + + f = PersistentTemporaryFile() + f.write(article) + articles.append({'title':art.ttr.string,'url':"file:///"+f.name}) + sections.append((sec['nom'], articles)) + except AttributeError: + self.log("Vos identifiants sont incorrects, ou votre abonnement LeMonde.fr ne vous permet pas de télécharger le journal.") return sections + def preprocess_html(self, soup): for lgd in soup.findAll(id="lgd"): lgd.contents[-1].extract()