Typography, error message and description

- French typography (apostrophes, guillemets); - error message if using a free account (which can't download the newspaper); - description.
2025-07-09 03:04:10 -04:00 · 2013-08-05 16:04:04 +02:00 · 2013-08-05 16:04:04 +02:00 · 3cf7988c25
commit 3cf7988c25
parent 10b4b9d39c
1 changed files with 34 additions and 21 deletions
--- a/recipes/le_monde_sub.recipe
+++ b/recipes/le_monde_sub.recipe
@@ -13,7 +13,7 @@ class LeMonde(BasicNewsRecipe):
    title              = u'Le Monde: Édition abonnés'
    __author__         = 'Sylvain Durand'
-    description        = u'Disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.'
+    description        = u'La version papier du quotidien Le Monde, disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.'
    language           = 'fr'
    encoding           = 'utf8'
@@ -65,28 +65,41 @@ class LeMonde(BasicNewsRecipe):
        url = time.strftime(self.journal_url,self.date)
        soup = self.index_to_soup(url).sommaire
        sections = []
-        for sec in soup.findAll("section"):
+        try:
-            articles = []
+            for sec in soup.findAll("section"):
-            if sec['cahier'] != "Le Monde":
+                articles = []
-                for col in sec.findAll("fnts"):
+                if sec['cahier'] != "Le Monde":
-                    col.extract()
+                    for col in sec.findAll("fnts"):
-            if sec['cahier']=="Le Monde Magazine":
+                        col.extract()
-                continue
+                if sec['cahier']=="Le Monde Magazine":
-            for art in sec.findAll("art"):
+                    continue
-                if art.txt.string and art.ttr.string:
+                for art in sec.findAll("art"):
-                    if art.find(['url']):
+                    if art.txt.string and art.ttr.string:
-                        art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>')
+                        if art.find(['url']):
-                    if art.find(['lgd']) and art.find(['lgd']).string:
+                            art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>')
-                        art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>')
+                        if art.find(['lgd']) and art.find(['lgd']).string:
-                    article = "<html><head></head><body>"+unicode(art)+"</body></html>"
+                            art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>')
-                    article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
+                        
-                    article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>')
+                        def guillemets(match):
-                    f = PersistentTemporaryFile()
+                            if match.group(1) == u"=":
-                    f.write(article)
+                                return match.group(0)
-                    articles.append({'title':art.ttr.string,'url':"file:///"+f.name})
+                            return u'%s«&nbsp;%s&nbsp;»' % (match.group(1), match.group(2))
-            sections.append((sec['nom'], articles))
+                            
                        article = "<html><head></head><body>"+unicode(art)+"</body></html>"
                        article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
                        article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>')
                        article = article.replace("'" , u'\u2019')
                        article = re.sub('(.|^)"([^"]+)"', guillemets, article)
                        f = PersistentTemporaryFile()
                        f.write(article)
                        articles.append({'title':art.ttr.string,'url':"file:///"+f.name})
                sections.append((sec['nom'], articles))
        except AttributeError:
            self.log("Vos identifiants sont incorrects, ou votre abonnement LeMonde.fr ne vous permet pas de télécharger le journal.")
        return sections
    def preprocess_html(self, soup):
        for lgd in soup.findAll(id="lgd"):
            lgd.contents[-1].extract()