mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Typography, error message and description
- French typography (apostrophes, guillemets); - error message if using a free account (which can't download the newspaper); - description.
This commit is contained in:
parent
10b4b9d39c
commit
3cf7988c25
@ -13,7 +13,7 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
|
|
||||||
title = u'Le Monde: Édition abonnés'
|
title = u'Le Monde: Édition abonnés'
|
||||||
__author__ = 'Sylvain Durand'
|
__author__ = 'Sylvain Durand'
|
||||||
description = u'Disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.'
|
description = u'La version papier du quotidien Le Monde, disponible du lundi au samedi à partir de 14 heures environ, avec tous ses cahiers.'
|
||||||
language = 'fr'
|
language = 'fr'
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
|
|
||||||
@ -65,28 +65,41 @@ class LeMonde(BasicNewsRecipe):
|
|||||||
url = time.strftime(self.journal_url,self.date)
|
url = time.strftime(self.journal_url,self.date)
|
||||||
soup = self.index_to_soup(url).sommaire
|
soup = self.index_to_soup(url).sommaire
|
||||||
sections = []
|
sections = []
|
||||||
for sec in soup.findAll("section"):
|
try:
|
||||||
articles = []
|
for sec in soup.findAll("section"):
|
||||||
if sec['cahier'] != "Le Monde":
|
articles = []
|
||||||
for col in sec.findAll("fnts"):
|
if sec['cahier'] != "Le Monde":
|
||||||
col.extract()
|
for col in sec.findAll("fnts"):
|
||||||
if sec['cahier']=="Le Monde Magazine":
|
col.extract()
|
||||||
continue
|
if sec['cahier']=="Le Monde Magazine":
|
||||||
for art in sec.findAll("art"):
|
continue
|
||||||
if art.txt.string and art.ttr.string:
|
for art in sec.findAll("art"):
|
||||||
if art.find(['url']):
|
if art.txt.string and art.ttr.string:
|
||||||
art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>')
|
if art.find(['url']):
|
||||||
if art.find(['lgd']) and art.find(['lgd']).string:
|
art.insert(6,'<div id="photo"><img src="'+art.find(['url']).string+'" /></div>')
|
||||||
art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>')
|
if art.find(['lgd']) and art.find(['lgd']).string:
|
||||||
article = "<html><head></head><body>"+unicode(art)+"</body></html>"
|
art.insert(7,'<div id="lgd">'+art.find(['lgd']).string+'</div>')
|
||||||
article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
|
|
||||||
article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>')
|
def guillemets(match):
|
||||||
f = PersistentTemporaryFile()
|
if match.group(1) == u"=":
|
||||||
f.write(article)
|
return match.group(0)
|
||||||
articles.append({'title':art.ttr.string,'url':"file:///"+f.name})
|
return u'%s« %s »' % (match.group(1), match.group(2))
|
||||||
sections.append((sec['nom'], articles))
|
|
||||||
|
article = "<html><head></head><body>"+unicode(art)+"</body></html>"
|
||||||
|
article = article.replace('<![CDATA[','').replace(']]>','').replace(' oC ','°C ')
|
||||||
|
article = article.replace('srttr>','h3>').replace('ssttr>','h2>').replace('ttr>','h1>')
|
||||||
|
article = article.replace("'" , u'\u2019')
|
||||||
|
article = re.sub('(.|^)"([^"]+)"', guillemets, article)
|
||||||
|
|
||||||
|
f = PersistentTemporaryFile()
|
||||||
|
f.write(article)
|
||||||
|
articles.append({'title':art.ttr.string,'url':"file:///"+f.name})
|
||||||
|
sections.append((sec['nom'], articles))
|
||||||
|
except AttributeError:
|
||||||
|
self.log("Vos identifiants sont incorrects, ou votre abonnement LeMonde.fr ne vous permet pas de télécharger le journal.")
|
||||||
return sections
|
return sections
|
||||||
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for lgd in soup.findAll(id="lgd"):
|
for lgd in soup.findAll(id="lgd"):
|
||||||
lgd.contents[-1].extract()
|
lgd.contents[-1].extract()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user