Update Le Monde diplomatique.fr

This commit is contained in:
Kovid Goyal 2015-12-12 10:58:33 +05:30
parent 4d78cc9f1a
commit 30b0783a4e

View File

@ -11,6 +11,11 @@ import re
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds import feeds_from_index from calibre.web.feeds import feeds_from_index
def absurl(url):
    """Return ``url`` as an absolute URL on the Le Monde diplomatique site.

    * ``//host/path`` (protocol-relative) only lacks a scheme, so ``http:``
      is prepended and the host is kept as-is.
    * ``/path`` (site-relative) is prefixed with the site root.
    * Anything else is returned unchanged.
    """
    if url.startswith('//'):
        # Must be tested before the single-slash case: a protocol-relative
        # link also starts with '/', and prefixing the site root to it would
        # produce 'http://www.monde-diplomatique.fr//host/path'.
        return 'http:' + url
    if url.startswith('/'):
        return 'http://www.monde-diplomatique.fr' + url
    return url
class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe): class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
title = u'Le Monde diplomatique.fr' title = u'Le Monde diplomatique.fr'
__author__ = 'Gaëtan Lehmann' __author__ = 'Gaëtan Lehmann'
@ -44,11 +49,10 @@ class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
dict(name='div', attrs={'class':'espace'})] dict(name='div', attrs={'class':'espace'})]
conversion_options = { conversion_options = {
'comments' : description 'comments' : description,
,'tags' : category 'tags' : category,
,'language' : language 'publisher' : publisher,
,'publisher' : publisher 'linearize_tables': True
,'linearize_tables': True
} }
remove_empty_feeds = True remove_empty_feeds = True
@ -60,44 +64,42 @@ class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
def parse_index_valise(self):
    """Build the "La valise diplomatique" feed from the /carnet/ index page.

    Every ``<li>`` of the ``<ul class="liste double">`` listing that holds a
    link with an ``href`` becomes one article dict with the keys ``title``,
    ``date``, ``url`` and ``description``.

    Returns a one-element list ``[(feed_title, articles)]``. ``articles`` is
    empty when the listing cannot be found on the page.
    """
    feed_title = "La valise diplomatique"
    index_url = 'http://www.monde-diplomatique.fr/carnet/'
    articles = []
    soup = self.index_to_soup(index_url)
    cnt = soup.find('ul', attrs={'class': 'liste double'})
    if cnt is None:
        # The site markup changes from time to time; report it instead of
        # dying with AttributeError on None.findAll.
        self.log.warn('No article list found on %s' % index_url)
        return [(feed_title, articles)]
    for item in cnt.findAll('li'):
        feed_link = item.find('a', href=True)
        if not feed_link:
            # Entries without a link cannot be downloaded.
            continue
        desc = item.find('div', attrs={'class': 'intro'})
        date = item.find('div', attrs={'class': 'dates_auteurs'})
        # tag_to_string instead of .string: .string is None as soon as the
        # intro contains nested markup, which would yield a None description.
        description = self.tag_to_string(desc) if desc else ''
        articles.append({
            'title': self.tag_to_string(item.find('h3')),
            'date': self.tag_to_string(date),
            'url': absurl(feed_link['href']),
            'description': description,
        })
    return [(feed_title, articles)]
def parse_index_cartes(self):
    """Build the "Cartes" feed from the /cartes/ index page.

    Every ``<li>`` of the ``<ul class="liste_vignettes hautcartes">`` listing
    that holds a link with an ``href`` becomes one article dict. The text of
    the ``dates_auteurs`` block is split on ``', '``: its first part is
    stored as ``description`` (the author) and its last part as ``date``.

    Returns a one-element list ``[(feed_title, articles)]``. ``articles`` is
    empty when the listing cannot be found on the page.
    """
    feed_title = "Cartes"
    index_url = 'http://www.monde-diplomatique.fr/cartes/'
    articles = []
    soup = self.index_to_soup(index_url)
    cnt = soup.find('ul', attrs={'class': 'liste_vignettes hautcartes'})
    if cnt is None:
        # The site markup changes from time to time; report it instead of
        # dying with AttributeError on None.findAll.
        self.log.warn('No map list found on %s' % index_url)
        return [(feed_title, articles)]
    for li in cnt.findAll('li'):
        feed_link = li.find('a', href=True)
        if not feed_link:
            # Entries without a link cannot be downloaded.
            continue
        author_and_date = li.find('div', attrs={'class': 'dates_auteurs'})
        parts = self.tag_to_string(author_and_date).split(', ')
        articles.append({
            'title': self.tag_to_string(li.find('h3')),
            # When the text holds no ', ' separator, parts has one element
            # and date and description are the same string.
            'date': parts[-1],
            'url': absurl(feed_link['href']),
            'description': parts[0],
        })
    return [(feed_title, articles)]