mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Le Monde diplomatique.fr
This commit is contained in:
parent
4d78cc9f1a
commit
30b0783a4e
@ -11,6 +11,11 @@ import re
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.web.feeds import feeds_from_index
|
from calibre.web.feeds import feeds_from_index
|
||||||
|
|
||||||
|
def absurl(url):
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = 'http://www.monde-diplomatique.fr' + url
|
||||||
|
return url
|
||||||
|
|
||||||
class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
|
class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
|
||||||
title = u'Le Monde diplomatique.fr'
|
title = u'Le Monde diplomatique.fr'
|
||||||
__author__ = 'Gaëtan Lehmann'
|
__author__ = 'Gaëtan Lehmann'
|
||||||
@ -44,11 +49,10 @@ class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
|
|||||||
dict(name='div', attrs={'class':'espace'})]
|
dict(name='div', attrs={'class':'espace'})]
|
||||||
|
|
||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments' : description
|
'comments' : description,
|
||||||
,'tags' : category
|
'tags' : category,
|
||||||
,'language' : language
|
'publisher' : publisher,
|
||||||
,'publisher' : publisher
|
'linearize_tables': True
|
||||||
,'linearize_tables': True
|
|
||||||
}
|
}
|
||||||
|
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
@ -60,44 +64,42 @@ class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe):
|
|||||||
def parse_index_valise(self):
|
def parse_index_valise(self):
|
||||||
articles = []
|
articles = []
|
||||||
soup = self.index_to_soup('http://www.monde-diplomatique.fr/carnet/')
|
soup = self.index_to_soup('http://www.monde-diplomatique.fr/carnet/')
|
||||||
cnt = soup.find('ul',attrs={'class':'hermetique liste'})
|
cnt = soup.find('ul',attrs={'class':'liste double'})
|
||||||
for item in cnt.findAll('li'):
|
for item in cnt.findAll('li'):
|
||||||
description = ''
|
description = ''
|
||||||
feed_link = item.find('a')
|
feed_link = item.find('a', href=True)
|
||||||
|
title = self.tag_to_string(item.find('h3'))
|
||||||
desc = item.find('div',attrs={'class':'intro'})
|
desc = item.find('div',attrs={'class':'intro'})
|
||||||
date = item.find('div',attrs={'class':'dates_auteurs'})
|
date = item.find('div',attrs={'class':'dates_auteurs'})
|
||||||
if desc:
|
if desc:
|
||||||
description = desc.string
|
description = desc.string
|
||||||
if feed_link and feed_link.has_key('href'):
|
if feed_link:
|
||||||
url = 'http://www.monde-diplomatique.fr' + feed_link['href']
|
|
||||||
title = self.tag_to_string(feed_link)
|
|
||||||
articles.append({
|
articles.append({
|
||||||
'title' :title
|
'title' :title,
|
||||||
,'date' :date.string.strip()
|
'date' :self.tag_to_string(date),
|
||||||
,'url' :url
|
'url' :absurl(feed_link['href']),
|
||||||
,'description':description
|
'description':description
|
||||||
})
|
})
|
||||||
return [("La valise diplomatique", articles)]
|
return [("La valise diplomatique", articles)]
|
||||||
|
|
||||||
def parse_index_cartes(self):
|
def parse_index_cartes(self):
|
||||||
articles = []
|
articles = []
|
||||||
soup = self.index_to_soup('http://www.monde-diplomatique.fr/cartes/')
|
soup = self.index_to_soup('http://www.monde-diplomatique.fr/cartes/')
|
||||||
cnt = soup.find('div',attrs={'class':'decale hermetique'})
|
cnt = soup.find('ul',attrs={'class':'liste_vignettes hautcartes'})
|
||||||
for item in cnt.findAll('div',attrs={'class':re.compile('grid_3 filet hombre_demi')}):
|
for li in cnt.findAll('li'):
|
||||||
feed_link = item.find('a',attrs={'class':'couve'})
|
feed_link = li.find('a', href=True)
|
||||||
h3 = item.find('h3')
|
h3 = li.find('h3')
|
||||||
authorAndDate = item.find('div',attrs={'class':'dates_auteurs'})
|
authorAndDate = li.find('div',attrs={'class':'dates_auteurs'})
|
||||||
author_date = authorAndDate.string.strip().split(', ')
|
author_date = self.tag_to_string(authorAndDate).split(', ')
|
||||||
author = author_date[0]
|
author = author_date[0]
|
||||||
date = author_date[-1]
|
date = author_date[-1]
|
||||||
if feed_link and feed_link.has_key('href'):
|
if feed_link:
|
||||||
url = 'http://www.monde-diplomatique.fr' + feed_link['href']
|
|
||||||
title = self.tag_to_string(h3)
|
title = self.tag_to_string(h3)
|
||||||
articles.append({
|
articles.append({
|
||||||
'title' :title
|
'title' :title,
|
||||||
,'date' :date
|
'date' :date,
|
||||||
,'url' :url
|
'url' :absurl(feed_link['href']),
|
||||||
,'description': author
|
'description': author
|
||||||
})
|
})
|
||||||
return [("Cartes", articles)]
|
return [("Cartes", articles)]
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user