From 452ff0f8a842fd8c93350bf7322c5aba3784cc16 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 12 Jul 2013 22:17:58 +0530 Subject: [PATCH] oops, accidentally unstaged the files in the last commit --- recipes/icons/le_monde_diplomatique_fr.png | Bin 0 -> 446 bytes recipes/le_monde_diplomatique_fr.recipe | 111 +++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 recipes/icons/le_monde_diplomatique_fr.png create mode 100644 recipes/le_monde_diplomatique_fr.recipe diff --git a/recipes/icons/le_monde_diplomatique_fr.png b/recipes/icons/le_monde_diplomatique_fr.png new file mode 100644 index 0000000000000000000000000000000000000000..ae4547977f67c6988ecf696047b47482effa8ddf GIT binary patch literal 446 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE3?yBabR7dyEa{HEjtmSN`?>!lvVtU&J%W50 z7^>757#dm_7=8hT8eT9klo~KFyh>nTu$sZZAYL$MSD+10!Y05c#P!UXGhe=Z{`c?i zOa_J%HJ8W`|Cl(zu^J-YSPTNYul2KxwhLQP$Q#x!4 zr4Nj^*=~OQplTk&#D2-_NexCU3wTo(H-X&ystszz{M@xoaYxD z{krHL^S0l%^Q$@Jn(O`s*IwmZW5-qS@Zi5#pzBmiTq8oyUAEN_sDZ)L)z4*}Q$iB}g`T6{ literal 0 HcmV?d00001 diff --git a/recipes/le_monde_diplomatique_fr.recipe b/recipes/le_monde_diplomatique_fr.recipe new file mode 100644 index 0000000000..f7c3b30fa0 --- /dev/null +++ b/recipes/le_monde_diplomatique_fr.recipe @@ -0,0 +1,111 @@ +# vim:fileencoding=utf-8 +from __future__ import unicode_literals + +__license__ = 'GPL v3' +__copyright__ = '2013' +''' +monde-diplomatique.fr +''' + +import re +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds import feeds_from_index + +class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe): + title = u'Le Monde diplomatique.fr' + __author__ = 'Gaëtan Lehmann' + description = "Le Monde diplomatique est un mensuel français d’information et d’opinion à la ligne éditoriale nettement engagée en faveur d'une gauche de rupture avec le capitalisme. Il aborde de nombreux sujets — géopolitique, relations internationales, économie, questions sociales, écologie, culture, médias, …" # noqa + oldest_article = 7 + max_articles_per_feed = 100 + auto_cleanup = True + publisher = 'monde-diplomatique.fr' + category = 'news, France, world' + language = 'fr' + masthead_url = 'http://www.monde-diplomatique.fr/squelettes/images/logotyfa.png' + timefmt = ' [%d %b %Y]' + no_stylesheets = True + + feeds = [(u'Blogs', u'http://blog.mondediplo.net/spip.php?page=backend'), (u'Archives', u'http://www.monde-diplomatique.fr/rss/')] + + preprocess_regexps = [ + (re.compile(r'(.*) - Les blogs du Diplo'), lambda m: '' + m.group(1) + ''), + (re.compile(r'

(.*) - Les blogs du Diplo

'), lambda m: '

' + m.group(1) + '

'), + (re.compile(r'(.*) \(Le Monde diplomatique\)'), lambda m: '' + m.group(1) + ''), + (re.compile(r'

(.*) \(Le Monde diplomatique\)

'), lambda m: '

' + m.group(1) + '

'), + (re.compile(r'

Grand format

'), lambda m: '')] + + remove_tags = [dict(name='div', attrs={'class':'voiraussi liste'}), + dict(name='ul', attrs={'class':'hermetique carto hombre_demi_inverse'}), + dict(name='a', attrs={'class':'tousles'}), + dict(name='h3', attrs={'class':'cat'}), + dict(name='div', attrs={'class':'logodiplo'}), + dict(name='img', attrs={'class':'spip_logos'}), + dict(name='p', attrs={'id':'hierarchie'}), + dict(name='div', attrs={'class':'espace'})] + + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + ,'linearize_tables': True + } + + remove_empty_feeds = True + + filterDuplicates = True + + # don't use parse_index - we need it to send an exception so we can mix + # feed and parse_index results in parse_feeds + def parse_index_valise(self): + articles = [] + soup = self.index_to_soup('http://www.monde-diplomatique.fr/carnet/') + cnt = soup.find('ul',attrs={'class':'hermetique liste'}) + for item in cnt.findAll('li'): + description = '' + feed_link = item.find('a') + desc = item.find('div',attrs={'class':'intro'}) + date = item.find('div',attrs={'class':'dates_auteurs'}) + if desc: + description = desc.string + if feed_link and feed_link.has_key('href'): + url = 'http://www.monde-diplomatique.fr' + feed_link['href'] + title = self.tag_to_string(feed_link) + articles.append({ + 'title' :title + ,'date' :date.string.strip() + ,'url' :url + ,'description':description + }) + return [("La valise diplomatique", articles)] + + def parse_index_cartes(self): + articles = [] + soup = self.index_to_soup('http://www.monde-diplomatique.fr/cartes/') + cnt = soup.find('div',attrs={'class':'decale hermetique'}) + for item in cnt.findAll('div',attrs={'class':re.compile('grid_3 filet hombre_demi')}): + feed_link = item.find('a',attrs={'class':'couve'}) + h3 = item.find('h3') + authorAndDate = item.find('div',attrs={'class':'dates_auteurs'}) + author, date = authorAndDate.string.strip().split(', ') + if feed_link and feed_link.has_key('href'): + url = 'http://www.monde-diplomatique.fr' + feed_link['href'] + title = self.tag_to_string(h3) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description': author + }) + return [("Cartes", articles)] + + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + valise = feeds_from_index(self.parse_index_valise(), oldest_article=self.oldest_article, + max_articles_per_feed=self.max_articles_per_feed, + log=self.log) + cartes = feeds_from_index(self.parse_index_cartes(), oldest_article=self.oldest_article, + max_articles_per_feed=self.max_articles_per_feed, + log=self.log) + feeds = valise + feeds + cartes + return feeds