From 7f091a5ffe0af5e37dfd8a0175ae7f8cc7dec08e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Chabot?= Date: Sun, 16 Oct 2011 11:26:34 +0200 Subject: [PATCH] Add lepoint news website recipe --- recipes/lepoint.recipe | 75 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 recipes/lepoint.recipe diff --git a/recipes/lepoint.recipe b/recipes/lepoint.recipe new file mode 100644 index 0000000000..2cdc42fa5f --- /dev/null +++ b/recipes/lepoint.recipe @@ -0,0 +1,75 @@ +__license__ = 'GPL v3' +__copyright__ = '2011 Aurélien Chabot ' +''' +LePoint.fr +''' +import re +from calibre.web.feeds.recipes import BasicNewsRecipe + +class lepoint(BasicNewsRecipe): + + title = 'Le Point' + __author__ = 'calibre' + description = 'Actualités' + encoding = 'utf-8' + publisher = 'LePoint.fr' + category = 'news, France, world' + language = 'fr' + + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 15 + no_stylesheets = True + remove_empty_feeds = True + filterDuplicates = True + + extra_css = ''' + h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} + .chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;} + .info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} + .article {font-size:medium; font-family:Arial,Helvetica,sans-serif;} + ''' + + remove_tags = [ + dict(name='iframe'), + dict(name='div', attrs={'class':['entete_chroniqueur']}), + dict(name='div', attrs={'class':['col_article']}), + dict(name='div', attrs={'class':['signature_article']}), + dict(name='div', attrs={'class':['util_font util_article']}), + dict(name='div', attrs={'class':['util_article bottom']}) + ] + + keep_only_tags = [dict(name='div', attrs={'class':['page_article']})] + + remove_tags_after = dict(name='div', attrs={'class':['util_article bottom']}) + + feeds = [ + (u'À la une', 'http://www.lepoint.fr/rss.xml'), + ('International', 'http://www.lepoint.fr/monde/rss.xml'), + ('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'), + ('Sciences', 'http://www.lepoint.fr/science/rss.xml'), + ('Economie', 'http://www.lepoint.fr/economie/rss.xml'), + (u'Socièté', 'http://www.lepoint.fr/societe/rss.xml'), + ('Politique', 'http://www.lepoint.fr/politique/rss.xml'), + (u'Médias', 'http://www.lepoint.fr/medias/rss.xml'), + ('Culture', 'http://www.lepoint.fr/culture/rss.xml'), + (u'Santé', 'http://www.lepoint.fr/sante/rss.xml'), + ('Sport', 'http://www.lepoint.fr/sport/rss.xml') + ] + + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + + def get_masthead_url(self): + masthead = 'http://www.lepoint.fr/images/commun/logo.png' + br = BasicNewsRecipe.get_browser() + try: + br.open(masthead) + except: + self.log("\nCover unavailable") + masthead = None + return masthead