# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011 Aurélien Chabot '
'''
LePoint.fr
'''
from calibre.web.feeds.recipes import BasicNewsRecipe


class lepoint(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the LePoint.fr RSS feeds."""

    # --- Publication metadata -------------------------------------------
    title = 'Le Point'
    __author__ = 'calibre'
    description = 'Actualités'
    encoding = 'utf-8'
    publisher = 'LePoint.fr'
    category = 'news, France, world'
    language = 'fr'

    # --- Download behaviour ---------------------------------------------
    # Articles are fetched from the linked pages, not from the feed body.
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    no_stylesheets = True
    remove_empty_feeds = True
    # NOTE(review): `filterDuplicates` does not appear to be an option that
    # BasicNewsRecipe documents; kept unchanged so nothing that may read it
    # breaks -- confirm whether `ignore_duplicate_articles` was intended.
    filterDuplicates = True

    # --- Styling applied to the downloaded articles ---------------------
    extra_css = '''
        h1 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
        .chapo {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;}
        .info_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .media_article {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
        .article {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
    '''

    # --- HTML clean-up rules --------------------------------------------
    # Elements stripped from every article page.
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': ['entete_chroniqueur']}),
        dict(name='div', attrs={'class': ['col_article']}),
        dict(name='div', attrs={'class': ['signature_article']}),
        dict(name='div', attrs={'class': ['util_font util_article']}),
        dict(name='div', attrs={'class': ['util_article bottom']}),
    ]

    # Only the <div class="page_article"> subtree is kept.
    keep_only_tags = [dict(name='div', attrs={'class': ['page_article']})]

    # Everything following the bottom utility bar is discarded.
    remove_tags_after = dict(name='div', attrs={'class': ['util_article bottom']})

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'À la une', 'http://www.lepoint.fr/rss.xml'),
        ('International', 'http://www.lepoint.fr/monde/rss.xml'),
        ('Tech/Web', 'http://www.lepoint.fr/high-tech-internet/rss.xml'),
        ('Sciences', 'http://www.lepoint.fr/science/rss.xml'),
        ('Economie', 'http://www.lepoint.fr/economie/rss.xml'),
        (u'Société', 'http://www.lepoint.fr/societe/rss.xml'),
        ('Politique', 'http://www.lepoint.fr/politique/rss.xml'),
        (u'Médias', 'http://www.lepoint.fr/medias/rss.xml'),
        ('Culture', 'http://www.lepoint.fr/culture/rss.xml'),
        (u'Santé', 'http://www.lepoint.fr/sante/rss.xml'),
        ('Sport', 'http://www.lepoint.fr/sport/rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Remove every inline ``style`` attribute from the parsed page.

        ``soup`` is modified in place and the same object is returned.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def get_masthead_url(self):
        """Return the masthead image URL, or ``None`` if it cannot be opened.

        The URL is opened once as a reachability probe; any failure while
        opening it is logged and ``None`` is returned instead of the URL.
        """
        masthead = 'http://www.lepoint.fr/images/commun/logo.png'
        # Call through the instance: `BasicNewsRecipe.get_browser()` with no
        # argument fails when `get_browser` is a regular instance method.
        br = self.get_browser()
        try:
            br.open(masthead)
        except Exception:
            # Best-effort probe: any fetch error means "no masthead", but
            # KeyboardInterrupt/SystemExit must still propagate.
            self.log("\nCover unavailable")
            masthead = None
        return masthead