#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'

from calibre.web.feeds.news import BasicNewsRecipe


class LeTemps(BasicNewsRecipe):
    """Recipe for the French-language newspaper Le Temps (letemps.ch).

    The site requires a subscription: ``get_browser`` logs in with the
    ``username`` and ``password`` set on the recipe before anything is
    fetched.
    """

    title = u'Le Temps'
    oldest_article = 7
    max_articles_per_feed = 100
    __author__ = 'Sujata Raman'
    description = 'French news. Needs a subscription from http://www.letemps.ch'
    no_stylesheets = True
    remove_javascript = True
    recursions = 1
    encoding = 'UTF-8'
    # Only links of this shape (article UUID followed by "|<digit>") are
    # followed when recursing; presumably these are the continuation pages
    # of a multi-page article -- confirm against the live site.
    match_regexps = [r'http://www.letemps.ch/Page/Uuid/[-0-9a-f]+\|[1-9]']
    language = 'fr'
    needs_subscription = True

    def get_browser(self):
        """Return a browser that has been logged in to letemps.ch.

        :return: the browser obtained from ``BasicNewsRecipe.get_browser``
            after the login form has been submitted.
        :raises ValueError: if the page returned after submitting the
            credentials still contains the ``>Login`` marker.
        """
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://www.letemps.ch/login')
        # NOTE(review): no form is selected explicitly before the fields are
        # assigned; this relies on the browser resolving the login form on
        # its own -- confirm against the current login page.
        br['username'] = self.username
        br['password'] = self.password
        raw = br.submit().read()
        # read() yields bytes on Python 3; decode so the substring test below
        # works (and does not raise TypeError) on both Python 2 and 3.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', 'replace')
        if '>Login' in raw:
            raise ValueError('Failed to login to letemps.ch. Check '
                             'your username and password')
        return br

    # Containers kept from each downloaded page.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'content'}),
        dict(name='div', attrs={'class': 'story'}),
    ]

    # Containers stripped from each downloaded page.
    remove_tags = [
        dict(name='div', attrs={'id': ['footer', 'sub']}),
        dict(name='div', attrs={'class': ['box additional', 'box function',
                                          'right', 'box links', 'follow']}),
    ]

    # Stylesheet applied to the generated book. Built from adjacent string
    # literals (one CSS rule each, separated by a single space) so that the
    # resulting value is a single string.
    extra_css = (
        'h1{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;} '
        '.headline{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;color:#990000;} '
        '.summary_gal{color:#777777;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;} '
        '#capt{color:#1B1B1B;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;} '
        '#content{font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;} '
        'h2 {font-size: 24px; line-height: 25px; margin-bottom: 14px; text-transform:uppercase;} '
        '.author {font-size:x-small; margin: 0 0 5px 0; color:#797971; font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;} '
        '.lead {font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;font-weight: bold; margin: 10px 0;font-size:small;} '
        'p {margin: 0 0 10px 0;} '
        'h3{font-size:small;font-weight:bold;} '
        '.heading{color:#940026;font-size:x-small;} '
        '.description{font-size:x-small;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;color:#797971; } '
        'a {color:#1B1B1B; font-size:small;} '
        '.linkbox{font-size:x-small;color:#1B1B1B;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;} '
    )

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Actualit\xe9', 'http://www.letemps.ch/rss/site/'),
        ('Monde', 'http://www.letemps.ch/rss/site/actualite/monde'),
        (u'Suisse & R\xe9gions', 'http://www.letemps.ch/rss/site/actualite/suisse_regions'),
        ('Sciences & Environnement', 'http://www.letemps.ch/rss/site/actualite/sciences_environnement'),
        (u'Soci\xe9t\xe9', 'http://www.letemps.ch/rss/site/actualite/societe'),
        ('Economie & Finance', 'http://www.letemps.ch/rss/site/economie_finance'),
        ('Economie & Finance - Finance', 'http://www.letemps.ch/rss/site/economie_finance/finance'),
        ('Economie & Finance - Fonds de placement', 'http://www.letemps.ch/rss/site/economie_finance/fonds_placement'),
        (u'Economie & Finance - Carri\xe9res', 'http://www.letemps.ch/rss/site/economie_finance/carrieres'),
        ('Culture', 'http://www.letemps.ch/rss/site/culture'),
        (u'Culture - Cin\xe9ma', 'http://www.letemps.ch/rss/site/culture/cinema'),
        ('Culture - Musiques', 'http://www.letemps.ch/rss/site/culture/musiques'),
        (u'Culture - Sc\xe9nes', 'http://www.letemps.ch/rss/site/culture/scenes'),
        ('Culture - Arts plastiques', 'http://www.letemps.ch/rss/site/culture/arts_plastiques'),
        ('Livres', 'http://www.letemps.ch/rss/site/culture/livres'),
        ('Opinions', 'http://www.letemps.ch/rss/site/opinions'),
        ('Opinions - Editoriaux', 'http://www.letemps.ch/rss/site/opinions/editoriaux'),
        (u'Opinions - Invit\xe9s', 'http://www.letemps.ch/rss/site/opinions/invites'),
        ('Opinions - Chroniques', 'http://www.letemps.ch/rss/site/opinions/chroniques'),
        ('LifeStyle', 'http://www.letemps.ch/rss/site/lifestyle'),
        ('LifeStyle - Luxe', 'http://www.letemps.ch/rss/site/lifestyle/luxe'),
        ('LifeStyle - Horlogerie & Joaillerie', 'http://www.letemps.ch/rss/site/lifestyle/horlogerie_joaillerie'),
        ('LifeStyle - Design', 'http://www.letemps.ch/rss/site/lifestyle/design'),
        ('LifeStyle - Voyages', 'http://www.letemps.ch/rss/site/lifestyle/voyages'),
        ('LifeStyle - Gastronomie', 'http://www.letemps.ch/rss/site/lifestyle/gastronomie'),
        ('LifeStyle - Architecture & Immobilier', 'http://www.letemps.ch/rss/site/lifestyle/architecture_immobilier'),
        ('LifeStyle - Automobile', 'http://www.letemps.ch/rss/site/lifestyle/automobile'),
        ('Sports', 'http://www.letemps.ch/rss/site/actualite/sports'),
    ]

    def postprocess_html(self, soup, first):
        """Strip pagination boxes and, on follow-up pages, the headline.

        :param soup: parsed document for the downloaded page; modified in
            place.
        :param first: when false, the first ``<h1>`` found is removed as
            well (presumably so the headline is not repeated on
            continuation pages -- confirm against the BasicNewsRecipe
            documentation).
        :return: the same ``soup`` object.
        """
        for tag in soup.findAll('div', attrs={'class': 'box pagination'}):
            tag.extract()
        if not first:
            h = soup.find('h1')
            if h is not None:
                h.extract()
        return soup

    # def print_version(self, url):
    #     return url.replace('Page', 'Facet/print')