Improved recipe for Le Temps

This commit is contained in:
Kovid Goyal 2009-12-24 12:15:10 -07:00
parent 41a3d34586
commit b0eb97c60a

View File

@@ -14,75 +14,77 @@ class LeTemps(BasicNewsRecipe):
title = u'Le Temps' title = u'Le Temps'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
__author__ = 'Sujata Raman'
no_stylesheets = True no_stylesheets = True
remove_tags = [dict(name='div', attrs={'id':'footer'})] remove_javascript = True
remove_tags = [dict(name='div', attrs={'class':'box links'})] recursions = 1
remove_tags = [dict(name='script')] encoding = 'UTF-8'
extra_css = '''.heading {font-size: 13px; line-height: 15px; match_regexps = [r'http://www.letemps.ch/Page/Uuid/[-0-9a-f]+\|[1-9]']
margin: 20px 0;} \n h2 {font-size: 24px; line-height: 25px; margin-bottom: lang = 'fr'
14px;} \n .author {font-size: 11px; margin: 0 0 5px 0;} \n .lead {font-
weight: 700; margin: 10px 0;} \n p {margin: 0 0 10px 0;}''' keep_only_tags = [dict(name='div', attrs={'id':'content'}),
dict(name='div', attrs={'class':'story'})
]
remove_tags = [dict(name='div', attrs={'id':['footer','sub']}),
dict(name='div', attrs={'class':['box additional','box function','right','box links','follow']})]
extra_css = '''h1{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;}
.headline{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;color:#990000;}
.summary_gal{color:#777777;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;}
#capt{color:#1B1B1B;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;}
#content{font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;}
h2 {font-size: 24px; line-height: 25px; margin-bottom: 14px; text-transform:uppercase;}
.author {font-size:x-small; margin: 0 0 5px 0; color:#797971; font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;}
.lead {font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;font-weight: bold; margin: 10px 0;font-size:small;}
p {margin: 0 0 10px 0;}
h3{font-size:small;font-weight:bold;}
.heading{color:#940026;font-size:x-small;}
.description{font-size:x-small;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;color:#797971; }
a {color:#1B1B1B; font-size:small;}
.linkbox{font-size:x-small;color:#1B1B1B;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;} '''
feeds = [ feeds = [
('Actualité', (u'Actualit\xe9', 'http://www.letemps.ch/rss/site/'),
'http://www.letemps.ch/rss/site/'), ('Monde', 'http://www.letemps.ch/rss/site/actualite/monde'),
('Monde', (u'Suisse & R\xe9gions', 'http://www.letemps.ch/rss/site/actualite/suisse_regions'),
'http://www.letemps.ch/rss/site/actualite/monde'), ('Sciences & Environnement', 'http://www.letemps.ch/rss/site/actualite/sciences_environnement'),
('Suisse & Régions', (u'Soci\xe9t\xe9', 'http://www.letemps.ch/rss/site/actualite/societe'),
'http://www.letemps.ch/rss/site/actualite/suisse_regions'), ('Economie & Finance', 'http://www.letemps.ch/rss/site/economie_finance'),
('Sciences & Environnement', ('Economie & Finance - Finance', 'http://www.letemps.ch/rss/site/economie_finance/finance'),
'http://www.letemps.ch/rss/site/actualite/sciences_environnement'), ('Economie & Finance - Fonds de placement', 'http://www.letemps.ch/rss/site/economie_finance/fonds_placement'),
('Société', (u'Economie & Finance - Carri\xe9res', 'http://www.letemps.ch/rss/site/economie_finance/carrieres'),
'http://www.letemps.ch/rss/site/actualite/societe'), ('Culture', 'http://www.letemps.ch/rss/site/culture'),
('Economie & Finance', (u'Culture - Cin\xe9ma', 'http://www.letemps.ch/rss/site/culture/cinema'),
'http://www.letemps.ch/rss/site/economie_finance'), ('Culture - Musiques', 'http://www.letemps.ch/rss/site/culture/musiques'),
('Economie & Finance - Finance', (u'Culture - Sc\xe9nes', 'http://www.letemps.ch/rss/site/culture/scenes'),
'http://www.letemps.ch/rss/site/economie_finance/finance'), ('Culture - Arts plastiques', 'http://www.letemps.ch/rss/site/culture/arts_plastiques'),
('Economie & Finance - Fonds de placement', ('Livres', 'http://www.letemps.ch/rss/site/culture/livres'),
'http://www.letemps.ch/rss/site/economie_finance/fonds_placement'), ('Opinions', 'http://www.letemps.ch/rss/site/opinions'),
('Economie & Finance - Carrières', ('Opinions - Editoriaux', 'http://www.letemps.ch/rss/site/opinions/editoriaux'),
'http://www.letemps.ch/rss/site/economie_finance/carrieres'), (u'Opinions - Invit\xe9s', 'http://www.letemps.ch/rss/site/opinions/invites'),
('Culture', ('Opinions - Chroniques', 'http://www.letemps.ch/rss/site/opinions/chroniques'),
'http://www.letemps.ch/rss/site/culture'), ('LifeStyle', 'http://www.letemps.ch/rss/site/lifestyle'),
('Culture - Cinéma', ('LifeStyle - Luxe', 'http://www.letemps.ch/rss/site/lifestyle/luxe'),
'http://www.letemps.ch/rss/site/culture/cinema'), ('LifeStyle - Horlogerie & Joaillerie', 'http://www.letemps.ch/rss/site/lifestyle/horlogerie_joaillerie'),
('Culture - Musiques', ('LifeStyle - Design', 'http://www.letemps.ch/rss/site/lifestyle/design'),
'http://www.letemps.ch/rss/site/culture/musiques'), ('LifeStyle - Voyages', 'http://www.letemps.ch/rss/site/lifestyle/voyages'),
('Culture - Scènes', ('LifeStyle - Gastronomie', 'http://www.letemps.ch/rss/site/lifestyle/gastronomie'),
'http://www.letemps.ch/rss/site/culture/scenes'), ('LifeStyle - Architecture & Immobilier', 'http://www.letemps.ch/rss/site/lifestyle/architecture_immobilier'),
('Culture - Arts plastiques', ('LifeStyle - Automobile', 'http://www.letemps.ch/rss/site/lifestyle/automobile'),
'http://www.letemps.ch/rss/site/culture/arts_plastiques'), ('Sports', 'http://www.letemps.ch/rss/site/actualite/sports'),
('Livres',
'http://www.letemps.ch/rss/site/culture/livres'),
('Opinions',
'http://www.letemps.ch/rss/site/opinions'),
('Opinions - Editoriaux',
'http://www.letemps.ch/rss/site/opinions/editoriaux'),
('Opinions - Invités',
'http://www.letemps.ch/rss/site/opinions/invites'),
('Opinions - Chroniques',
'http://www.letemps.ch/rss/site/opinions/chroniques'),
('LifeStyle',
'http://www.letemps.ch/rss/site/lifestyle'),
('LifeStyle - Luxe',
'http://www.letemps.ch/rss/site/lifestyle/luxe'),
('LifeStyle - Horlogerie & Joaillerie',
'http://www.letemps.ch/rss/site/lifestyle/horlogerie_joaillerie'),
('LifeStyle - Design',
'http://www.letemps.ch/rss/site/lifestyle/design'),
('LifeStyle - Voyages',
'http://www.letemps.ch/rss/site/lifestyle/voyages'),
('LifeStyle - Gastronomie',
'http://www.letemps.ch/rss/site/lifestyle/gastronomie'),
('LifeStyle - Architecture & Immobilier',
'http://www.letemps.ch/rss/site/lifestyle/architecture_immobilier'),
('LifeStyle - Automobile',
'http://www.letemps.ch/rss/site/lifestyle/automobile'),
('Sports',
'http://www.letemps.ch/rss/site/actualite/sports'),
] ]
def print_version(self, url): def postprocess_html(self, soup, first):
return url.replace('Page', 'Facet/print') for tag in soup.findAll('div', attrs = {'class':'box pagination'}):
tag.extract()
if not first:
h = soup.find('h1')
if h is not None:
h.extract()
return soup
# def print_version(self, url):
# return url.replace('Page', 'Facet/print')