Improved recipe for FTD

This commit is contained in:
Kovid Goyal 2010-01-18 09:33:46 -07:00
parent b2857225db
commit 55fc72ac93
2 changed files with 100 additions and 89 deletions

View File

@ -9,16 +9,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
class FTDe(BasicNewsRecipe): class FTDe(BasicNewsRecipe):
title = 'FTD' title = 'FTD'
description = 'Financial Times Deutschland' description = 'Financial Times Deutschland'
__author__ = 'Oliver Niesner' __author__ = 'Oliver Niesner'
use_embedded_content = False use_embedded_content = False
timefmt = ' [%d %b %Y]' timefmt = ' [%d %b %Y]'
language = 'de' language = _('German')
max_articles_per_feed = 40 max_articles_per_feed = 40
no_stylesheets = True no_stylesheets = True
remove_tags = [dict(id='navi_top'), remove_tags = [dict(id='navi_top'),
dict(id='topbanner'), dict(id='topbanner'),
dict(id='seitenkopf'), dict(id='seitenkopf'),
@ -28,8 +28,13 @@ class FTDe(BasicNewsRecipe):
dict(id='ADS_Top'), dict(id='ADS_Top'),
dict(id='spinner'), dict(id='spinner'),
dict(id='ftd-contentad'), dict(id='ftd-contentad'),
dict(id='ftd-promo'),
dict(id='nava-50009007-1-0'), dict(id='nava-50009007-1-0'),
dict(id='navli-50009007-1-0'), dict(id='navli-50009007-1-0'),
dict(id='Box5000534-0-0-0'),
dict(id='ExpV-1-0-0-1'),
dict(id='ExpV-1-0-0-0'),
dict(id='PollExpV-2-0-0-0'),
dict(id='starRating'), dict(id='starRating'),
dict(id='saveRating'), dict(id='saveRating'),
dict(id='yLayer'), dict(id='yLayer'),
@ -44,14 +49,19 @@ class FTDe(BasicNewsRecipe):
dict(name='ul', attrs={'class':'nav'}), dict(name='ul', attrs={'class':'nav'}),
dict(name='p', attrs={'class':'articleOptionHead'}), dict(name='p', attrs={'class':'articleOptionHead'}),
dict(name='p', attrs={'class':'articleOptionFoot'}), dict(name='p', attrs={'class':'articleOptionFoot'}),
dict(name='p', attrs={'class':'moreInfo'}),
dict(name='div', attrs={'class':'chartBox'}), dict(name='div', attrs={'class':'chartBox'}),
dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}), dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}), dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
dict(name='div', attrs={'class':'box boxNavTabs '}), dict(name='div', attrs={'class':'box boxNavTabs'}),
dict(name='div', attrs={'class':'boxMMRgtLow'}),
dict(name='span', attrs={'class':'vote_455857'}), dict(name='span', attrs={'class':'vote_455857'}),
dict(name='div', attrs={'class':'relatedhalb'}), dict(name='div', attrs={'class':'relatedhalb'}),
dict(name='div', attrs={'class':'box boxListScrollOutline'}), dict(name='div', attrs={'class':'box boxListScrollOutline'}),
dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
dict(name='div', attrs={'class':'box boxTeaser'}),
dict(name='div', attrs={'class':'tagCloud'}), dict(name='div', attrs={'class':'tagCloud'}),
dict(name='div', attrs={'class':'pollView'}),
dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}), dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
dict(name='div', attrs={'class':'ftdHpNav'}), dict(name='div', attrs={'class':'ftdHpNav'}),
dict(name='div', attrs={'class':'ftdHead'}), dict(name='div', attrs={'class':'ftdHead'}),
@ -67,11 +77,12 @@ class FTDe(BasicNewsRecipe):
dict(name='div', attrs={'class':'wertungoben'}), dict(name='div', attrs={'class':'wertungoben'}),
dict(name='div', attrs={'class':'artikelfuss'}), dict(name='div', attrs={'class':'artikelfuss'}),
dict(name='a', attrs={'class':'rating'}), dict(name='a', attrs={'class':'rating'}),
dict(name='a', attrs={'href':'#rt'}),
dict(name='div', attrs={'class':'articleOptionFootFrame'}), dict(name='div', attrs={'class':'articleOptionFootFrame'}),
dict(name='div', attrs={'class':'artikelsplitfaq'})] dict(name='div', attrs={'class':'artikelsplitfaq'})]
remove_tags_after = [dict(name='a', attrs={'class':'more'})] #remove_tags_after = [dict(name='a', attrs={'class':'more'})]
feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'), feeds = [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'),
('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'), ('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'), ('Unternehmen', 'http://www.ftd.de/rss2/unternehmen'),
('Politik', 'http://www.ftd.de/rss2/politik'), ('Politik', 'http://www.ftd.de/rss2/politik'),
@ -82,8 +93,8 @@ class FTDe(BasicNewsRecipe):
('Auto', 'http://www.ftd.de/rss2/auto'), ('Auto', 'http://www.ftd.de/rss2/auto'),
('Lifestyle', 'http://www.ftd.de/rss2/lifestyle') ('Lifestyle', 'http://www.ftd.de/rss2/lifestyle')
] ]
def print_version(self, url): def print_version(self, url):
return url + '?mode=print' return url.replace('.html', '.html?mode=print')

View File

@ -1,79 +1,79 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini' __author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>' __copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
__version__ = 'v1.01' __version__ = 'v1.01'
__date__ = '14, January 2010' __date__ = '14, January 2010'
__description__ = 'Canadian Paper ' __description__ = 'Canadian Paper '
''' '''
http://www.ledevoir.com/ http://www.ledevoir.com/
''' '''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class ledevoir(BasicNewsRecipe): class ledevoir(BasicNewsRecipe):
author = 'Lorenzo Vigentini' author = 'Lorenzo Vigentini'
description = 'Canadian Paper' description = 'Canadian Paper'
cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif' cover_url = 'http://www.ledevoir.com/images/ul/graphiques/logo_devoir.gif'
title = u'Le Devoir' title = u'Le Devoir'
publisher = 'leDevoir.com' publisher = 'leDevoir.com'
category = 'News, finance, economy, politics' category = 'News, finance, economy, politics'
language = 'fr' language = 'fr'
encoding = 'utf-8' encoding = 'utf-8'
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
max_articles_per_feed = 50 max_articles_per_feed = 50
use_embedded_content = False use_embedded_content = False
recursion = 10 recursion = 10
remove_javascript = True remove_javascript = True
no_stylesheets = True no_stylesheets = True
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'id':'article'}), dict(name='div', attrs={'id':'article'}),
dict(name='ul', attrs={'id':'ariane'}) dict(name='ul', attrs={'id':'ariane'})
] ]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'id':'dialog'}), dict(name='div', attrs={'id':'dialog'}),
dict(name='div', attrs={'class':['interesse_actions','reactions']}), dict(name='div', attrs={'class':['interesse_actions','reactions']}),
dict(name='ul', attrs={'class':'mots_cles'}), dict(name='ul', attrs={'class':'mots_cles'}),
dict(name='a', attrs={'class':'haut'}), dict(name='a', attrs={'class':'haut'}),
dict(name='h5', attrs={'class':'interesse_actions'}) dict(name='h5', attrs={'class':'interesse_actions'})
] ]
feeds = [ feeds = [
(u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'), (u'A la une', 'http://www.ledevoir.com/rss/manchettes.xml'),
(u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'), (u'Edition complete', 'http://feeds2.feedburner.com/fluxdudevoir'),
(u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'), (u'Opinions', 'http://www.ledevoir.com/rss/opinions.xml'),
(u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'), (u'Chroniques', 'http://www.ledevoir.com/rss/chroniques.xml'),
(u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'), (u'Politique', 'http://www.ledevoir.com/rss/section/politique.xml?id=51'),
(u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'), (u'International', 'http://www.ledevoir.com/rss/section/international.xml?id=76'),
(u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'), (u'Culture', 'http://www.ledevoir.com/rss/section/culture.xml?id=48'),
(u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'), (u'Environnement', 'http://www.ledevoir.com/rss/section/environnement.xml?id=78'),
(u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'), (u'Societe', 'http://www.ledevoir.com/rss/section/societe.xml?id=52'),
(u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'), (u'Economie', 'http://www.ledevoir.com/rss/section/economie.xml?id=49'),
(u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'), (u'Sports', 'http://www.ledevoir.com/rss/section/sports.xml?id=85'),
(u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50') (u'Loisirs', 'http://www.ledevoir.com/rss/section/loisirs.xml?id=50')
] ]
extra_css = ''' extra_css = '''
h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;} h1 {color:#1C1E7C;font-family:Times,Georgia,serif;font-size:1.85em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:1.2em;margin:0 0 5px;}
h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;} h2 {color:#333333;font-family:Times,Georgia,serif;font-size:1.5em;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:normal;line-height:1.2em;margin:0 0 5px;}
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;} h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; } h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;} h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
.specs {line-height:1em;margin:1px 0;} .specs {line-height:1em;margin:1px 0;}
.specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;} .specs span.auteur {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
.specs span.auteur a, .specs span.auteur a,
.specs span.auteur span {text-transform:uppercase;color:#787878;} .specs span.auteur span {text-transform:uppercase;color:#787878;}
.specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;} .specs .date {font:0.85em/1.1em Arial, Verdana, sans-serif;color:#787878;}
ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;} ul#ariane {list-style-type:none;margin:0;padding:5px 0 8px 0;font:0.85em/1.2em Arial, Verdana, sans-serif;color:#2E2E2E;border-bottom:10px solid #fff;}
ul#ariane li {display:inline;} ul#ariane li {display:inline;}
ul#ariane a {color:#2E2E2E;text-decoration:underline;} ul#ariane a {color:#2E2E2E;text-decoration:underline;}
.credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;} .credit {color:#787878;font-size:0.71em;line-height:1.1em;font-weight:bold;}
.texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;} .texte {font-size:1.15em;line-height:1.4em;margin-bottom:17px;}
''' '''