Fix USA Today news website recipe

This commit is contained in:
Aurélien Chabot 2011-10-16 11:24:15 +02:00
parent 4bc485f6a4
commit 1bb39ffd77

View File

@@ -10,27 +10,28 @@ from calibre.web.feeds.news import BasicNewsRecipe
class USAToday(BasicNewsRecipe): class USAToday(BasicNewsRecipe):
title = 'USA Today' title = 'USA Today'
__author__ = 'Kovid Goyal' __author__ = 'calibre'
oldest_article = 1 description = 'newspaper'
publication_type = 'newspaper' encoding = 'utf-8'
timefmt = '' publisher = 'usatoday.com'
max_articles_per_feed = 20 category = 'news, usa'
language = 'en' language = 'en'
no_stylesheets = True
extra_css = '.headline {text-align: left;}\n \ use_embedded_content = False
.byline {font-family: monospace; \ timefmt = ' [%d %b %Y]'
text-align: left; \ max_articles_per_feed = 15
margin-bottom: 1em;}\n \ no_stylesheets = True
.image {text-align: center;}\n \ remove_empty_feeds = True
.caption {text-align: center; \ filterDuplicates = True
font-size: smaller; \
font-style: italic}\n \ extra_css = '''
.credit {text-align: right; \ h1, h2 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
margin-bottom: 0em; \ #post-attributes, .info, .clear {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
font-size: smaller;}\n \ #post-body, #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
.articleBody {text-align: left;}\n ' '''
#simultaneous_downloads = 1
feeds = [ feeds = [
('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'), ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'), ('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
@@ -43,15 +44,18 @@ class USAToday(BasicNewsRecipe):
('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'), ('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'), ('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'), ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories'), ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
] ]
keep_only_tags = [dict(attrs={'class':'story'})] keep_only_tags = [dict(attrs={'class':'story'})]
remove_tags = [ remove_tags = [
dict(attrs={'class':[ dict(attrs={'class':[
'share', 'share',
'reprints', 'reprints',
'inline-h3', 'inline-h3',
'info-extras', 'info-extras rounded',
'inset',
'ppy-outer', 'ppy-outer',
'ppy-caption', 'ppy-caption',
'comments', 'comments',
@@ -61,9 +65,13 @@ class USAToday(BasicNewsRecipe):
'tags', 'tags',
'bottom-tools', 'bottom-tools',
'sponsoredlinks', 'sponsoredlinks',
'corrections'
]}), ]}),
dict(name='ul', attrs={'class':'inside-copy'}),
dict(id=['pluck']), dict(id=['pluck']),
] dict(id=['updated']),
dict(id=['post-date-updated'])
]
def get_masthead_url(self): def get_masthead_url(self):