Fix Seattle Times

This commit is contained in:
Kovid Goyal 2012-01-01 08:14:00 +05:30
parent 176d8dc762
commit 56bbaf7441

View File

@@ -20,6 +20,8 @@ class SeattleTimes(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
encoding = 'cp1252' encoding = 'cp1252'
language = 'en' language = 'en'
auto_cleanup = True
auto_cleanup_keep = '//div[@id="PhotoContainer"]'
feeds = [ feeds = [
(u'Top Stories', (u'Top Stories',
@@ -69,24 +71,4 @@ class SeattleTimes(BasicNewsRecipe):
u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'), u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'),
] ]
keep_only_tags = [dict(id='content')]
remove_tags = [
dict(name=['object','link','script']),
{'class':['permission', 'note', 'bottomtools',
'homedelivery']},
dict(id=["rightcolumn", 'footer', 'adbottom']),
]
def print_version(self, url):
    """Return the URL that should be fetched for an article.

    The article URL is passed through unchanged.  Earlier code placed
    after the ``return`` derived an article id from the URL and built a
    ``PrintStory.pl?document_id=...`` address, but it could never run
    because the function had already returned; that dead code has been
    removed.

    :param url: article URL as a string.
    :return: the same ``url`` value, unmodified.
    """
    return url
def preprocess_html(self, soup):
    """Mark the page as US English and drop inline ``style`` attributes.

    :param soup: parsed document; presumably a BeautifulSoup tree as
        handed to recipe hooks -- it must expose ``head`` and
        ``findAll``.
    :return: the same ``soup`` object, modified in place.
    """
    mtag = '<meta http-equiv="Content-Language" content="en-US"/>'
    head = soup.head
    # A document without a <head> has nothing to insert into; skip the
    # language hint instead of failing on ``None.insert``.
    if head is not None:
        head.insert(0, mtag)
    # Remove the style attribute from every element that findAll reports
    # as carrying one.
    for item in soup.findAll(style=True):
        del item['style']
    return soup