Fix Business Week Magazine

This commit is contained in:
Kovid Goyal 2012-05-23 22:56:16 +05:30
parent 4833df56c8
commit cd8236c1d1

View File

@@ -15,6 +15,7 @@ class BusinessWeek(BasicNewsRecipe):
oldest_article = 7
max_articles_per_feed = 200
no_stylesheets = True
+auto_cleanup = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
@@ -36,12 +37,12 @@ class BusinessWeek(BasicNewsRecipe):
, 'language' : language
}
-remove_tags = [
-dict(attrs={'class':'inStory'})
-,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
-,dict(attrs={'id':['inset','videoDisplay']})
-]
-keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
+#remove_tags = [
+#dict(attrs={'class':'inStory'})
+#,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td'])
+#,dict(attrs={'id':['inset','videoDisplay']})
+#]
+#keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})]
remove_attributes = ['lang']
match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*']
@@ -100,3 +101,4 @@ class BusinessWeek(BasicNewsRecipe):
tstr = alink.string
alink.replaceWith(tstr)
return soup