From cd8236c1d1816d9676683eac7a043e5c8d2f3564 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 23 May 2012 22:56:16 +0530 Subject: [PATCH] Fix Business Week Magazine --- recipes/bwmagazine.recipe | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/recipes/bwmagazine.recipe b/recipes/bwmagazine.recipe index 9a1f10a680..d11861ce08 100644 --- a/recipes/bwmagazine.recipe +++ b/recipes/bwmagazine.recipe @@ -15,6 +15,7 @@ class BusinessWeek(BasicNewsRecipe): oldest_article = 7 max_articles_per_feed = 200 no_stylesheets = True + auto_cleanup = True encoding = 'utf8' use_embedded_content = False language = 'en' @@ -36,12 +37,12 @@ class BusinessWeek(BasicNewsRecipe): , 'language' : language } - remove_tags = [ - dict(attrs={'class':'inStory'}) - ,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td']) - ,dict(attrs={'id':['inset','videoDisplay']}) - ] - keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})] + #remove_tags = [ + #dict(attrs={'class':'inStory'}) + #,dict(name=['meta','link','iframe','base','embed','object','table','th','tr','td']) + #,dict(attrs={'id':['inset','videoDisplay']}) + #] + #keep_only_tags = [dict(name='div', attrs={'id':['story-body','storyBody']})] remove_attributes = ['lang'] match_regexps = [r'http://www.businessweek.com/.*_page_[1-9].*'] @@ -100,3 +101,4 @@ class BusinessWeek(BasicNewsRecipe): tstr = alink.string alink.replaceWith(tstr) return soup +