From 093404e2081f9b430c19dc1cc994b35126849ab2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 14 Dec 2009 16:48:36 -0700 Subject: [PATCH] Improved recipe for Newsweek --- resources/recipes/newsweek.recipe | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/resources/recipes/newsweek.recipe b/resources/recipes/newsweek.recipe index ff408ca9a5..f6da941361 100644 --- a/resources/recipes/newsweek.recipe +++ b/resources/recipes/newsweek.recipe @@ -33,19 +33,21 @@ class Newsweek(BasicNewsRecipe): language = 'en' remove_tags = [ - {'class':['fwArticle noHr','fwArticle','subinfo','hdlBulletItem','head-content','navbar','link', 'ad', 'sponsorLinksArticle', 'mm-content', + {'class':['fwArticle noHr','fwArticle','hdlBulletItem','head-content','navbar','link', 'ad', 'sponsorLinksArticle', 'mm-content', 'inline-social-links-wrapper', 'email-article','ToolBox', + 'inline-promo-link', 'sponsorship', 'inlineComponentRight', 'comments-and-social-links-wrapper', 'EmailArticleBlock']}, {'id' : ['footer', 'ticker-data', 'topTenVertical', - 'digg-top-five', 'mesothorax', 'nw-comments', + 'digg-top-five', 'mesothorax', 'nw-comments', 'my-take-landing', 'ToolBox', 'EmailMain']}, {'class': re.compile('related-cloud')}, dict(name='li', attrs={'id':['slug_bigbox']}) ] - keep_only_tags = [{'class':['article HorizontalHeader', 'articlecontent','photoBox']}, ] + keep_only_tags = [{'class':['article HorizontalHeader', + 'articlecontent','photoBox', 'article columnist first']}, ] recursions = 1 match_regexps = [r'http://www.newsweek.com/id/\S+/page/\d+']