From 56bbaf7441f84966bd6a1059b08f5fa4a883b64b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 1 Jan 2012 08:14:00 +0530 Subject: [PATCH] Fix Seattle Times --- recipes/seattle_times.recipe | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/recipes/seattle_times.recipe b/recipes/seattle_times.recipe index 1d72df2991..5fa6dac066 100644 --- a/recipes/seattle_times.recipe +++ b/recipes/seattle_times.recipe @@ -20,6 +20,8 @@ class SeattleTimes(BasicNewsRecipe): use_embedded_content = False encoding = 'cp1252' language = 'en' + auto_cleanup = True + auto_cleanup_keep = '//div[@id="PhotoContainer"]' feeds = [ (u'Top Stories', @@ -69,24 +71,4 @@ class SeattleTimes(BasicNewsRecipe): u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'), ] - keep_only_tags = [dict(id='content')] - remove_tags = [ - dict(name=['object','link','script']), - {'class':['permission', 'note', 'bottomtools', - 'homedelivery']}, - dict(id=["rightcolumn", 'footer', 'adbottom']), - ] - - def print_version(self, url): - return url - start_url, sep, rest_url = url.rpartition('_') - rurl, rsep, article_id = start_url.rpartition('/') - return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + article_id - - def preprocess_html(self, soup): - mtag = '' - soup.head.insert(0,mtag) - for item in soup.findAll(style=True): - del item['style'] - return soup