diff --git a/recipes/harpers.recipe b/recipes/harpers.recipe index a4576792d0..35210a5078 100644 --- a/recipes/harpers.recipe +++ b/recipes/harpers.recipe @@ -16,6 +16,7 @@ class Harpers(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + auto_cleanup = True conversion_options = { 'comment' : description @@ -31,14 +32,14 @@ class Harpers(BasicNewsRecipe): .caption{font-family:Verdana,sans-serif;font-size:x-small;color:#666666;} ''' - keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ] - remove_tags = [ - dict(name='table', attrs={'class':['rcnt','rcnt topline']}) - ,dict(name=['link','object','embed','meta','base']) - ] - remove_attributes = ['width','height'] + #keep_only_tags = [ dict(name='div', attrs={'id':'cached'}) ] + #remove_tags = [ + #dict(name='table', attrs={'class':['rcnt','rcnt topline']}) + #,dict(name=['link','object','embed','meta','base']) + #] + #remove_attributes = ['width','height'] - feeds = [(u"Harper's Magazine", u'http://www.harpers.org/rss/frontpage-rss20.xml')] + feeds = [(u"Harper's Magazine", u'http://harpers.org/feed/')] def get_cover_url(self): cover_url = None @@ -49,9 +50,9 @@ class Harpers(BasicNewsRecipe): cover_url = 'http://harpers.org' + link_item['src'] return cover_url - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll(xmlns=True): - del item['xmlns'] - return soup + #def preprocess_html(self, soup): + #for item in soup.findAll(style=True): + #del item['style'] + #for item in soup.findAll(xmlns=True): + #del item['xmlns'] + #return soup