From 17c0f31808e2cf71079945724f8d3cdecf338c5e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 8 Feb 2010 21:17:35 -0700 Subject: [PATCH] Fix #4768 (Jerusalem Post Failed Feed) --- resources/recipes/jpost.recipe | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/resources/recipes/jpost.recipe b/resources/recipes/jpost.recipe index b4b7d19c3c..8f1cdf73f4 100644 --- a/resources/recipes/jpost.recipe +++ b/resources/recipes/jpost.recipe @@ -10,22 +10,19 @@ class JerusalemPost(BasicNewsRecipe): __author__ = 'Kovid Goyal' max_articles_per_feed = 10 no_stylesheets = True - remove_tags_before = {'class':'byline'} - remove_tags = [ - {'class':['artAdBlock clearboth', 'tbartop', 'divdot_vrttbox', - 'slideshow']}, - dict(id=['artFontButtons', 'artRelatedBlock']), - ] - remove_tags_after = {'id':'artTxtBlock'} - + remove_tags_before = {'class':'jp-grid-content'} + remove_tags_after = {'id':'body_val'} + feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'), ('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'), ('Middle East News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'), ('International News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'), ('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'), ] - - def postprocess_html(self, soup, first): - for tag in soup.findAll(name=['table', 'tr', 'td']): - tag.name = 'div' - return soup \ No newline at end of file + + def preprocess_html(self, soup): + for x in soup.findAll(name=['form', 'input']): + x.name = 'div' + for x in soup.findAll('body', style=True): + del x['style'] + return soup