diff --git a/resources/recipes/jpost.recipe b/resources/recipes/jpost.recipe index b4b7d19c3c..8f1cdf73f4 100644 --- a/resources/recipes/jpost.recipe +++ b/resources/recipes/jpost.recipe @@ -10,22 +10,19 @@ class JerusalemPost(BasicNewsRecipe): __author__ = 'Kovid Goyal' max_articles_per_feed = 10 no_stylesheets = True - remove_tags_before = {'class':'byline'} - remove_tags = [ - {'class':['artAdBlock clearboth', 'tbartop', 'divdot_vrttbox', - 'slideshow']}, - dict(id=['artFontButtons', 'artRelatedBlock']), - ] - remove_tags_after = {'id':'artTxtBlock'} - + remove_tags_before = {'class':'jp-grid-content'} + remove_tags_after = {'id':'body_val'} + feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'), ('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'), ('Middle East News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'), ('International News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'), ('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'), ] - - def postprocess_html(self, soup, first): - for tag in soup.findAll(name=['table', 'tr', 'td']): - tag.name = 'div' - return soup \ No newline at end of file + + def preprocess_html(self, soup): + for x in soup.findAll(name=['form', 'input']): + x.name = 'div' + for x in soup.findAll('body', style=True): + del x['style'] + return soup