diff --git a/src/calibre/web/feeds/recipes/recipe_jpost.py b/src/calibre/web/feeds/recipes/recipe_jpost.py index d183a388ff..79cb653afc 100644 --- a/src/calibre/web/feeds/recipes/recipe_jpost.py +++ b/src/calibre/web/feeds/recipes/recipe_jpost.py @@ -1,37 +1,29 @@ -import re from calibre.web.feeds.news import BasicNewsRecipe class JerusalemPost(BasicNewsRecipe): - title = 'Jerusalem Post' + title = 'Jerusalem Post' description = 'News from Israel and the Middle East' use_embedded_content = False __author__ = 'Kovid Goyal' max_articles_per_feed = 10 - - - preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in - [ - (r'.*?' , lambda match : ''), - (r'.*?', lambda match : ''), - (r'.*?', lambda match : ''), - (r'.*?', lambda match : ''), - (r'
', lambda match : ''), - (r'\'NWAnews.com', lambda match : ''), - (r'', lambda match : ''), - (r'

.*?', lambda match : ''), - - ] - ] + remove_tags_before = {'class':'byline'} + remove_tags = [ + {'class':['artAdBlock clearboth', 'tbartop', 'divdot_vrttbox', + 'slideshow']}, + dict(id=['artFontButtons', 'artRelatedBlock']), + ] + remove_tags_after = {'id':'artTxtBlock'} feeds = [ ('Front Page', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'), - ('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'), - ('Middle East News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'), - ('International News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'), - ('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'), + ('Israel News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'), + ('Middle East News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'), + ('International News', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'), + ('Editorials', 'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'), ] - def print_version(self, url): - return ('http://www.jpost.com/servlet/Satellite?cid=' + url.rpartition('&')[2] + '&pagename=JPost%2FJPArticle%2FPrinter') - + def postprocess_html(self, soup, first): + for tag in soup.findAll(name=['table', 'tr', 'td']): + tag.name = 'div' + return soup \ No newline at end of file