import re

from calibre.web.feeds.news import BasicNewsRecipe


class JerusalemPost(BasicNewsRecipe):
    """Calibre recipe that fetches Jerusalem Post articles from its RSS feeds."""

    # Recipe metadata.
    title = 'Jerusalem Post'
    description = 'News from Israel and the Middle East'
    __author__ = 'Kovid Goyal'
    language = 'en'

    # Fetch settings.
    use_embedded_content = False
    max_articles_per_feed = 10
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    # (section title, RSS URL) pairs.
    feeds = [
        ('Front Page',
         'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333346'),
        ('Israel News',
         'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463156'),
        ('Middle East News',
         'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333498'),
        ('International News',
         'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1178443463144'),
        ('Editorials',
         'http://www.jpost.com/servlet/Satellite?pagename=JPost/Page/RSS&cid=1123495333211'),
    ]

    # Tag matchers for markup that should not end up in the output:
    # ids containing 'ads.', the 'printinfo'/'tt1' classes, the element
    # wired to DoPrint(), and all <input> elements.
    remove_tags = [
        dict(id=lambda x: x and 'ads.' in x),
        dict(attrs={'class': ['printinfo', 'tt1']}),
        dict(onclick='DoPrint()'),
        dict(name='input'),
    ]

    def preprocess_html(self, soup):
        """Rename every <form> element in *soup* to <div> and return *soup*.

        The tree is modified in place; the same object is returned.
        """
        for form in soup.findAll('form'):
            form.name = 'div'
        return soup

    def print_version(self, url):
        """Return the print-article URL for *url*, or *url* itself.

        When *url* contains ``ID=<digits>`` or ``id=<digits>``, the digits
        are substituted into the PrintArticle.aspx address. Otherwise *url*
        is returned unchanged.
        """
        # NOTE(review): the pattern is unanchored, so the 'id=' inside a
        # parameter such as 'cid=123' matches too -- confirm this is wanted.
        found = re.search(r'(ID|id)=(\d+)', url)
        if found is None:
            return url
        article_id = found.group(2)
        return 'http://www.jpost.com/LandedPages/PrintArticle.aspx?id=%s' % article_id