from calibre.web.feeds.recipes import BasicNewsRecipe


def classes(classes):
    """Return a tag-matching spec that selects elements by CSS class.

    ``classes`` is a string of class names separated by single spaces. The
    result is a dict of the form ``{'attrs': {'class': predicate}}`` where
    the predicate is truthy when an element's ``class`` attribute value
    shares at least one name with the requested set.
    """
    q = frozenset(classes.split(' '))
    return dict(attrs={
        # ``x`` may be None/empty for elements without a class attribute;
        # the ``x and ...`` short-circuit keeps those from matching.
        'class': lambda x: x and frozenset(x.split()).intersection(q)})


class NewZealandHerald(BasicNewsRecipe):
    """Recipe that builds a New Zealand Herald issue from its RSS feeds."""

    title = 'New Zealand Herald'
    __author__ = 'Kovid Goyal'
    description = 'Daily news'
    timefmt = ' [%d %b, %Y]'
    language = 'en_NZ'
    # NOTE(review): presumably measured in days, per the calibre recipe API.
    oldest_article = 2.5

    # Only the article header and the main content container are kept.
    keep_only_tags = [
        classes('article-header'),
        dict(id='article-content'),
    ]
    # Elements carrying any of these classes are removed from the article.
    remove_tags = [
        classes(
            'ad-container pb-f-video-video-player'
            ' pb-f-article-related-articles social-shares'
        )
    ]

    # (section title, RSS feed URL) pairs.
    feeds = [
        ('Business', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000003.xml'),
        ('World', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000002.xml'),
        ('National', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000001.xml'),
        ('Entertainment', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_001501119.xml'),
        ('Travel', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000007.xml'),
        ('Opinion', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000466.xml'),
        ('Life & Style', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000006.xml'),
        ('Technology', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000005.xml'),
        ('Sport', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000004.xml'),
        ('Motoring', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000009.xml'),
        ('Property', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000008.xml'),
    ]

    def preprocess_html(self, soup, *a):
        """Give each ``<img>`` with a ``data-srcset`` a usable ``src``.

        The first whitespace-separated token of ``data-srcset`` is copied
        into ``src``. Images whose ``data-srcset`` is empty or only
        whitespace are left untouched. Extra positional arguments are
        accepted and ignored. Returns the same ``soup`` object, modified in
        place.
        """
        for img in soup.findAll('img', attrs={'data-srcset': True}):
            candidates = img['data-srcset'].split()
            # The ``True`` filter also matches an empty attribute value, for
            # which there is no first token to take.
            if candidates:
                img['src'] = candidates[0]
        return soup