from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    """Build a tag-search spec matching any of the given CSS class names.

    ``classes`` is a single string of class names separated by spaces.
    The returned dict has one ``attrs`` entry whose ``class`` matcher is a
    callable: given a tag's class attribute string, it returns a truthy
    value when at least one of the tag's classes is among the wanted ones.
    """
    q = frozenset(classes.split(' '))

    def has_wanted_class(x):
        # A missing/empty class attribute is returned as-is (falsy);
        # otherwise the (possibly empty) set of shared class names is
        # returned, which is truthy only when there is an overlap.
        return x and frozenset(x.split()).intersection(q)

    return dict(attrs={'class': has_wanted_class})


class AdvancedUserRecipe1298137661(BasicNewsRecipe):
    """Recipe configuration for the Helsingin Sanomat (hs.fi) news feed."""

    title = u'Helsingin Sanomat'
    __author__ = 'oneillpt'
    language = 'fi'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    # Tags carrying either of these classes are the ones to keep.
    keep_only_tags = [
        classes('article-title single-article'),
    ]

    # Tags to drop: the listed class values, plus anything whose inline
    # style contains 'height: 0'.
    remove_tags = [
        dict(attrs={'class': ['hidden print-url', 'article-paywall']}),
        dict(style=lambda x: x and 'height: 0' in x),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Uutiset - HS.fi', u'https://www.hs.fi/uutiset/rss/'),
    ]

    def preprocess_html(self, soup):
        """Turn every tag that has a ``data-mfp-src`` attribute into an image.

        Each such tag is renamed to ``img``, its ``src`` is set from
        ``data-mfp-src`` and it is given a ``display:block`` inline style.
        The soup is modified in place and returned.
        """
        # NOTE(review): data-mfp-src presumably holds the full-size image URL
        # used by the site's lightbox -- confirm against live markup.
        for tag in soup.findAll(attrs={'data-mfp-src': True}):
            tag.name = 'img'
            tag['src'] = tag['data-mfp-src']
            tag['style'] = 'display:block'
        return soup