from calibre.web.feeds.news import BasicNewsRecipe
import re


class NatureNews(BasicNewsRecipe):
    """Recipe settings for the Nature News "most recent" RSS feed.

    The class defines no methods; it only sets declarative attributes that
    the ``BasicNewsRecipe`` base class reads.
    """

    title = u'Nature News'
    language = 'en'
    __author__ = 'Krittika Goyal, Starson17'

    oldest_article = 31  # days
    remove_empty_feeds = True
    max_articles_per_feed = 50
    no_stylesheets = True

    # Tag selectors delimiting the article: the entry-title heading on one
    # side and the comments heading on the other.
    remove_tags_before = dict(name='h1', attrs={'class': 'heading entry-title'})
    remove_tags_after = dict(name='h2', attrs={'id': 'comments'})

    # Tag selectors for the comments heading and advertisement elements.
    remove_tags = [
        dict(name='h2', attrs={'id': 'comments'}),
        dict(attrs={'alt': 'Advertisement'}),
        dict(name='div', attrs={'class': 'ad'}),
    ]

    # (pattern, replacement) pairs; the replacement callable returns an empty
    # string, so every match is deleted.
    # The line breaks around the word are written as ``\n`` escapes because a
    # single-quoted literal cannot span physical lines.
    # NOTE(review): the pattern may originally have included markup around
    # "ADVERTISEMENT" -- confirm against the upstream recipe.
    preprocess_regexps = [
        (
            re.compile(r'\nADVERTISEMENT\n', re.DOTALL | re.IGNORECASE),
            lambda match: '',
        ),
    ]

    # (feed title, feed URL) pairs.
    feeds = [('Nature News', 'http://feeds.nature.com/news/rss/most_recent')]