'''
calibre news recipe for Science X.

https://sciencex.com/
'''

from calibre.web.feeds.news import BasicNewsRecipe


class scix(BasicNewsRecipe):
    '''Download recent articles from the Science X RSS feeds.

    The feeds used are those of techxplore.com, medicalxpress.com and
    phys.org (see ``feeds`` below).
    '''

    title = 'Science X'
    # Adjacent literals are concatenated by the parser, so each fragment must
    # carry its own separating space.
    description = (
        'Science X is a network of high-quality websites that provides the most complete and comprehensive '
        'daily coverage of science, technology, and medical news. Articles from phys.org, medicalxpress.com '
        '& techxplore.com'
    )
    language = 'en'
    __author__ = 'unkn0wn'
    oldest_article = 1  # days
    max_articles_per_feed = 50
    encoding = 'utf-8'
    remove_attributes = ['height', 'width']
    ignore_duplicate_articles = {'url', 'title'}
    # The '#figure' selector matches the id that preprocess_html() assigns to
    # every <figure> element.
    extra_css = (
        ' #figure {text-align:center; font-size:small;}'
        ' em, blockquote {color:#202020;}'
        ' .article__info, .article-byline, .article-main__more, .d-print-block'
        ' {font-size:small; color:#404040;} '
    )

    resolve_internal_links = True
    remove_empty_feeds = True

    # Only the <article class="news-article"> element is listed here.
    keep_only_tags = [dict(name='article', attrs={'class': 'news-article'})]

    feeds = [
        ('Tech Xplore', 'https://techxplore.com/rss-feed/'),
        ('Medical Xpress', 'https://medicalxpress.com/rss-feed/'),
        ('Phys.org', 'https://phys.org/rss-feed/'),
        # Other URLs noted by the original author (not used as feeds):
        # https://medicalxpress.com/feeds/
        # https://techxplore.com/feeds/
    ]

    def preprocess_html(self, soup):
        '''Set ``id="figure"`` on every ``<figure>`` element of *soup*.

        The id is what the ``#figure`` rule in ``extra_css`` selects on.

        :param soup: parsed article HTML; it is modified in place.
        :return: the same *soup* object.
        '''
        for figure in soup.findAll('figure'):
            figure['id'] = 'figure'
        return soup