#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://sciencex.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe


class scix(BasicNewsRecipe):
    '''Recipe that collects the Science X network RSS feeds.

    Pulls the Tech Xplore, Medical Xpress and Phys.org feeds, keeps only the
    ``<article class="news-article">`` element of every page and tags each
    ``<figure>`` so that the caption styling in ``extra_css`` applies.
    '''

    title = 'Science X'
    description = (
        'Science X is a network of high-quality websites that provides the most complete and comprehensive '
        'daily coverage of science, technology, and medical news. Articles from phys.org, medicalxpress.com '
        '& techxplore.com'
    )
    language = 'en'
    __author__ = 'unkn0wn'
    oldest_article = 1  # days
    max_articles_per_feed = 50
    encoding = 'utf-8'
    remove_attributes = ['height', 'width']
    ignore_duplicate_articles = {'url', 'title'}

    # The ``#figure`` selector matches the id assigned in preprocess_html().
    extra_css = '''
        #figure {text-align:center; font-size:small;}
        em, blockquote {color:#202020;}
        .article__info, .article-byline, .article-main__more, .d-print-block {font-size:small; color:#404040;}
    '''

    # The default is derived from the class-level ``oldest_article`` above.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        '''Initialise the recipe and apply a user-supplied ``days`` value.

        All arguments are forwarded unchanged to ``BasicNewsRecipe.__init__``.

        Raises:
            ValueError: if the ``days`` option is a string that ``float()``
                cannot parse.
        '''
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        # As declared on the class, the 'days' entry is a dict, so this only
        # fires when the entry has been replaced by a string.
        # NOTE(review): presumably the base class stores the user's override
        # there as a string -- confirm against calibre.
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    resolve_internal_links = True
    remove_empty_feeds = True

    keep_only_tags = [dict(name='article', attrs={'class': 'news-article'})]

    feeds = [
        ('Tech Xplore', 'https://techxplore.com/rss-feed/'),
        ('Medical Xpress', 'https://medicalxpress.com/rss-feed/'),
        ('Phys.org', 'https://phys.org/rss-feed/')
        # https://medicalxpress.com/feeds/
        # https://techxplore.com/feeds/
    ]

    def preprocess_html(self, soup):
        '''Give every ``<figure>`` the id ``figure`` and return the soup.

        The same id is deliberately reused on each figure so that the single
        ``#figure`` rule in ``extra_css`` styles all of them.
        '''
        for figure in soup.findAll('figure'):
            figure['id'] = 'figure'
        return soup