from calibre.web.feeds.news import BasicNewsRecipe


class Athletic(BasicNewsRecipe):
    """Recipe that builds an e-book from The Athletic's RSS feeds.

    Article URLs are rewritten to carry ``?amp=1`` (see ``print_version``)
    and the ``amp-img`` elements of the fetched pages are renamed to ``img``
    (see ``preprocess_html``).
    """

    title = u'The Athletic'
    __author__ = 'unkn0wn'
    description = (
        'The Athletic delivers powerful stories and smart analysis that bring '
        'sports fans closer to the heart of the game. From breaking news and '
        'live commentary, to deeply-reported long reads and exclusive '
        'interviews, subscribers rely on The Athletic for every sports story '
        'that matters.'
    )
    masthead_url = (
        'https://upload.wikimedia.org/wikipedia/commons/thumb/9/95/'
        'The_Athletic_wordmark_black_2020.svg/'
        '640px-The_Athletic_wordmark_black_2020.svg.png'
    )
    language = 'en'
    oldest_article = 1.15  # days
    max_articles_per_feed = 50
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']
    ignore_duplicate_articles = {'url'}
    remove_empty_feeds = True

    # Small print for the byline and for image credits.
    extra_css = (
        ' #articleByLineString{font-size:small;}'
        ' .inline-credits{font-size:small; text-align:center;} '
    )

    # Only the AMP images and the article containers are kept.
    keep_only_tags = [
        {'name': 'amp-img', 'attrs': {'class': 'i-amphtml-layout-fill'}},
        {
            'name': 'div',
            'attrs': {'class': ['the-lead-article', 'article-container']},
        },
    ]

    remove_tags = [
        {'name': 'i-amphtml-sizer'},
    ]

    # There are too many sections to list them all: to follow another one,
    # append '/?rss' to that section's URL and add it here.
    feeds = [
        ('The Athletic Ink', 'https://theathletic.com/ink/?rss'),
        ('Football', 'https://theathletic.com/football/?rss'),
        ('Boxing', 'https://theathletic.com/boxing/?rss'),
        ('MMA', 'https://theathletic.com/mma/?rss'),
        ('Motorsports', 'https://theathletic.com/motorsports/?rss'),
        ('NBA', 'https://theathletic.com/nba/?rss'),
        ('NHL', 'https://theathletic.com/nhl/?rss'),
        ('Olympics', 'https://theathletic.com/olympics/?rss'),
        ('Culture', 'https://theathletic.com/culture/?rss'),
        ('Others', 'https://theathletic.com/rss-feed/'),  # All Articles
    ]

    def preprocess_html(self, soup):
        """Rename every ``amp-img`` tag that has no ``img`` child to ``img``.

        The soup is modified in place and then returned.
        """
        for amp_tag in soup.findAll('amp-img'):
            if amp_tag.find('img'):
                # An <img> is already nested inside; leave this tag alone.
                continue
            amp_tag.name = 'img'
        return soup

    def print_version(self, url):
        """Return ``url`` with its query string replaced by ``?amp=1``."""
        # Everything from the first '?' onwards is dropped.
        base, _, _ = url.partition('?')
        return base + '?amp=1'