diff --git a/recipes/icons/newsweek.png b/recipes/icons/newsweek.png deleted file mode 100644 index 662c92ed11..0000000000 Binary files a/recipes/icons/newsweek.png and /dev/null differ diff --git a/recipes/newsweek.recipe b/recipes/newsweek.recipe deleted file mode 100644 index fc55dac112..0000000000 --- a/recipes/newsweek.recipe +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# License: GPLv3 Copyright: 2015, Kovid Goyal - -import json -from calibre.web.feeds.news import BasicNewsRecipe -from collections import defaultdict - -BASE = 'https://www.newsweek.com' - - -def href_to_url(a, add_piano=False): - return BASE + a.get('href') + ('?piano_d=1' if add_piano else '') - - -def class_sels(*args): - q = set(args) - return dict(attrs={'class': lambda x: x and set(x.split()).intersection(q)}) - - -class Newsweek(BasicNewsRecipe): - - title = 'Newsweek' - __author__ = 'Kovid Goyal' - description = 'Weekly news and current affairs in the US' - language = 'en' - encoding = 'utf-8' - no_stylesheets = True - requires_version = (1, 40, 0) - - keep_only_tags = [ - dict(id='block-nw-magazine-article-header'), - class_sels('article-header', 'article-body') - ] - remove_tags = [ - dict(name=['aside', 'meta', 'source']), - class_sels( - 'block-openadstream', 'block-ibtmedia-social', 'issue-next', - 'most-popular', 'ibt-media-stories', 'user-btn-group', - 'trial-link', 'trc_related_container', - 'block-ibtmedia-top-stories', 'videocontent', 'newsletter-signup', - 'in-text-slideshows', 'content-correction', 'article-navigation' - ), - dict(id=['taboola-below-main-column', 'piano-root', - 'block-nw-magazine-magazine-more-from-issue']), - ] - remove_attributes = ['style'] - - def parse_index(self): - root = self.index_to_soup( - 'https://www.newsweek.com/archive', as_tree=True) - li = root.xpath( - '//ul[contains(@class, "magazine-archive-items")]/li')[0] - a = li.xpath('descendant::a[@href]')[0] - url = href_to_url(a, add_piano=True) - self.timefmt = self.tag_to_string(a) - img = li.xpath('descendant::a[@href]//source[@type="image/jpeg"]/@srcset')[0] - self.cover_url = img.partition('?')[0] - self.log('Found cover url:', self.cover_url) - root = self.index_to_soup(url, as_tree=True) - features = [] - for article in root.xpath('//div[@class="magazine-features"]//article'): - a = article.xpath('descendant::a[@class="article-link"]')[0] - title = self.tag_to_string(a) - url = href_to_url(a) - desc = '' - s = article.xpath('descendant::div[@class="summary"]') - if s: - desc = self.tag_to_string(s[0]) - features.append({'title': title, 'url': href_to_url(a), 'description': desc}) - self.log(title, url) - - index = [('Features', features)] - sections = defaultdict(list) - for widget in ('editor-pick',): - self.parse_widget(widget, sections) - for k in sorted(sections): - index.append((k, sections[k])) - return index - - def parse_widget(self, widget, sections): - raw = self.index_to_soup('https://d.newsweek.com/json/' + widget, raw=True) - data = json.loads(raw)['items'] - for item in data: - title = item['title'] - url = BASE + item['link'] - self.log(title, url) - sections[item['label']].append( - { - 'title': title, - 'url': url, - 'description': item['description'], - }) - - def preprocess_html(self, soup): - # Parallax images in the articles are loaded as background images - # on tags. Convert them to normal images. - for span in soup.findAll('span', attrs={'class': lambda x: x and 'parallax' in x.split()}): - s = span.find(style=True) - if s is not None: - url = s['style'].partition('(')[-1][:-1] - s['style'] = 'display: block' - s.name = 'img' - s['src'] = url - for img in soup.findAll('img', attrs={'data-src': True}): - img['src'] = img['data-src'] - return soup