Update Newsweek

Fixes #1866636 [newsweek won't download](https://bugs.launchpad.net/calibre/+bug/1866636)
2025-07-09 03:04:10 -04:00 · 2020-03-09 22:00:00 +05:30 · 2020-03-09 22:00:00 +05:30 · 6e4ed94a6b
commit 6e4ed94a6b
parent 948a15965e
1 changed file with 31 additions and 43 deletions
--- a/recipes/newsweek.recipe
+++ b/recipes/newsweek.recipe
@@ -1,3 +1,8 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
 import json
 from calibre.web.feeds.news import BasicNewsRecipe
 from collections import defaultdict
@@ -49,28 +54,23 @@ class Newsweek(BasicNewsRecipe):
        a = li.xpath('descendant::a[@href]')[0]
        url = href_to_url(a, add_piano=True)
        self.timefmt = self.tag_to_string(a)
-        img = li.xpath('descendant::a[@href]//img[@data-src]')[0]
+        img = li.xpath('descendant::a[@href]//source[@type="image/jpeg"]/@srcset')[0]
-        self.cover_url = img.get('data-src').partition('?')[0]
+        self.cover_url = img.partition('?')[0]
        self.log('Found cover url:', self.cover_url)
        root = self.index_to_soup(url, as_tree=True)
        features = []
-        try:
+        for article in root.xpath('//div[@class="magazine-features"]//article'):
-            div = root.xpath('//div[@class="magazine-features"]')[0]
+            a = article.xpath('descendant::a[@class="article-link"]')[0]
        except IndexError:
            pass
        else:
            for a in div.xpath('descendant::div[@class="h1"]//a[@href]'):
            title = self.tag_to_string(a)
-                article = a.xpath('ancestor::article')[0]
+            url = href_to_url(a)
            desc = ''
            s = article.xpath('descendant::div[@class="summary"]')
            if s:
                desc = self.tag_to_string(s[0])
            features.append({'title': title, 'url': href_to_url(a), 'description': desc})
-                self.log(title, href_to_url(a))
+            self.log(title, url)
-        index = []
+        index = [('Features', features)]
        if features:
            index.append(('Features', features))
        sections = defaultdict(list)
        for widget in ('editor-pick',):
            self.parse_widget(widget, sections)
@@ -79,30 +79,18 @@ class Newsweek(BasicNewsRecipe):
        return index
    def parse_widget(self, widget, sections):
-        root = self.index_to_soup('https://d.newsweek.com/widget/' + widget, as_tree=True)
+        raw = self.index_to_soup('https://d.newsweek.com/json/' + widget, raw=True)
-        div = root.xpath('//div')[0]
+        data = json.loads(raw)['items']
-        href_xpath = 'descendant::*[local-name()="h1" or local-name()="h2" or local-name()="h3" or local-name()="h4"]/a[@href]'
+        for item in data:
-        for a in div.xpath(href_xpath):
+            title = item['title']
-            title = self.tag_to_string(a)
+            url = BASE + item['link']
-            article = a.xpath('ancestor::article')[0]
+            self.log(title, url)
-            desc = ''
+            sections[item['label']].append(
-            s = article.xpath('descendant::div[@class="summary"]')
+                {
-            if s:
+                    'title': title,
-                desc = self.tag_to_string(s[0])
+                    'url': url,
-            sec = article.xpath('descendant::div[@class="category"]')
+                    'description': item['description'],
-            if sec:
+                })
                sec = self.tag_to_string(sec[0])
            else:
                sec = 'Articles'
            sections[sec].append(
                {'title': title, 'url': href_to_url(a), 'description': desc})
            self.log(title, href_to_url(a))
            if desc:
                self.log('\t' + desc)
            self.log('')
    def print_version(self, url):
        return url + '?piano_d=1'
    def preprocess_html(self, soup):
        # Parallax images in the articles are loaded as background images