From 6e4ed94a6b525fbc05deb4799481eb2089a12d7a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 9 Mar 2020 22:00:00 +0530
Subject: [PATCH] Update Newsweek

Fixes #1866636 [newsweek won't download](https://bugs.launchpad.net/calibre/+bug/1866636)
---
 recipes/newsweek.recipe | 74 +++++++++++++++++------------------------
 1 file changed, 31 insertions(+), 43 deletions(-)
diff --git a/recipes/newsweek.recipe b/recipes/newsweek.recipe
index a8dc8d91e6..fc55dac112 100644
--- a/recipes/newsweek.recipe
+++ b/recipes/newsweek.recipe
@@ -1,3 +1,8 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
+
+import json
 from calibre.web.feeds.news import BasicNewsRecipe
 from collections import defaultdict
 
@@ -49,28 +54,23 @@ class Newsweek(BasicNewsRecipe):
         a = li.xpath('descendant::a[@href]')[0]
         url = href_to_url(a, add_piano=True)
         self.timefmt = self.tag_to_string(a)
-        img = li.xpath('descendant::a[@href]//img[@data-src]')[0]
-        self.cover_url = img.get('data-src').partition('?')[0]
+        img = li.xpath('descendant::a[@href]//source[@type="image/jpeg"]/@srcset')[0]
+        self.cover_url = img.partition('?')[0]
+        self.log('Found cover url:', self.cover_url)
         root = self.index_to_soup(url, as_tree=True)
         features = []
-        try:
-            div = root.xpath('//div[@class="magazine-features"]')[0]
-        except IndexError:
-            pass
-        else:
-            for a in div.xpath('descendant::div[@class="h1"]//a[@href]'):
-                title = self.tag_to_string(a)
-                article = a.xpath('ancestor::article')[0]
-                desc = ''
-                s = article.xpath('descendant::div[@class="summary"]')
-                if s:
-                    desc = self.tag_to_string(s[0])
-                features.append({'title': title, 'url': href_to_url(a), 'description': desc})
-                self.log(title, href_to_url(a))
+        for article in root.xpath('//div[@class="magazine-features"]//article'):
+            a = article.xpath('descendant::a[@class="article-link"]')[0]
+            title = self.tag_to_string(a)
+            url = href_to_url(a)
+            desc = ''
+            s = article.xpath('descendant::div[@class="summary"]')
+            if s:
+                desc = self.tag_to_string(s[0])
+            features.append({'title': title, 'url': href_to_url(a), 'description': desc})
+            self.log(title, url)
 
-        index = []
-        if features:
-            index.append(('Features', features))
+        index = [('Features', features)]
         sections = defaultdict(list)
         for widget in ('editor-pick',):
             self.parse_widget(widget, sections)
@@ -79,30 +79,18 @@ class Newsweek(BasicNewsRecipe):
         return index
 
     def parse_widget(self, widget, sections):
-        root = self.index_to_soup('https://d.newsweek.com/widget/' + widget, as_tree=True)
-        div = root.xpath('//div')[0]
-        href_xpath = 'descendant::*[local-name()="h1" or local-name()="h2" or local-name()="h3" or local-name()="h4"]/a[@href]'
-        for a in div.xpath(href_xpath):
-            title = self.tag_to_string(a)
-            article = a.xpath('ancestor::article')[0]
-            desc = ''
-            s = article.xpath('descendant::div[@class="summary"]')
-            if s:
-                desc = self.tag_to_string(s[0])
-            sec = article.xpath('descendant::div[@class="category"]')
-            if sec:
-                sec = self.tag_to_string(sec[0])
-            else:
-                sec = 'Articles'
-            sections[sec].append(
-                {'title': title, 'url': href_to_url(a), 'description': desc})
-            self.log(title, href_to_url(a))
-            if desc:
-                self.log('\t' + desc)
-            self.log('')
-
-    def print_version(self, url):
-        return url + '?piano_d=1'
+        raw = self.index_to_soup('https://d.newsweek.com/json/' + widget, raw=True)  # fetched raw; decoded as JSON on the next line
+        data = json.loads(raw)['items']  # the article entries live under the top-level 'items' key
+        for item in data:
+            title = item['title']
+            url = BASE + item['link']  # BASE is defined elsewhere in the recipe; presumably 'link' is site-relative - confirm
+            self.log(title, url)
+            sections[item['label']].append(  # group articles into sections keyed by the item's 'label'
+                {
+                    'title': title,
+                    'url': url,
+                    'description': item['description'],
+                })
 
     def preprocess_html(self, soup):
         # Parallax images in the articles are loaded as background images