From ecb302fa0498d7918941f881f2330233a3582282 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Jan 2019 19:29:06 +0530
Subject: [PATCH] Update New York Times Book Review

---
 recipes/nytimesbook.recipe | 56 ++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 32 deletions(-)

diff --git a/recipes/nytimesbook.recipe b/recipes/nytimesbook.recipe
index 2112226b31..afe5fe4e85 100644
--- a/recipes/nytimesbook.recipe
+++ b/recipes/nytimesbook.recipe
@@ -6,6 +6,12 @@ def classes(classes):
     return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
 
 
+def absolutize(url):
+    # Site-relative links (leading '/') get the nytimes.com origin prepended.
+    prefix = 'https://www.nytimes.com' if url.startswith('/') else ''
+    return prefix + url
+
+
 class NewYorkTimesBookReview(BasicNewsRecipe):
     title = u'New York Times Book Review'
     language = 'en'
@@ -18,42 +24,27 @@ class NewYorkTimesBookReview(BasicNewsRecipe):
     encoding = 'utf-8'
 
     keep_only_tags = [
-        dict(id='story'),
-    ]
-    remove_tags = [
-        dict(attrs={'aria-label':'tools'.split()}),
-        dict(attrs={'aria-label': lambda x: x and 'New York Times Logo' in x}),
-        dict(href='#site-content #site-index'.split()),
-        dict(attrs={'aria-hidden':'true'}),
-        dict(attrs={'data-videoid':True}),
-        dict(name='button meta link'.split()),
-        dict(id=lambda x: x and x.startswith('story-ad-')),
-        dict(name='head'),
-        dict(role='toolbar'),
-        dict(name='a', href=lambda x: x and '#story-continues-' in x),
-        dict(name='a', href=lambda x: x and '#whats-next' in x),
-        dict(id=lambda x: x and 'sharetools-' in x),
-        dict(id='newsletter-promo supported-by-ad bottom-wrapper'.split()),
-        classes('story-print-citation supported-by accessibility-ad-header visually-hidden bottom-of-article ad nav-wrapper'),
-        dict(attrs={'class': lambda x: x and (
-            'SectionBar' in x or 'recirculation' in x or 'ResponsiveAd' in x or 'accessibility-visuallyHidden' in x or 'RelatedCoverage' in x)}),
+            dict(name='h1'),
+            dict(attrs={'data-testid':'photoviewer-wrapper'}),
+            dict(itemprop=['author creator', 'articleBody']),
     ]
 
     def parse_index(self):
         soup = self.index_to_soup(
-            'http://www.nytimes.com/pages/books/review/index.html')
+            'https://www.nytimes.com/pages/books/review/index.html')
 
         # Find TOC
-        toc = soup.find('div', attrs={'class': 'rank'})
+        toc = soup.find('section', id='collection-book-review').find('section').find('ol')
         main_articles, articles = [], []
         feeds = [('Features', main_articles), ('Latest', articles)]
-        for h2 in toc.findAll('h2', attrs={'class': 'headline'}):
+        headlines = (li.find('h2') for li in toc.findAll('li'))
+        for h2 in (h for h in headlines if h is not None):  # skip <li> without a headline
             a = h2.find('a', href=True)
             if a is not None:
                 title = self.tag_to_string(a)
-                url = a['href']
+                url = absolutize(a['href'])
                 desc = ''
-                p = h2.findNextSibling('p', attrs={'class': 'summary'})
+                p = h2.findNextSibling('p')
                 if p:
                     desc = self.tag_to_string(p)
                 main_articles.append(
@@ -61,14 +52,15 @@ class NewYorkTimesBookReview(BasicNewsRecipe):
                 self.log('Found:', title, 'at', url)
                 if desc:
                     self.log('\t', desc)
-        for li in soup.find(id='latest-panel').find('ol').findAll('li'):
-            a = li.find('a', attrs={'class': 'story-link'}, href=True)
-            if a is None:
-                continue
-            url = a['href']
-            m = a.find(attrs={'class': 'story-meta'})
-            title = self.tag_to_string(m.find('h2'))
-            desc = self.tag_to_string(m.find(attrs={'class': 'summary'}))
+        for li in soup.find(id='stream-panel').find('ol').findAll('li'):
+            h2 = li.find('h2')
+            a = h2.findParent('a') if h2 is not None else None
+            if a is None or not a.get('href'):
+                continue  # not a story entry (no headline wrapped in a link)
+            url = absolutize(a['href'])
+            p = h2.findNextSibling('p')
+            title = self.tag_to_string(h2)
+            desc = self.tag_to_string(p) if p else ''
             articles.append({'title': title, 'url': url, 'description': desc})
             self.log('Found:', title, 'at', url)
             if desc: