Update The New York Times

2025-07-09 03:04:10 -04:00 · 2022-06-10 10:23:48 +05:30 · 2022-06-10 10:23:48 +05:30 · fdbf44e3bd
commit fdbf44e3bd
parent 8b1ae42869
2 changed files with 39 additions and 23 deletions
--- a/recipes/nytimes.recipe
+++ b/recipes/nytimes.recipe
@ -98,7 +98,7 @@ class NewYorkTimes(BasicNewsRecipe):
        dict(href='#site-content #site-index'.split()),
        dict(attrs={'aria-hidden':'true'}),
        dict(attrs={'data-videoid':True}),
-        dict(name='button meta link'.split()),
+        dict(name='button meta link time source'.split()),
        dict(id=lambda x: x and x.startswith('story-ad-')),
        dict(name='head'),
        dict(role='toolbar'),
@ -113,6 +113,9 @@ class NewYorkTimes(BasicNewsRecipe):

    def preprocess_html(self, soup):
        article = soup.find(id='story')
+        if article is None:
+            keep_only_tags = [dict(attrs={'aria-label': 'Main content'})]
+        else:
            # The NYT is apparently A/B testing a new page layout
            has_supplemental = article is not None and article.find(**classes('story-body-supplemental')) is not None
            if has_supplemental:
@ -153,6 +156,11 @@ class NewYorkTimes(BasicNewsRecipe):
                if span is not None and self.tag_to_string(span).strip().lower() == 'image':
                    span.name = 'img'
                    span['src'] = div['itemid']
+
+        # Remove live storyline menu
+        for span in soup.findAll(attrs={'data-storyline-module-name': 'menu'}):
+            span.parent.extract()
+
        return soup

    def read_todays_paper(self):
--- a/recipes/nytimes_sub.recipe
+++ b/recipes/nytimes_sub.recipe
@ -98,7 +98,7 @@ class NewYorkTimes(BasicNewsRecipe):
        dict(href='#site-content #site-index'.split()),
        dict(attrs={'aria-hidden':'true'}),
        dict(attrs={'data-videoid':True}),
-        dict(name='button meta link'.split()),
+        dict(name='button meta link time source'.split()),
        dict(id=lambda x: x and x.startswith('story-ad-')),
        dict(name='head'),
        dict(role='toolbar'),
@ -113,6 +113,9 @@ class NewYorkTimes(BasicNewsRecipe):

    def preprocess_html(self, soup):
        article = soup.find(id='story')
+        if article is None:
+            keep_only_tags = [dict(attrs={'aria-label': 'Main content'})]
+        else:
            # The NYT is apparently A/B testing a new page layout
            has_supplemental = article is not None and article.find(**classes('story-body-supplemental')) is not None
            if has_supplemental:
@ -153,6 +156,11 @@ class NewYorkTimes(BasicNewsRecipe):
                if span is not None and self.tag_to_string(span).strip().lower() == 'image':
                    span.name = 'img'
                    span['src'] = div['itemid']
+
+        # Remove live storyline menu
+        for span in soup.findAll(attrs={'data-storyline-module-name': 'menu'}):
+            span.parent.extract()
+
        return soup

    def read_todays_paper(self):
@ -310,7 +318,7 @@ class NewYorkTimes(BasicNewsRecipe):

    def parse_index(self):
        # return [('All articles', [
-        #     {'title': 'XXXXX', 'url': 'https://www.nytimes.com/2018/05/31/theater/best-25-plays-how-we-made-the-list.html'},
+        #     {'title': 'XXXXX', 'url': 'https://www.nytimes.com/2020/11/27/world/americas/coronavirus-migrants-venezuela.html'},
        # ])]
        if is_web_edition:
            return self.parse_web_sections()