This commit is contained in:
Kovid Goyal 2022-04-25 08:15:04 +05:30
commit 78e6966dae
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -3,7 +3,7 @@
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from collections import defaultdict
from collections import OrderedDict
from calibre import browser
from calibre.ebooks.BeautifulSoup import Tag
@@ -31,8 +31,8 @@ def new_tag(soup, name, attrs=()):
class NewYorker(BasicNewsRecipe):
title = 'New Yorker Magazine'
description = 'Content from the New Yorker website'
title = "The New Yorker Magazine"
description = "Articles of the week's New Yorker magazine"
url_list = []
language = 'en'
@@ -69,9 +69,9 @@ class NewYorker(BasicNewsRecipe):
return soup
def parse_index(self):
soup = self.index_to_soup(
'https://www.newyorker.com/magazine?intcid=magazine')
# soup = self.index_to_soup('file:///t/raw.html')
# Get cover
cover_soup = self.index_to_soup('https://www.newyorker.com/archive')
cover_img = cover_soup.find(
attrs={'class': lambda x: x and 'MagazineSection__cover___' in x})
@@ -86,36 +86,63 @@ class NewYorker(BasicNewsRecipe):
self.cover_url = self.cover_url.replace(old_width, "w_560")
except Exception:
self.log('Failed enlarging cover img, using the original one')
self.log('Found cover:', self.cover_url)
stories = defaultdict(list)
last_section = 'Unknown'
for story in soup.findAll(
attrs={'class': lambda x: x and 'River__riverItemContent___' in x}):
try:
section = self.tag_to_string(
story.find('a')['title']) or last_section
except KeyError:
section = last_section
last_section = section
# Get content
soup = self.index_to_soup(
'https://www.newyorker.com/magazine?intcid=magazine')
stories = OrderedDict() # So we can list sections in order
# Iterate sections of content
for section_soup in soup.findAll(
attrs={'class': lambda x: x and 'MagazinePageSection__section___21cc7' in x}):
section = section_soup.find('h2').text
self.log("Found section:", section)
# Iterate stories in section
is_mail_section = (section == "Mail")
if is_mail_section:
cname = "Link__link___"
else:
cname = "River__riverItemContent___"
for story in section_soup.findAll(
attrs={'class': lambda x: x and cname in x}):
title = ""
url = ""
desc = ""
if is_mail_section:
title = story.text
url = absurl(story['href'])
else:
h4 = story.find('h4')
title = self.tag_to_string(h4)
a = story.find('h4').parent
url = absurl(a['href'])
desc = ''
# Get description
body = story.find(attrs={'class': 'River__dek___CayIg'})
if body is not None:
desc = body.contents[0]
self.log('Found article:', title)
self.log('\t' + url)
self.log('\t' + desc)
self.log('')
if section not in stories:
stories[section] = []
stories[section].append({
'title': title,
'url': url,
'description': desc})
return [(k, stories[k]) for k in sorted(stories)]
return [(k, stories[k]) for k, v in stories.items()]
# The New Yorker changes the content it delivers based on cookies, so the
# following ensures that we send no cookies