This commit is contained in:
Kovid Goyal 2024-03-02 09:51:18 +05:30
commit a874ee037d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 21 additions and 9 deletions

View File

@@ -62,7 +62,7 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
# Find cover
cover = soup.find('img', attrs={'class':'border-light-gray'})
if cover is not None:
self.cover_url = absurl(cover['src'])
self.cover_url = absurl(cover['data-lazy-src'])
self.log('Found cover at:', self.cover_url)
# Find date
@@ -91,3 +91,8 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
'description': desc})
return [('Current Issue', articles)]
def preprocess_html(self, soup):
    """Resolve lazy-loaded images before download.

    The site ships placeholder ``src`` attributes and keeps the real
    image URL in ``data-lazy-src``; copy that URL into ``src`` so the
    fetcher downloads the actual images.
    """
    lazy_imgs = soup.findAll('img', attrs={'data-lazy-src': True})
    for image in lazy_imgs:
        image['src'] = image['data-lazy-src']
    return soup

View File

@@ -52,7 +52,7 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
# Find cover
cover = soup.find('img', attrs={'class':'border-light-gray'})
if cover is not None:
self.cover_url = absurl(cover['src'])
self.cover_url = absurl(cover['data-lazy-src'])
self.log('Found cover at:', self.cover_url)
# Find date
@@ -81,3 +81,8 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
'description': desc})
return [('Current Issue', articles)]
def preprocess_html(self, soup):
    """Swap lazy-load placeholders for real image URLs.

    Images on the page carry their true location in ``data-lazy-src``
    rather than ``src``; rewrite each such tag so the e-book build
    fetches the genuine image files.
    """
    for tag in soup.findAll('img', attrs={'data-lazy-src': True}):
        tag['src'] = tag['data-lazy-src']
    return soup

View File

@@ -67,22 +67,24 @@ class ScientificAmerican(BasicNewsRecipe):
if not curr_issue_link:
self.abort_recipe_processing("Unable to find issue link")
issue_url = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
# for past editions https://www.scientificamerican.com/archive/issues/
# issue_url = 'https://www.scientificamerican.com/issue/sa/2024/01-01/'
soup = self.index_to_soup(issue_url)
script = soup.find("script", id="__NEXT_DATA__")
script = soup.find("script", id="__DATA__")
if not script:
self.abort_recipe_processing("Unable to find script")
JSON = script.contents[0].split('JSON.parse(`')[1].replace("\\\\", "\\")
data = json.JSONDecoder().raw_decode(JSON)[0]
issue_info = (
json.loads(script.contents[0])
.get("props", {})
.get("pageProps", {})
.get("issue", {})
data
.get("initialData", {})
.get("issueData", {})
)
if not issue_info:
self.abort_recipe_processing("Unable to find issue info")
image_id, ext = splitext(issue_info["image"])
self.cover_url = f"https://static.scientificamerican.com/sciam/cache/file/{image_id}_source{ext}?w=800"
self.cover_url = issue_info["image_url"] + "?w=800"
edition_date = datetime.strptime(issue_info["issue_date"], "%Y-%m-%d")
self.timefmt = f" [{edition_date:%B %Y}]"