From c0f1f6670d41fab15c45dcc0d2e6f4b4cba6a3e8 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sat, 2 Mar 2024 09:37:54 +0530
Subject: [PATCH 1/2] Update scientific_american.recipe

Read the issue metadata from the <script id="__DATA__"> element instead
of "__NEXT_DATA__". The script text is split on the "JSON.parse(`"
marker, doubled backslashes are collapsed, and the leading JSON value is
decoded with JSONDecoder.raw_decode so that whatever follows the JSON
payload in the script is ignored.

The issue info is now looked up under initialData -> issueData, and the
cover URL is taken directly from issue_info["image_url"] with "?w=800"
appended.

A commented-out issue_url is left in place as an example of how to point
the recipe at a past edition.
---
 recipes/scientific_american.recipe | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/recipes/scientific_american.recipe b/recipes/scientific_american.recipe
index cdec0e3d76..ab7488e9e9 100644
--- a/recipes/scientific_american.recipe
+++ b/recipes/scientific_american.recipe
@@ -67,22 +67,24 @@ class ScientificAmerican(BasicNewsRecipe):
         if not curr_issue_link:
             self.abort_recipe_processing("Unable to find issue link")
         issue_url = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
+        # for past editions https://www.scientificamerican.com/archive/issues/
+        # issue_url = 'https://www.scientificamerican.com/issue/sa/2024/01-01/'
         soup = self.index_to_soup(issue_url)
-        script = soup.find("script", id="__NEXT_DATA__")
+        script = soup.find("script", id="__DATA__")
         if not script:
             self.abort_recipe_processing("Unable to find script")
 
+        JSON = script.contents[0].split('JSON.parse(`')[1].replace("\\\\", "\\")
+        data = json.JSONDecoder().raw_decode(JSON)[0]
         issue_info = (
-            json.loads(script.contents[0])
-            .get("props", {})
-            .get("pageProps", {})
-            .get("issue", {})
+            data
+            .get("initialData", {})
+            .get("issueData", {})
         )
         if not issue_info:
             self.abort_recipe_processing("Unable to find issue info")
 
-        image_id, ext = splitext(issue_info["image"])
-        self.cover_url = f"https://static.scientificamerican.com/sciam/cache/file/{image_id}_source{ext}?w=800"
+        self.cover_url = issue_info["image_url"] + "?w=800"
 
         edition_date = datetime.strptime(issue_info["issue_date"], "%Y-%m-%d")
         self.timefmt = f" [{edition_date:%B %Y}]"

From d1f8cd255aa6926e497f2064c4f402ce356562bd Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sat, 2 Mar 2024 09:46:48 +0530
Subject: [PATCH 2/2] NY Review of Books

Take the cover image URL from the "data-lazy-src" attribute of the cover
<img>, falling back to "src" when that attribute is absent so that a
cover which is not lazy-loaded does not raise KeyError.

Add preprocess_html to both recipes: for every <img> that carries a
"data-lazy-src" attribute, copy its value into "src".
---
 recipes/new_york_review_of_books.recipe        | 7 ++++++-
 recipes/new_york_review_of_books_no_sub.recipe | 7 ++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/recipes/new_york_review_of_books.recipe b/recipes/new_york_review_of_books.recipe
index 5d368ee52c..48bca9d3d7 100644
--- a/recipes/new_york_review_of_books.recipe
+++ b/recipes/new_york_review_of_books.recipe
@@ -62,7 +62,7 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
         # Find cover
         cover = soup.find('img', attrs={'class':'border-light-gray'})
         if cover is not None:
-            self.cover_url = absurl(cover['src'])
+            self.cover_url = absurl(cover.get('data-lazy-src') or cover['src'])
             self.log('Found cover at:', self.cover_url)
 
         # Find date
@@ -91,3 +91,8 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
                              'description': desc})
 
         return [('Current Issue', articles)]
+
+    def preprocess_html(self, soup):
+        for img in soup.findAll('img', attrs={'data-lazy-src':True}):
+            img['src'] = img['data-lazy-src']
+        return soup
diff --git a/recipes/new_york_review_of_books_no_sub.recipe b/recipes/new_york_review_of_books_no_sub.recipe
index 29de2aebec..d179aca304 100644
--- a/recipes/new_york_review_of_books_no_sub.recipe
+++ b/recipes/new_york_review_of_books_no_sub.recipe
@@ -52,7 +52,7 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
         # Find cover
         cover = soup.find('img', attrs={'class':'border-light-gray'})
         if cover is not None:
-            self.cover_url = absurl(cover['src'])
+            self.cover_url = absurl(cover.get('data-lazy-src') or cover['src'])
             self.log('Found cover at:', self.cover_url)
 
         # Find date
@@ -81,3 +81,8 @@ class NewYorkReviewOfBooks(BasicNewsRecipe):
                              'description': desc})
 
         return [('Current Issue', articles)]
+
+    def preprocess_html(self, soup):
+        for img in soup.findAll('img', attrs={'data-lazy-src':True}):
+            img['src'] = img['data-lazy-src']
+        return soup