From 9f3fa2a62d72584a233890c36cc317a50e52e909 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 12 May 2023 07:55:31 +0530 Subject: [PATCH] Update Economist for website changes Fixes #2019248 [error with Economist recipe](https://bugs.launchpad.net/calibre/+bug/2019248) --- recipes/economist.recipe | 8 +++++--- recipes/economist_free.recipe | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/recipes/economist.recipe b/recipes/economist.recipe index 977a94c143..0b0bcc294f 100644 --- a/recipes/economist.recipe +++ b/recipes/economist.recipe @@ -269,6 +269,7 @@ class Economist(BasicNewsRecipe): self.timefmt = ' [' + edition_date + ']' else: url = 'https://www.economist.com/printedition' + # raw = open('/t/raw.html').read() raw = self.index_to_soup(url, raw=True) # with open('/t/raw.html', 'wb') as f: # f.write(raw) @@ -293,18 +294,19 @@ class Economist(BasicNewsRecipe): script_tag = soup.find("script", id="__NEXT_DATA__") if script_tag is not None: data = json.loads(script_tag.string) + # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) self.cover_url = safe_dict(data, "props", "pageProps", "content", "image", "main", "url", "canonical") self.log('Got cover:', self.cover_url) feeds_dict = defaultdict(list) for part in safe_dict(data, "props", "pageProps", "content", "hasPart", "parts"): section = safe_dict(part, "print", "section", "headline") or '' - title = safe_dict(part, "print", "headline") or '' + title = safe_dict(part, "headline") or '' url = safe_dict(part, "url", "canonical") or '' if not section or not title or not url: continue - desc = safe_dict(part, "print", "description") or '' - sub = safe_dict(part, "print", "subheadline") or '' + desc = safe_dict(part, "description") or '' + sub = safe_dict(part, "subheadline") or '' if sub and section != sub: desc = sub + ' :: ' + desc if '/interactive/' in url: diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe index 977a94c143..0b0bcc294f 100644 --- a/recipes/economist_free.recipe +++ b/recipes/economist_free.recipe @@ -269,6 +269,7 @@ class Economist(BasicNewsRecipe): self.timefmt = ' [' + edition_date + ']' else: url = 'https://www.economist.com/printedition' + # raw = open('/t/raw.html').read() raw = self.index_to_soup(url, raw=True) # with open('/t/raw.html', 'wb') as f: # f.write(raw) @@ -293,18 +294,19 @@ class Economist(BasicNewsRecipe): script_tag = soup.find("script", id="__NEXT_DATA__") if script_tag is not None: data = json.loads(script_tag.string) + # open('/t/raw.json', 'w').write(json.dumps(data, indent=2, sort_keys=True)) self.cover_url = safe_dict(data, "props", "pageProps", "content", "image", "main", "url", "canonical") self.log('Got cover:', self.cover_url) feeds_dict = defaultdict(list) for part in safe_dict(data, "props", "pageProps", "content", "hasPart", "parts"): section = safe_dict(part, "print", "section", "headline") or '' - title = safe_dict(part, "print", "headline") or '' + title = safe_dict(part, "headline") or '' url = safe_dict(part, "url", "canonical") or '' if not section or not title or not url: continue - desc = safe_dict(part, "print", "description") or '' - sub = safe_dict(part, "print", "subheadline") or '' + desc = safe_dict(part, "description") or '' + sub = safe_dict(part, "subheadline") or '' if sub and section != sub: desc = sub + ' :: ' + desc if '/interactive/' in url: