From 26bb850d62c3a091ef95ad9d75366c171871473c Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sun, 16 Mar 2025 11:40:07 +0530 Subject: [PATCH] ... --- recipes/1843.recipe | 20 ++++++++++++++++++-- recipes/economist_world_ahead.recipe | 25 ++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/recipes/1843.recipe b/recipes/1843.recipe index 75c7017f09..4b7901f5cd 100644 --- a/recipes/1843.recipe +++ b/recipes/1843.recipe @@ -11,13 +11,23 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag from calibre.web.feeds.news import BasicNewsRecipe +def process_list(li_node): + li_html = '' + for li in li_node['items']: + if li.get('textHtml'): + li_html += f'
{node.get("textHtml")}
' return f'{node.get("text", "")}
' @@ -34,9 +44,15 @@ def process_node(node): return f'{node.get("text", "")}' elif ntype == 'DIVIDER': return '
{process_node(x)}' + elif ntype == 'UNORDERED_LIST': + if node.get('items'): + return process_list(node) elif ntype: print('** ', ntype) return '' @@ -121,7 +137,7 @@ def process_url(url): class Econ1843(BasicNewsRecipe): title = 'Economist 1843' - language = 'en' + language = 'en_GB' encoding = 'utf-8' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' diff --git a/recipes/economist_world_ahead.recipe b/recipes/economist_world_ahead.recipe index 6d7e2336ee..3c9c39fb6e 100644 --- a/recipes/economist_world_ahead.recipe +++ b/recipes/economist_world_ahead.recipe @@ -12,13 +12,23 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag from calibre.web.feeds.news import BasicNewsRecipe +def process_list(li_node): + li_html = '' + for li in li_node['items']: + if li.get('textHtml'): + li_html += f'
{node.get("textHtml")}
' return f'{node.get("text", "")}
' @@ -35,9 +45,15 @@ def process_node(node): return f'{node.get("text", "")}' elif ntype == 'DIVIDER': return '
{process_node(x)}' + elif ntype == 'UNORDERED_LIST': + if node.get('items'): + return process_list(node) elif ntype: print('** ', ntype) return '' @@ -57,7 +73,10 @@ class JSONHasNoContent(ValueError): def load_article_from_json(raw): # open('/t/raw.json', 'w').write(raw) body = '' - data = json.loads(raw)['props']['pageProps']['cp2Content'] + try: + data = json.loads(raw)['props']['pageProps']['cp2Content'] + except Exception: + data = json.loads(raw)['props']['pageProps']['content'] body += f'