From 89bc2a94322de94e3787ae13babcbcc681b4b2ef Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Tue, 1 Jul 2025 20:46:36 +0530 Subject: [PATCH] Update economist_news.recipe --- recipes/economist_news.recipe | 39 ++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/recipes/economist_news.recipe b/recipes/economist_news.recipe index b33f9f5bb6..3fcb347c37 100644 --- a/recipes/economist_news.recipe +++ b/recipes/economist_news.recipe @@ -39,6 +39,33 @@ def process_info_box(bx): return info +def parse_txt(ty): + typ = ty.get('type', '') + children = ty.get('children', []) + attr = ty.get('attributes', [{}])[0].get('value', '#') + + tag_map = { + 'text': lambda: [ty.get('value', '')], + 'scaps': lambda: [f'{"".join(parse_txt(c))}' for c in children], + 'bold': lambda: [f'{"".join(parse_txt(c))}' for c in children], + 'drop_caps': lambda: [f'{"".join(parse_txt(c))}' for c in children], + 'italic': lambda: [f'{"".join(parse_txt(c))}' for c in children], + 'linebreak': lambda: ['
'], + 'external_link': lambda: [f'{"".join(parse_txt(children[0]))}'] if children else [], + 'internal_link': lambda: [f'{"".join(parse_txt(children[0]))}'] if children else [], + 'ufinish': lambda: [text for c in children for text in parse_txt(c)] + } + + if typ in tag_map: + yield from tag_map[typ]() + else: + print('** ', typ) + + +def parse_textjson(nt): + return ''.join(''.join(parse_txt(n)) for n in nt) + + def process_web_node(node): ntype = node.get('type', '') if ntype == 'CROSSHEAD': @@ -48,6 +75,8 @@ def process_web_node(node): elif ntype in ['PARAGRAPH', 'BOOK_INFO']: if node.get('textHtml'): return f'

{node.get("textHtml")}

' + elif node.get('textJson'): + return f'

{parse_textjson(node["textJson"])}

' return f'

{node.get("text", "")}

' elif ntype == 'IMAGE': alt = '' if node.get('altText') is None else node.get('altText') @@ -59,6 +88,8 @@ def process_web_node(node): elif ntype == 'PULL_QUOTE': if node.get('textHtml'): return f'
{node.get("textHtml")}
' + elif node.get('textJson'): + return f'
{parse_textjson(node["textJson"])}
' return f'
{node.get("text", "")}
' elif ntype == 'DIVIDER': return '
' @@ -167,7 +198,7 @@ class EconomistNews(BasicNewsRecipe): encoding = 'utf-8' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' - __author__ = 'Kovid Goyal' + __author__ = 'unkn0wn' description = ( 'Global news and current affairs from a European' ' perspective. Get the latest articles here.' @@ -240,6 +271,11 @@ class EconomistNews(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self, *args, **kwargs) return br + def economist_test_article(self): + return [('Articles', [{'title': 'test', + 'url': 'https://www.economist.com/interactive/britain/2025/06/26/how-wimbledon-gets-its-grass-so-green' + }])] + def economist_return_index(self, ans): if not ans: raise NoArticles( @@ -251,6 +287,7 @@ class EconomistNews(BasicNewsRecipe): return ans def parse_index(self): + # return self.economist_test_article() query = { 'operationName': 'FindHomepage', 'variables': '{"homepageType":"MOBILE"}',