From 3ebc50d03a24bdb2b15ea6f2462433b453024e31 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sun, 16 Mar 2025 11:39:07 +0530 Subject: [PATCH] Update economist --- recipes/economist.recipe | 25 +++++++++++++++++++++---- recipes/economist_free.recipe | 25 +++++++++++++++++++++---- recipes/hindufeeds.recipe | 5 ++--- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/recipes/economist.recipe b/recipes/economist.recipe index 0648e9e228..a22fe21969 100644 --- a/recipes/economist.recipe +++ b/recipes/economist.recipe @@ -85,6 +85,14 @@ def load_article_from_json(raw, root): for node in data.get('text') or (): process_node(node, article) +def process_web_list(li_node): + li_html = '' + for li in li_node['items']: + if li.get('textHtml'): + li_html += f'
<li>{li.get("textHtml")}</li>' + else: + li_html += f'<li>{li.get("text", "")}</li>' + return li_html def process_web_node(node): ntype = node.get('type', '') @@ -92,7 +100,7 @@ def process_web_node(node): if node.get('textHtml'): return f'

    {node.get("textHtml")}

    ' return f'

    {node.get("text", "")}

    ' - elif ntype == 'PARAGRAPH': + elif ntype in ['PARAGRAPH', 'BOOK_INFO']: if node.get('textHtml'): return f'

    {node.get("textHtml")}

    ' return f'

    {node.get("text", "")}

    ' @@ -109,9 +117,15 @@ def process_web_node(node): return f'
    {node.get("text", "")}
    ' elif ntype == 'DIVIDER': return '
    ' + elif ntype == 'INFOGRAPHIC': + if node.get('fallback'): + return process_web_node(node['fallback']) elif ntype == 'INFOBOX': for x in safe_dict(node, 'components'): return f'
    {process_web_node(x)}
    ' + elif ntype == 'UNORDERED_LIST': + if node.get('items'): + return process_web_list(node) elif ntype: print('** ', ntype) return '' @@ -120,7 +134,10 @@ def process_web_node(node): def load_article_from_web_json(raw): # open('/t/raw.json', 'w').write(raw) body = '' - data = json.loads(raw)['props']['pageProps']['content'] + try: + data = json.loads(raw)['props']['pageProps']['cp2Content'] + except Exception: + data = json.loads(raw)['props']['pageProps']['content'] body += f'
    {data.get("flyTitle", "")}
    ' body += f'

    {data["headline"]}

    ' if data.get('rubric') and data.get('rubric') is not None: @@ -182,7 +199,7 @@ def process_url(url): class Economist(BasicNewsRecipe): title = 'The Economist' - language = 'en' + language = 'en_GB' encoding = 'utf-8' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' @@ -274,7 +291,7 @@ class Economist(BasicNewsRecipe): def economist_test_article(self): return [('Articles', [{'title':'test', - 'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court' + 'url':'https://www.economist.com/leaders/2025/03/13/americas-bullied-allies-need-to-toughen-up' }])] def economist_return_index(self, ans): diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe index 0648e9e228..a22fe21969 100644 --- a/recipes/economist_free.recipe +++ b/recipes/economist_free.recipe @@ -85,6 +85,14 @@ def load_article_from_json(raw, root): for node in data.get('text') or (): process_node(node, article) +def process_web_list(li_node): + li_html = '' + for li in li_node['items']: + if li.get('textHtml'): + li_html += f'
<li>{li.get("textHtml")}</li>' + else: + li_html += f'<li>{li.get("text", "")}</li>' + return li_html def process_web_node(node): ntype = node.get('type', '') @@ -92,7 +100,7 @@ def process_web_node(node): if node.get('textHtml'): return f'

    {node.get("textHtml")}

    ' return f'

    {node.get("text", "")}

    ' - elif ntype == 'PARAGRAPH': + elif ntype in ['PARAGRAPH', 'BOOK_INFO']: if node.get('textHtml'): return f'

    {node.get("textHtml")}

    ' return f'

    {node.get("text", "")}

    ' @@ -109,9 +117,15 @@ def process_web_node(node): return f'
    {node.get("text", "")}
    ' elif ntype == 'DIVIDER': return '
    ' + elif ntype == 'INFOGRAPHIC': + if node.get('fallback'): + return process_web_node(node['fallback']) elif ntype == 'INFOBOX': for x in safe_dict(node, 'components'): return f'
    {process_web_node(x)}
    ' + elif ntype == 'UNORDERED_LIST': + if node.get('items'): + return process_web_list(node) elif ntype: print('** ', ntype) return '' @@ -120,7 +134,10 @@ def process_web_node(node): def load_article_from_web_json(raw): # open('/t/raw.json', 'w').write(raw) body = '' - data = json.loads(raw)['props']['pageProps']['content'] + try: + data = json.loads(raw)['props']['pageProps']['cp2Content'] + except Exception: + data = json.loads(raw)['props']['pageProps']['content'] body += f'
    {data.get("flyTitle", "")}
    ' body += f'

    {data["headline"]}

    ' if data.get('rubric') and data.get('rubric') is not None: @@ -182,7 +199,7 @@ def process_url(url): class Economist(BasicNewsRecipe): title = 'The Economist' - language = 'en' + language = 'en_GB' encoding = 'utf-8' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' @@ -274,7 +291,7 @@ class Economist(BasicNewsRecipe): def economist_test_article(self): return [('Articles', [{'title':'test', - 'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court' + 'url':'https://www.economist.com/leaders/2025/03/13/americas-bullied-allies-need-to-toughen-up' }])] def economist_return_index(self, ans): diff --git a/recipes/hindufeeds.recipe b/recipes/hindufeeds.recipe index 73148c0ea9..1fc7e76cb0 100644 --- a/recipes/hindufeeds.recipe +++ b/recipes/hindufeeds.recipe @@ -21,7 +21,6 @@ class TheHindufeeds(BasicNewsRecipe): .author, .dateLine, .publish-time {font-size:small; font-weight:bold;} .subhead, .subhead_lead, .bold {font-weight:bold;} .update-publish-time, .publish-time-new {font-size:small; } - img {display:block; margin:0 auto;} .italic {font-style:italic; color:#202020;} ''' @@ -55,7 +54,7 @@ class TheHindufeeds(BasicNewsRecipe): def preprocess_html(self, soup): for cap in soup.findAll('p', attrs={'class': 'caption'}): - cap.name = 'figcaption' + cap.name = 'div' for img in soup.findAll('img', attrs={'data-original': True}): if img['data-original'].endswith('1x1_spacer.png'): source = img.findPrevious('source', srcset=True) @@ -91,7 +90,7 @@ class TheHindufeeds(BasicNewsRecipe): ('Business', 'https://www.thehindu.com/business/feeder/default.rss'), ('World', 'https://www.thehindu.com/news/international/feeder/default.rss'), # ('Sport', 'https://www.thehindu.com/sport/feeder/default.rss'), - ('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'), + # ('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'), # ('Crossword', 
'https://crossword.thehindu.com/?utm_source=thehindu&utm_medium=mainmenufeeder/default.rss'), ('Science', 'https://www.thehindu.com/sci-tech/science/feeder/default.rss'), ('Life and Style', 'https://www.thehindu.com/life-and-style/feeder/default.rss'),