From 3ebc50d03a24bdb2b15ea6f2462433b453024e31 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sun, 16 Mar 2025 11:39:07 +0530
Subject: [PATCH] Update economist

---
 recipes/economist.recipe      | 25 +++++++++++++++++++++----
 recipes/economist_free.recipe | 25 +++++++++++++++++++++----
 recipes/hindufeeds.recipe     |  5 ++---
 3 files changed, 44 insertions(+), 11 deletions(-)
diff --git a/recipes/economist.recipe b/recipes/economist.recipe
index 0648e9e228..a22fe21969 100644
--- a/recipes/economist.recipe
+++ b/recipes/economist.recipe
@@ -85,6 +85,14 @@ def load_article_from_json(raw, root):
     for node in data.get('text') or ():
         process_node(node, article)
 
+def process_web_list(li_node):
+    '''Render an UNORDERED_LIST node's items as one <ul> of <li> elements.'''
+    # Each item contributes its textHtml when present, otherwise its plain text
+    # (empty string if neither); "or ()" tolerates a missing or null items key.
+    entries = (li.get('textHtml') or li.get('text', '') for li in li_node.get('items') or ())
+    li_html = ''.join(f'<li>{entry}</li>' for entry in entries)
+    # Bare <li> elements are invalid outside a list container, so wrap them.
+    return f'<ul>{li_html}</ul>' if li_html else ''
 
 def process_web_node(node):
     ntype = node.get('type', '')
@@ -92,7 +100,7 @@ def process_web_node(node):
         if node.get('textHtml'):
             return f'<h4>{node.get("textHtml")}</h4>'
         return f'<h4>{node.get("text", "")}</h4>'
-    elif ntype == 'PARAGRAPH':
+    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
         if node.get('textHtml'):
             return f'<p>{node.get("textHtml")}</p>'
         return f'<p>{node.get("text", "")}</p>'
@@ -109,9 +117,15 @@ def process_web_node(node):
         return f'<blockquote>{node.get("text", "")}</blockquote>'
     elif ntype == 'DIVIDER':
         return '<hr>'
+    elif ntype == 'INFOGRAPHIC':
+        if node.get('fallback'):
+            return process_web_node(node['fallback'])
     elif ntype == 'INFOBOX':
         for x in safe_dict(node, 'components'):
             return f'<blockquote>{process_web_node(x)}</blockquote>'
+    elif ntype == 'UNORDERED_LIST':
+        if node.get('items'):
+            return process_web_list(node)
     elif ntype:
         print('** ', ntype)
         return ''
@@ -120,7 +134,10 @@ def process_web_node(node):
 def load_article_from_web_json(raw):
     # open('/t/raw.json', 'w').write(raw)
     body = ''
-    data = json.loads(raw)['props']['pageProps']['content']
+    try:
+        data = json.loads(raw)['props']['pageProps']['cp2Content']
+    except Exception:
+        data = json.loads(raw)['props']['pageProps']['content']
     body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
     body += f'<h1>{data["headline"]}</h1>'
     if data.get('rubric') and data.get('rubric') is not None:
@@ -182,7 +199,7 @@ def process_url(url):
 
 class Economist(BasicNewsRecipe):
     title = 'The Economist'
-    language = 'en'
+    language = 'en_GB'
     encoding = 'utf-8'
     masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
 
@@ -274,7 +291,7 @@ class Economist(BasicNewsRecipe):
 
     def economist_test_article(self):
         return [('Articles', [{'title':'test',
-            'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court'
+            'url':'https://www.economist.com/leaders/2025/03/13/americas-bullied-allies-need-to-toughen-up'
         }])]
 
     def economist_return_index(self, ans):
diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe
index 0648e9e228..a22fe21969 100644
--- a/recipes/economist_free.recipe
+++ b/recipes/economist_free.recipe
@@ -85,6 +85,14 @@ def load_article_from_json(raw, root):
     for node in data.get('text') or ():
         process_node(node, article)
 
+def process_web_list(li_node):
+    '''Render an UNORDERED_LIST node's items as one <ul> of <li> elements.'''
+    # Each item contributes its textHtml when present, otherwise its plain text
+    # (empty string if neither); "or ()" tolerates a missing or null items key.
+    entries = (li.get('textHtml') or li.get('text', '') for li in li_node.get('items') or ())
+    li_html = ''.join(f'<li>{entry}</li>' for entry in entries)
+    # Bare <li> elements are invalid outside a list container, so wrap them.
+    return f'<ul>{li_html}</ul>' if li_html else ''
 
 def process_web_node(node):
     ntype = node.get('type', '')
@@ -92,7 +100,7 @@ def process_web_node(node):
         if node.get('textHtml'):
             return f'<h4>{node.get("textHtml")}</h4>'
         return f'<h4>{node.get("text", "")}</h4>'
-    elif ntype == 'PARAGRAPH':
+    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
         if node.get('textHtml'):
             return f'<p>{node.get("textHtml")}</p>'
         return f'<p>{node.get("text", "")}</p>'
@@ -109,9 +117,15 @@ def process_web_node(node):
         return f'<blockquote>{node.get("text", "")}</blockquote>'
     elif ntype == 'DIVIDER':
         return '<hr>'
+    elif ntype == 'INFOGRAPHIC':
+        if node.get('fallback'):
+            return process_web_node(node['fallback'])
     elif ntype == 'INFOBOX':
         for x in safe_dict(node, 'components'):
             return f'<blockquote>{process_web_node(x)}</blockquote>'
+    elif ntype == 'UNORDERED_LIST':
+        if node.get('items'):
+            return process_web_list(node)
     elif ntype:
         print('** ', ntype)
         return ''
@@ -120,7 +134,10 @@ def process_web_node(node):
 def load_article_from_web_json(raw):
     # open('/t/raw.json', 'w').write(raw)
     body = ''
-    data = json.loads(raw)['props']['pageProps']['content']
+    try:
+        data = json.loads(raw)['props']['pageProps']['cp2Content']
+    except Exception:
+        data = json.loads(raw)['props']['pageProps']['content']
     body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
     body += f'<h1>{data["headline"]}</h1>'
     if data.get('rubric') and data.get('rubric') is not None:
@@ -182,7 +199,7 @@ def process_url(url):
 
 class Economist(BasicNewsRecipe):
     title = 'The Economist'
-    language = 'en'
+    language = 'en_GB'
     encoding = 'utf-8'
     masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
 
@@ -274,7 +291,7 @@ class Economist(BasicNewsRecipe):
 
     def economist_test_article(self):
         return [('Articles', [{'title':'test',
-            'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court'
+            'url':'https://www.economist.com/leaders/2025/03/13/americas-bullied-allies-need-to-toughen-up'
         }])]
 
     def economist_return_index(self, ans):
diff --git a/recipes/hindufeeds.recipe b/recipes/hindufeeds.recipe
index 73148c0ea9..1fc7e76cb0 100644
--- a/recipes/hindufeeds.recipe
+++ b/recipes/hindufeeds.recipe
@@ -21,7 +21,6 @@ class TheHindufeeds(BasicNewsRecipe):
         .author, .dateLine, .publish-time {font-size:small; font-weight:bold;}
         .subhead, .subhead_lead, .bold {font-weight:bold;}
         .update-publish-time, .publish-time-new {font-size:small; }
-        img {display:block; margin:0 auto;}
         .italic {font-style:italic; color:#202020;}
     '''
 
@@ -55,7 +54,7 @@ class TheHindufeeds(BasicNewsRecipe):
 
     def preprocess_html(self, soup):
         for cap in soup.findAll('p', attrs={'class': 'caption'}):
-            cap.name = 'figcaption'
+            cap.name = 'div'
         for img in soup.findAll('img', attrs={'data-original': True}):
             if img['data-original'].endswith('1x1_spacer.png'):
                 source = img.findPrevious('source', srcset=True)
@@ -91,7 +90,7 @@ class TheHindufeeds(BasicNewsRecipe):
         ('Business', 'https://www.thehindu.com/business/feeder/default.rss'),
         ('World', 'https://www.thehindu.com/news/international/feeder/default.rss'),
         # ('Sport', 'https://www.thehindu.com/sport/feeder/default.rss'),
-        ('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'),
+        # ('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'),
         # ('Crossword', 'https://crossword.thehindu.com/?utm_source=thehindu&utm_medium=mainmenufeeder/default.rss'),
         ('Science', 'https://www.thehindu.com/sci-tech/science/feeder/default.rss'),
         ('Life and Style', 'https://www.thehindu.com/life-and-style/feeder/default.rss'),