...

2025-08-11 09:13:57 -04:00 · 2025-03-16 11:40:07 +05:30 · 2025-03-16 11:40:07 +05:30 · 26bb850d62
commit 26bb850d62
parent 3ebc50d03a
2 changed files with 40 additions and 5 deletions
--- a/recipes/1843.recipe
+++ b/recipes/1843.recipe
@ -11,13 +11,23 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag
 from calibre.web.feeds.news import BasicNewsRecipe
 def process_list(li_node):
    """Render the entries of a list node as concatenated ``<li>`` elements.

    Each entry in ``li_node['items']`` yields one ``<li>``: its ``textHtml``
    value is used when truthy, otherwise its ``text`` value (an empty
    string when that key is absent). No enclosing list tag is emitted.
    """
    rendered = []
    for entry in li_node['items']:
        # Prefer the rich HTML form; fall back to the plain text form.
        content = entry.get('textHtml') or entry.get('text', '')
        rendered.append(f'<li>{content}</li>')
    return ''.join(rendered)
 def process_node(node):
    ntype = node.get('type', '')
    if ntype == 'CROSSHEAD':
        if node.get('textHtml'):
            return f'<h4>{node.get("textHtml")}</h4>'
        return f'<h4>{node.get("text", "")}</h4>'
-    elif ntype == 'PARAGRAPH':
+    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
        if node.get('textHtml'):
            return f'<p>{node.get("textHtml")}</p>'
        return f'<p>{node.get("text", "")}</p>'
@ -34,9 +44,15 @@ def process_node(node):
        return f'<blockquote>{node.get("text", "")}</blockquote>'
    elif ntype == 'DIVIDER':
        return '<hr>'
    elif ntype == 'INFOGRAPHIC':
        if node.get('fallback'):
            return process_node(node['fallback'])
    elif ntype == 'INFOBOX':
        for x in safe_dict(node, 'components'):
            return f'<blockquote>{process_node(x)}</blockquote>'
    elif ntype == 'UNORDERED_LIST':
        if node.get('items'):
            return process_list(node)
    elif ntype:
        print('** ', ntype)
        return ''
@ -121,7 +137,7 @@ def process_url(url):
 class Econ1843(BasicNewsRecipe):
    title = 'Economist 1843'
-    language = 'en'
+    language = 'en_GB'
    encoding = 'utf-8'
    masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
--- a/recipes/economist_world_ahead.recipe
+++ b/recipes/economist_world_ahead.recipe
@ -12,13 +12,23 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag
 from calibre.web.feeds.news import BasicNewsRecipe
 def process_list(li_node):
    """Build the ``<li>`` markup for every entry of a list node.

    Iterates ``li_node['items']`` and emits one ``<li>`` per entry, using
    the entry's ``textHtml`` when it is truthy and its ``text`` (default
    empty string) otherwise. The result is the bare items with no wrapping
    list element.
    """
    pieces = []
    for item in li_node['items']:
        # Rich HTML wins over plain text when both are present.
        body = item.get('textHtml') or item.get('text', '')
        pieces.append(f'<li>{body}</li>')
    return ''.join(pieces)
 def process_node(node):
    ntype = node.get('type', '')
    if ntype == 'CROSSHEAD':
        if node.get('textHtml'):
            return f'<h4>{node.get("textHtml")}</h4>'
        return f'<h4>{node.get("text", "")}</h4>'
-    elif ntype == 'PARAGRAPH':
+    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
        if node.get('textHtml'):
            return f'<p>{node.get("textHtml")}</p>'
        return f'<p>{node.get("text", "")}</p>'
@ -35,9 +45,15 @@ def process_node(node):
        return f'<blockquote>{node.get("text", "")}</blockquote>'
    elif ntype == 'DIVIDER':
        return '<hr>'
    elif ntype == 'INFOGRAPHIC':
        if node.get('fallback'):
            return process_node(node['fallback'])
    elif ntype == 'INFOBOX':
        for x in safe_dict(node, 'components'):
            return f'<blockquote>{process_node(x)}</blockquote>'
    elif ntype == 'UNORDERED_LIST':
        if node.get('items'):
            return process_list(node)
    elif ntype:
        print('** ', ntype)
        return ''
@ -57,7 +73,10 @@ class JSONHasNoContent(ValueError):
 def load_article_from_json(raw):
    # open('/t/raw.json', 'w').write(raw)
    body = ''
-    data = json.loads(raw)['props']['pageProps']['cp2Content']
+    try:
        data = json.loads(raw)['props']['pageProps']['cp2Content']
    except Exception:
        data = json.loads(raw)['props']['pageProps']['content']
    body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
    body += f'<h1>{data["headline"]}</h1>'
    body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>'
@ -118,7 +137,7 @@ def process_url(url):
 class EconomistWorld(BasicNewsRecipe):
    title = 'The Economist World Ahead'
-    language = 'en'
+    language = 'en_GB'
    encoding = 'utf-8'
    masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'