1843 & econ news

2025-12-10 23:25:01 -05:00 · 2025-07-31 16:54:40 +05:30 · 2025-07-31 16:54:40 +05:30 · f7fd4b3c8a
commit f7fd4b3c8a
parent 0fcccff441
2 changed files with 18 additions and 16 deletions
--- a/recipes/1843.recipe
+++ b/recipes/1843.recipe
@@ -6,8 +6,10 @@ from urllib.parse import quote, urlencode
 from uuid import uuid4

 from html5_parser import parse
+from mechanize import Request
 from lxml import etree

+from calibre import browser
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.web.feeds.news import BasicNewsRecipe

@@ -87,7 +89,7 @@ def process_web_node(node):
    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
        if node.get('textHtml'):
            return f'\n<p>{node.get("textHtml")}</p>'
-        elif node.get('textJson'):
+        if node.get('textJson'):
            return f'\n<p>{parse_textjson(node["textJson"])}</p>'
        return f'\n<p>{node.get("text", "")}</p>'
    elif (ntype == 'IMAGE') or (node.get('__typename', '') == 'ImageComponent'):
@@ -104,13 +106,13 @@ def process_web_node(node):
    elif ntype == 'PULL_QUOTE':
        if node.get('textHtml'):
            return f'<blockquote>{node.get("textHtml")}</blockquote>'
-        elif node.get('textJson'):
+        if node.get('textJson'):
            return f'<blockquote>{parse_textjson(node["textJson"])}</blockquote>'
        return f'<blockquote>{node.get("text", "")}</blockquote>'
    elif ntype == 'BLOCK_QUOTE':
        if node.get('textHtml'):
            return f'<blockquote><i>{node.get("textHtml")}</i></blockquote>'
-        elif node.get('textJson'):
+        if node.get('textJson'):
            return f'<blockquote><i>{parse_textjson(node["textJson"])}</i></blockquote>'
        return f'<blockquote><i>{node.get("text", "")}</i></blockquote>'
    elif ntype == 'DIVIDER':
@@ -162,9 +164,7 @@ class NoArticles(Exception):


 def get_content(url_):
-    from mechanize import Request

-    from calibre import browser
    headers = {
        'User-Agent': 'TheEconomist-Liskov-android',
        'accept': 'multipart/mixed; deferSpec=20220824, application/json',
@@ -287,8 +287,6 @@ class Econ1843(BasicNewsRecipe):
        # open('/t/raw.html', 'wb').write(raw.encode('utf-8'))
        html = load_article_from_web_json(raw)
        root = parse(html)
-        for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'):
-            x.getparent().remove(x)
        # the economist uses <small> for small caps with a custom font
        for init in root.xpath('//span[@data-caps="initial"]'):
            init.set('style', 'font-weight:bold;')
@@ -296,7 +294,10 @@ class Econ1843(BasicNewsRecipe):
            if x.text and len(x) == 0:
                x.text = x.text.upper()
                x.tag = 'span'
-                x.set('style', 'text-transform: uppercase; font-size: 0.85em; letter-spacing: 0.05em;')
+                x.set(
+                    'style',
+                    'text-transform: uppercase; font-size: 0.85em; letter-spacing: 0.05em;',
+                )
        for h2 in root.xpath('//h2'):
            h2.tag = 'h4'
        for x in root.xpath('//figcaption'):
--- a/recipes/economist_news.recipe
+++ b/recipes/economist_news.recipe
@@ -8,8 +8,10 @@ from urllib.parse import quote, urlencode
 from uuid import uuid4

 from html5_parser import parse
+from mechanize import Request
 from lxml import etree

+from calibre import browser
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.web.feeds.news import BasicNewsRecipe

@@ -89,7 +91,7 @@ def process_web_node(node):
    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
        if node.get('textHtml'):
            return f'\n<p>{node.get("textHtml")}</p>'
-        elif node.get('textJson'):
+        if node.get('textJson'):
            return f'\n<p>{parse_textjson(node["textJson"])}</p>'
        return f'\n<p>{node.get("text", "")}</p>'
    elif (ntype == 'IMAGE') or (node.get('__typename', '') == 'ImageComponent'):
@@ -106,13 +108,13 @@ def process_web_node(node):
    elif ntype == 'PULL_QUOTE':
        if node.get('textHtml'):
            return f'<blockquote>{node.get("textHtml")}</blockquote>'
-        elif node.get('textJson'):
+        if node.get('textJson'):
            return f'<blockquote>{parse_textjson(node["textJson"])}</blockquote>'
        return f'<blockquote>{node.get("text", "")}</blockquote>'
    elif ntype == 'BLOCK_QUOTE':
        if node.get('textHtml'):
            return f'<blockquote><i>{node.get("textHtml")}</i></blockquote>'
-        elif node.get('textJson'):
+        if node.get('textJson'):
            return f'<blockquote><i>{parse_textjson(node["textJson"])}</i></blockquote>'
        return f'<blockquote><i>{node.get("text", "")}</i></blockquote>'
    elif ntype == 'DIVIDER':
@@ -164,9 +166,7 @@ class NoArticles(Exception):


 def get_content(url_):
-    from mechanize import Request

-    from calibre import browser
    headers = {
        'User-Agent': 'TheEconomist-Liskov-android',
        'accept': 'multipart/mixed; deferSpec=20220824, application/json',
@@ -326,8 +326,6 @@ class EconomistNews(BasicNewsRecipe):
        # open('/t/raw.html', 'wb').write(raw.encode('utf-8'))
        html = load_article_from_web_json(raw)
        root = parse(html)
-        for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'):
-            x.getparent().remove(x)
        # the economist uses <small> for small caps with a custom font
        for init in root.xpath('//span[@data-caps="initial"]'):
            init.set('style', 'font-weight:bold;')
@@ -335,7 +333,10 @@ class EconomistNews(BasicNewsRecipe):
            if x.text and len(x) == 0:
                x.text = x.text.upper()
                x.tag = 'span'
-                x.set('style', 'text-transform: uppercase; font-size: 0.85em; letter-spacing: 0.05em;')
+                x.set(
+                    'style',
+                    'text-transform: uppercase; font-size: 0.85em; letter-spacing: 0.05em;'
+                )
        for h2 in root.xpath('//h2'):
            h2.tag = 'h4'
        for x in root.xpath('//figcaption'):