Merge branch 'master' of https://github.com/unkn0w7n/calibre

2025-11-21 22:13:04 -05:00 · 2024-11-28 11:31:02 +05:30 · 2024-11-28 11:31:02 +05:30 · 904bccb353
commit 904bccb353
parent 5e4b2aa5ec 0adbd16f21
2 changed files with 18 additions and 11 deletions
--- a/recipes/economist_world_ahead.recipe
+++ b/recipes/economist_world_ahead.recipe
@@ -28,7 +28,7 @@ def process_node(node):
    if ntype == 'PARAGRAPH':
        if node.get('textHtml'):
            return f'<p>{node.get("textHtml")}</p>'
-        return f'<p>{node.get("tex", "")}</p>'
+        return f'<p>{node.get("text", "")}</p>'
    elif ntype == 'IMAGE':
        alt = "" if node.get("altText") is None else node.get("altText")
        cap = ""
@@ -49,6 +49,7 @@ def process_node(node):
        print('** ', ntype)
        return ''
 def safe_dict(data, *names):
    ans = data
    for x in names:
@@ -187,15 +188,14 @@ class EconomistWorld(BasicNewsRecipe):
    }
    def get_browser(self, *args, **kwargs):
-        kwargs['user_agent'] = 'Mozilla/5.0 (Linux; Android 14; 330333QCG Build/AP1A.140705.005; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/125.0.6422.165 Mobile Safari/537.36 Lamarr/3.37.0-3037003 (android)' # noqa
+        kwargs['user_agent'] = (
+            'Mozilla/5.0 (Linux; Android 14; 330333QCG Build/AP1A.140705.005; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/125.0.6422.165 Mobile Safari/537.36 Lamarr/3.37.0-3037003 (android)' # noqa
+        )
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
-        br.addheaders += [
+        br.addheaders += [('x-requested-with', 'com.economist.lamarr')]
-            ('x-requested-with', 'com.economist.lamarr')
-        ]
        return br
    def economist_test_article(self):
        self.cover_url = None
        return [('Articles', [{'title':'test',
            'url':'https://www.economist.com/the-world-ahead/2024/11/20/ten-business-trends-for-2025-and-forecasts-for-15-industries'
        }])]
@@ -257,9 +257,12 @@ class EconomistWorld(BasicNewsRecipe):
        # open('/t/raw.html', 'wb').write(raw.encode('utf-8'))
        root_ = parse(raw)
        if '/interactive/' in url:
-            return '<html><body><article><h1>' + root_.xpath('//h1')[0].text + '</h1><em>' \
+            return (
-                        + 'This article is supposed to be read in a browser' \
+                '<html><body><article><h1>'
-                            + '</em></article></body></html>'
+                + root_.xpath('//h1')[0].text + '</h1><em>'
+                + 'This article is supposed to be read in a browser'
+                + '</em></article></body></html>'
+            )
        script = root_.xpath('//script[@id="__NEXT_DATA__"]')
--- a/recipes/indian_express.recipe
+++ b/recipes/indian_express.recipe
@@ -45,7 +45,7 @@ class IndianExpress(BasicNewsRecipe):
            'digital-subscriber-only h-text-widget ie-premium ie-first-publish adboxtop adsizes immigrationimg '
            'next-story-wrap ie-ie-share next-story-box brand-logo quote_section ie-customshare osv-ad-class '
            'custom-share o-story-paper-quite ie-network-commenting audio-player-tts-sec o-story-list subscriber_hide '
-            'author-social author-follow author-img premium_widget_below_article'
+            'author-social author-follow author-img premium_widget_below_article author-block'
        )
    ]
@@ -136,9 +136,13 @@ class IndianExpress(BasicNewsRecipe):
        return citem['content'].replace('300', '600')
    def preprocess_html(self, soup):
-        if h2 := soup.find(attrs={'itemprop': 'description'}):
+        if h2 := (soup.find(attrs={"itemprop": "description"}) or soup.find(**classes("synopsis"))):
            h2.name = 'p'
            h2['id'] = 'sub-d'
+        for span in soup.findAll(
+            "span", attrs={"class": ["ie-custom-caption", "custom-caption"]}
+        ):
+            span["id"] = "img-cap"
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        if span := soup.find('span', content=True, attrs={'itemprop': 'dateModified'}):