From b716a00da5e35f4afb663eaff36e374fac891993 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 28 Nov 2024 09:53:17 +0530 Subject: [PATCH] Update indian express --- recipes/economist_world_ahead.recipe | 2 +- recipes/indian_express.recipe | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/recipes/economist_world_ahead.recipe b/recipes/economist_world_ahead.recipe index 03c0519065..d54afffead 100644 --- a/recipes/economist_world_ahead.recipe +++ b/recipes/economist_world_ahead.recipe @@ -28,7 +28,7 @@ def process_node(node): if ntype == 'PARAGRAPH': if node.get('textHtml'): return f'
{node.get("textHtml")}
' - return f'{node.get("tex", "")}
' + return f'{node.get("text", "")}
' elif ntype == 'IMAGE': alt = "" if node.get("altText") is None else node.get("altText") cap = "" diff --git a/recipes/indian_express.recipe b/recipes/indian_express.recipe index 91394fef19..8d6392636c 100644 --- a/recipes/indian_express.recipe +++ b/recipes/indian_express.recipe @@ -45,7 +45,7 @@ class IndianExpress(BasicNewsRecipe): 'digital-subscriber-only h-text-widget ie-premium ie-first-publish adboxtop adsizes immigrationimg ' 'next-story-wrap ie-ie-share next-story-box brand-logo quote_section ie-customshare osv-ad-class ' 'custom-share o-story-paper-quite ie-network-commenting audio-player-tts-sec o-story-list subscriber_hide ' - 'author-social author-follow author-img premium_widget_below_article' + 'author-social author-follow author-img premium_widget_below_article author-block' ) ] @@ -136,9 +136,13 @@ class IndianExpress(BasicNewsRecipe): return citem['content'].replace('300', '600') def preprocess_html(self, soup): - if h2 := soup.find(attrs={'itemprop': 'description'}): + if h2 := (soup.find(attrs={"itemprop": "description"}) or soup.find(**classes("synopsis"))): h2.name = 'p' h2['id'] = 'sub-d' + for span in soup.findAll( + "span", attrs={"class": ["ie-custom-caption", "custom-caption"]} + ): + span["id"] = "img-cap" for img in soup.findAll('img', attrs={'data-src': True}): img['src'] = img['data-src'] if span := soup.find('span', content=True, attrs={'itemprop': 'dateModified'}):