Update indian express

This commit is contained in:
unkn0w7n 2024-11-28 09:53:17 +05:30
parent 5e4b2aa5ec
commit b716a00da5
2 changed files with 7 additions and 3 deletions

View File

@@ -28,7 +28,7 @@ def process_node(node):
if ntype == 'PARAGRAPH':
if node.get('textHtml'):
return f'<p>{node.get("textHtml")}</p>'
return f'<p>{node.get("tex", "")}</p>'
return f'<p>{node.get("text", "")}</p>'
elif ntype == 'IMAGE':
alt = "" if node.get("altText") is None else node.get("altText")
cap = ""

View File

@@ -45,7 +45,7 @@ class IndianExpress(BasicNewsRecipe):
'digital-subscriber-only h-text-widget ie-premium ie-first-publish adboxtop adsizes immigrationimg '
'next-story-wrap ie-ie-share next-story-box brand-logo quote_section ie-customshare osv-ad-class '
'custom-share o-story-paper-quite ie-network-commenting audio-player-tts-sec o-story-list subscriber_hide '
'author-social author-follow author-img premium_widget_below_article'
'author-social author-follow author-img premium_widget_below_article author-block'
)
]
@@ -136,9 +136,13 @@ class IndianExpress(BasicNewsRecipe):
return citem['content'].replace('300', '600')
def preprocess_html(self, soup):
if h2 := soup.find(attrs={'itemprop': 'description'}):
if h2 := (soup.find(attrs={"itemprop": "description"}) or soup.find(**classes("synopsis"))):
h2.name = 'p'
h2['id'] = 'sub-d'
for span in soup.findAll(
"span", attrs={"class": ["ie-custom-caption", "custom-caption"]}
):
span["id"] = "img-cap"
for img in soup.findAll('img', attrs={'data-src': True}):
img['src'] = img['data-src']
if span := soup.find('span', content=True, attrs={'itemprop': 'dateModified'}):