This commit is contained in:
Kovid Goyal 2024-01-13 13:18:28 +05:30
commit 0fc78f89e0
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -66,6 +66,9 @@ class LiveMint(BasicNewsRecipe):
def preprocess_html(self, soup):
    '''
    Tidy up the parsed article markup and hand the same soup back.

    Three passes are made over ``soup``:

    1. The first ``<h2>`` found (if any) is renamed to ``<p>``.
    2. Every ``<h2>`` still present whose text, as returned by
       ``self.tag_to_string`` and stripped of surrounding whitespace,
       starts with ``Also`` is extracted from the tree.
    3. Every ``<img>`` that carries a ``data-img`` attribute gets that
       value copied into its ``src`` attribute.

    :param soup: parsed document to modify in place.
    :return: the same ``soup`` object, after modification.
    '''
    first_heading = soup.find('h2')
    if first_heading:
        # Renamed before the scan below, so this element is no longer
        # matched by the 'h2' search.
        first_heading.name = 'p'

    for heading in soup.findAll('h2'):
        heading_text = self.tag_to_string(heading).strip()
        if not heading_text.startswith('Also'):
            continue
        heading.extract()

    for image in soup.findAll('img', attrs={'data-img': True}):
        image['src'] = image['data-img']

    return soup
@ -95,6 +98,7 @@ class LiveMint(BasicNewsRecipe):
classes('contentSec') classes('contentSec')
] ]
remove_tags = [ remove_tags = [
dict(name=['meta', 'link', 'svg', 'button', 'iframe']),
classes( classes(
'trendingSimilarHeight moreNews mobAppDownload label msgError msgOk taboolaHeight' 'trendingSimilarHeight moreNews mobAppDownload label msgError msgOk taboolaHeight'
' socialHolder imgbig disclamerText disqus-comment-count openinApp2 lastAdSlot' ' socialHolder imgbig disclamerText disqus-comment-count openinApp2 lastAdSlot'
@ -138,6 +142,9 @@ class LiveMint(BasicNewsRecipe):
return raw return raw
def preprocess_html(self, soup): def preprocess_html(self, soup):
for strong in soup.findAll('strong'):
if strong.find('p'):
strong.name = 'div'
for embed in soup.findAll('div', attrs={'class':'embed'}): for embed in soup.findAll('div', attrs={'class':'embed'}):
if nos := embed.find('noscript'): if nos := embed.find('noscript'):
nos.name = 'span' nos.name = 'span'