Update livemint.recipe

This commit is contained in:
unkn0w7n 2024-07-24 11:29:31 +05:30
parent ab5ff807af
commit dc90b7840e

View File

@@ -97,9 +97,8 @@ class LiveMint(BasicNewsRecipe):
.summary, .highlights, .synopsis { .summary, .highlights, .synopsis {
font-weight:normal !important; font-style:italic; color:#202020; font-weight:normal !important; font-style:italic; color:#202020;
} }
h2 {font-size:normal !important;}
em, blockquote {color:#202020;} em, blockquote {color:#202020;}
.moreAbout, .articleInfo, .metaData, .psTopicsHeading, .topicsTag {font-size:small;} .moreAbout, .articleInfo, .metaData, .psTopicsHeading, .topicsTag, .auth {font-size:small;}
''' '''
keep_only_tags = [ keep_only_tags = [
@@ -109,12 +108,15 @@ class LiveMint(BasicNewsRecipe):
] ]
remove_tags = [ remove_tags = [
dict(name=['meta', 'link', 'svg', 'button', 'iframe']), dict(name=['meta', 'link', 'svg', 'button', 'iframe']),
dict(attrs={'class':lambda x: x and x.startswith(
('storyPage_alsoRead__', 'storyPage_firstPublishDate__', 'storyPage_bcrumb__')
)}),
dict(attrs={'id':['faqSection', 'seoText', 'ellipsisId']}),
classes( classes(
'trendingSimilarHeight moreNews mobAppDownload label msgError msgOk taboolaHeight gadgetSlider' 'trendingSimilarHeight moreNews mobAppDownload label msgError msgOk taboolaHeight gadgetSlider ninSec'
' socialHolder imgbig disclamerText disqus-comment-count openinApp2 lastAdSlot bs_logo author-widget' ' socialHolder imgbig disclamerText disqus-comment-count openinApp2 lastAdSlot bs_logo author-widget'
' datePublish sepStory premiumSlider moreStory Joinus moreAbout milestone benefitText checkCibilBtn' ' datePublish sepStory premiumSlider moreStory Joinus moreAbout milestone benefitText checkCibilBtn trade'
), )
dict(attrs={'class':lambda x: x and x.startswith('storyPage_alsoRead__')})
] ]
feeds = [ feeds = [
@@ -160,22 +162,36 @@ class LiveMint(BasicNewsRecipe):
return raw return raw
def preprocess_html(self, soup): def preprocess_html(self, soup):
for h2 in soup.findAll('h2'):
h2.name = 'h4'
auth = soup.find(attrs={'class':lambda x: x and x.startswith(('storyPage_authorInfo__', 'storyPage_authorSocial__'))})
if auth:
auth['class'] = 'auth'
summ = soup.find(attrs={'class':lambda x: x and x.startswith('storyPage_summary__')})
if summ:
summ['class'] = 'summary'
for strong in soup.findAll('strong'): for strong in soup.findAll('strong'):
if strong.find('p'): if strong.find('p'):
strong.name = 'div' strong.name = 'div'
for embed in soup.findAll('div', attrs={'class':'embed'}): for embed in soup.findAll('div', attrs={'class':'embed'}):
if nos := embed.find('noscript'): nos = embed.find('noscript')
if nos:
nos.name = 'span' nos.name = 'span'
for span in soup.findAll('figcaption'): for span in soup.findAll('figcaption'):
span['id'] = 'img-cap' span['id'] = 'img-cap'
for auth in soup.findAll('span', attrs={'class':lambda x: x and 'articleInfo' in x.split()}): for auth in soup.findAll('span', attrs={'class':lambda x: x and 'articleInfo' in x.split()}):
auth.name = 'div' auth.name = 'div'
for span in soup.findAll('span', attrs={'class':'exclusive'}):
span.extract()
for img in soup.findAll('img', attrs={'data-src': True}): for img in soup.findAll('img', attrs={'data-src': True}):
img['src'] = img['data-src'] img['src'] = img['data-src']
for span in soup.findAll('span', attrs={'class':'exclusive'}):
span.extract()
for al in soup.findAll('a', attrs={'class':'manualbacklink'}):
pa = al.findParent('p')
if pa:
pa.extract()
if wa := soup.find(**classes('autobacklink-topic')): if wa := soup.find(**classes('autobacklink-topic')):
if p := wa.findParent('p'): p = wa.findParent('p')
if p:
p.extract() p.extract()
return soup return soup