mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update livemint.recipe
This commit is contained in:
parent
ab5ff807af
commit
dc90b7840e
@ -97,9 +97,8 @@ class LiveMint(BasicNewsRecipe):
|
|||||||
.summary, .highlights, .synopsis {
|
.summary, .highlights, .synopsis {
|
||||||
font-weight:normal !important; font-style:italic; color:#202020;
|
font-weight:normal !important; font-style:italic; color:#202020;
|
||||||
}
|
}
|
||||||
h2 {font-size:normal !important;}
|
|
||||||
em, blockquote {color:#202020;}
|
em, blockquote {color:#202020;}
|
||||||
.moreAbout, .articleInfo, .metaData, .psTopicsHeading, .topicsTag {font-size:small;}
|
.moreAbout, .articleInfo, .metaData, .psTopicsHeading, .topicsTag, .auth {font-size:small;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
@ -109,12 +108,15 @@ class LiveMint(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['meta', 'link', 'svg', 'button', 'iframe']),
|
dict(name=['meta', 'link', 'svg', 'button', 'iframe']),
|
||||||
|
dict(attrs={'class':lambda x: x and x.startswith(
|
||||||
|
('storyPage_alsoRead__', 'storyPage_firstPublishDate__', 'storyPage_bcrumb__')
|
||||||
|
)}),
|
||||||
|
dict(attrs={'id':['faqSection', 'seoText', 'ellipsisId']}),
|
||||||
classes(
|
classes(
|
||||||
'trendingSimilarHeight moreNews mobAppDownload label msgError msgOk taboolaHeight gadgetSlider'
|
'trendingSimilarHeight moreNews mobAppDownload label msgError msgOk taboolaHeight gadgetSlider ninSec'
|
||||||
' socialHolder imgbig disclamerText disqus-comment-count openinApp2 lastAdSlot bs_logo author-widget'
|
' socialHolder imgbig disclamerText disqus-comment-count openinApp2 lastAdSlot bs_logo author-widget'
|
||||||
' datePublish sepStory premiumSlider moreStory Joinus moreAbout milestone benefitText checkCibilBtn'
|
' datePublish sepStory premiumSlider moreStory Joinus moreAbout milestone benefitText checkCibilBtn trade'
|
||||||
),
|
)
|
||||||
dict(attrs={'class':lambda x: x and x.startswith('storyPage_alsoRead__')})
|
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -160,22 +162,36 @@ class LiveMint(BasicNewsRecipe):
|
|||||||
return raw
|
return raw
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
for h2 in soup.findAll('h2'):
|
||||||
|
h2.name = 'h4'
|
||||||
|
auth = soup.find(attrs={'class':lambda x: x and x.startswith(('storyPage_authorInfo__', 'storyPage_authorSocial__'))})
|
||||||
|
if auth:
|
||||||
|
auth['class'] = 'auth'
|
||||||
|
summ = soup.find(attrs={'class':lambda x: x and x.startswith('storyPage_summary__')})
|
||||||
|
if summ:
|
||||||
|
summ['class'] = 'summary'
|
||||||
for strong in soup.findAll('strong'):
|
for strong in soup.findAll('strong'):
|
||||||
if strong.find('p'):
|
if strong.find('p'):
|
||||||
strong.name = 'div'
|
strong.name = 'div'
|
||||||
for embed in soup.findAll('div', attrs={'class':'embed'}):
|
for embed in soup.findAll('div', attrs={'class':'embed'}):
|
||||||
if nos := embed.find('noscript'):
|
nos = embed.find('noscript')
|
||||||
|
if nos:
|
||||||
nos.name = 'span'
|
nos.name = 'span'
|
||||||
for span in soup.findAll('figcaption'):
|
for span in soup.findAll('figcaption'):
|
||||||
span['id'] = 'img-cap'
|
span['id'] = 'img-cap'
|
||||||
for auth in soup.findAll('span', attrs={'class':lambda x: x and 'articleInfo' in x.split()}):
|
for auth in soup.findAll('span', attrs={'class':lambda x: x and 'articleInfo' in x.split()}):
|
||||||
auth.name = 'div'
|
auth.name = 'div'
|
||||||
for span in soup.findAll('span', attrs={'class':'exclusive'}):
|
|
||||||
span.extract()
|
|
||||||
for img in soup.findAll('img', attrs={'data-src': True}):
|
for img in soup.findAll('img', attrs={'data-src': True}):
|
||||||
img['src'] = img['data-src']
|
img['src'] = img['data-src']
|
||||||
|
for span in soup.findAll('span', attrs={'class':'exclusive'}):
|
||||||
|
span.extract()
|
||||||
|
for al in soup.findAll('a', attrs={'class':'manualbacklink'}):
|
||||||
|
pa = al.findParent('p')
|
||||||
|
if pa:
|
||||||
|
pa.extract()
|
||||||
if wa := soup.find(**classes('autobacklink-topic')):
|
if wa := soup.find(**classes('autobacklink-topic')):
|
||||||
if p := wa.findParent('p'):
|
p = wa.findParent('p')
|
||||||
|
if p:
|
||||||
p.extract()
|
p.extract()
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user