Update Live Mint

This commit is contained in:
Kovid Goyal 2023-03-18 14:21:13 +05:30
parent fbec3adb2c
commit 32618d741f
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -1,14 +1,10 @@
#!/usr/bin/env python
import json import json
import re import re
from datetime import date from datetime import date
from calibre.web.feeds.news import BasicNewsRecipe, classes from calibre.web.feeds.news import BasicNewsRecipe, classes
is_saturday = date.today().weekday() == 5 is_saturday = date.today().weekday() == 5
class LiveMint(BasicNewsRecipe): class LiveMint(BasicNewsRecipe):
title = u'Live Mint' title = u'Live Mint'
description = 'Financial News from India.' description = 'Financial News from India.'
@ -34,28 +30,35 @@ class LiveMint(BasicNewsRecipe):
if is_saturday: if is_saturday:
oldest_article = 6 # days
extra_css = '''
#story-summary-0 {font-style:italic; color:#202020;}
.innerBanner, .storyImgSec {text-align:center; font-size:small;}
.author {font-size:small;}
'''
keep_only_tags = [ keep_only_tags = [
dict(name='h1'), classes('storyPageHeading storyContent innerBanner author')
dict(name='h2', attrs={'id':'story-summary-0'}),
dict(name='picture'),
dict(name='div', attrs={'class':'innerBanCaption'}),
dict(name='div', attrs={'id':'date-display-before-content'}),
dict(name='div', attrs={'class':'storyContent'}),
] ]
remove_tags = [ remove_tags = [
classes( classes('hidden-article-url sidebarAdv similarStoriesClass moreFromSecClass linkStories publishDetail'),
'sidebarAdv similarStoriesClass moreFromSecClass' dict(attrs={'id':['hidden-article-id-0', 'hidden-article-type-0']})
)
] ]
feeds = [ feeds = [
('News', 'https://lifestyle.livemint.com/rss/news'), ('Lounge News', 'https://lifestyle.livemint.com/rss/news'),
('Food', 'https://lifestyle.livemint.com/rss/food'), ('Food', 'https://lifestyle.livemint.com/rss/food'),
('Fashion', 'https://lifestyle.livemint.com/rss/fashion'), ('Fashion', 'https://lifestyle.livemint.com/rss/fashion'),
('How to Lounge', 'https://lifestyle.livemint.com/rss/how-to-lounge'), ('How to Lounge', 'https://lifestyle.livemint.com/rss/how-to-lounge'),
('Smart Living', 'https://lifestyle.livemint.com/rss/smart-living'), ('Smart Living', 'https://lifestyle.livemint.com/rss/smart-living'),
('Health', 'https://lifestyle.livemint.com/rss/health'),
('Relationships', 'https://lifestyle.livemint.com//rss/relationships')
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
if h2 := soup.find('h2'):
h2.name = 'p'
for img in soup.findAll('img', attrs={'data-img': True}): for img in soup.findAll('img', attrs={'data-img': True}):
img['src'] = img['data-img'] img['src'] = img['data-img']
return soup return soup
@ -72,7 +75,7 @@ class LiveMint(BasicNewsRecipe):
''' '''
keep_only_tags = [ keep_only_tags = [
dict(name='article'), dict(name='article', attrs={'id':lambda x: x and x.startswith('article_')}),
classes('contentSec') classes('contentSec')
] ]
remove_tags = [ remove_tags = [
@ -128,3 +131,6 @@ class LiveMint(BasicNewsRecipe):
for img in soup.findAll('img', attrs={'data-src': True}): for img in soup.findAll('img', attrs={'data-src': True}):
img['src'] = img['data-src'] img['src'] = img['data-src']
return soup return soup
def populate_article_metadata(self, article, soup, first):
article.title = article.title.replace('<span class="webrupee">₹</span>','₹')