mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update India Today, Outlook Magazine and Live Mint
This commit is contained in:
parent
8114376b2f
commit
0ebd840d6a
@ -63,11 +63,10 @@ class IndiaToday(BasicNewsRecipe):
|
||||
sections = {}
|
||||
|
||||
date = soup.find(attrs={'class':lambda x: x and x.startswith('MagazineEdition_edition__date')})
|
||||
edition = soup.find(attrs={'class':lambda x: x and x.startswith('MagazineEdition_magazineprime')})
|
||||
self.timefmt =' (' + self.tag_to_string(edition) + ') [' + self.tag_to_string(date).strip() + ']'
|
||||
p = edition.findNext('p')
|
||||
if p:
|
||||
self.description = self.tag_to_string(p).strip()
|
||||
edition = soup.find(attrs={'class':'prime__magazine'})
|
||||
self.timefmt = '(' + self.tag_to_string(edition).strip() +') [' + self.tag_to_string(date).strip() + ']'
|
||||
if p := edition.findNext('p'):
|
||||
self.description = self.tag_to_string(p)
|
||||
self.log('Downloading Issue: ', self.timefmt)
|
||||
|
||||
for tag in soup.findAll('div', attrs={'class': lambda x: x and 'NoCard_story__grid__' in x}):
|
||||
@ -125,11 +124,5 @@ class IndiaToday(BasicNewsRecipe):
|
||||
quo.name = 'blockquote'
|
||||
return soup
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||
image = soup.find('img', src=True, attrs={'class':'i-amphtml-fill-content'})
|
||||
if image is not None:
|
||||
self.add_toc_thumbnail(article, image['src'])
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('.in/','.in/amp/')
|
||||
|
@ -24,9 +24,14 @@ class LiveMint(BasicNewsRecipe):
|
||||
|
||||
remove_empty_feeds = True
|
||||
|
||||
if is_saturday:
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup(
|
||||
'https://www.magzter.com/IN/HT-Digital-Streams-Ltd./Mint-Mumbai/Newspaper/'
|
||||
)
|
||||
for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
|
||||
return citem['content']
|
||||
|
||||
cover_url = 'https://epsfs.hindustantimes.com/MINT/2022/04/16/Delhi/Delhi/5_01/bf867ea1_01_mr.jpg'
|
||||
if is_saturday:
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
@ -54,14 +59,13 @@ class LiveMint(BasicNewsRecipe):
|
||||
img['src'] = img['data-img']
|
||||
return soup
|
||||
else:
|
||||
# some WSJ articles won't load
|
||||
|
||||
extra_css = '''
|
||||
#img-cap {font-size:small; text-align:center;}
|
||||
#auth-info {font-size:small; text-align:center;}
|
||||
.highlights {font-style:italic;}
|
||||
.summary{font-style:italic; color:#404040;}
|
||||
'''
|
||||
cover_url = 'https://epsfs.hindustantimes.com/MINT/2022/04/05/Delhi/Delhi/5_01/1ec7ad14_01_mr.jpg'
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
|
@ -18,13 +18,17 @@ class outlook(BasicNewsRecipe):
|
||||
remove_attributes = ['height', 'width', 'style']
|
||||
ignore_duplicate_articles = {'url'}
|
||||
resolve_internal_links = True
|
||||
masthead_url = 'https://www.outlookindia.com/images/home_new_v4/logo_outlook.svg'
|
||||
extra_css = '''
|
||||
.story-summary{font-style:italic; color:#202020;}
|
||||
.author_wrapper, .relatedCategory{font-size:small; color:#404040;}
|
||||
#figcap{font-size:small; text-align:center;}
|
||||
'''
|
||||
|
||||
keep_only_tags = [classes('__story_detail')]
|
||||
remove_tags = [
|
||||
classes(
|
||||
'social_sharing_article left_trending left-sticky __tag_links'
|
||||
' next_prev_stories downarrow uparrow more_from_author_links next prev __related_stories_thumbs'
|
||||
'social_sharing_article left_trending left-sticky __tag_links next_prev_stories '
|
||||
'downarrow uparrow more_from_author_links next prev __related_stories_thumbs'
|
||||
)
|
||||
]
|
||||
|
||||
@ -33,8 +37,8 @@ class outlook(BasicNewsRecipe):
|
||||
div = soup.find('div', attrs={'class':'wrapper'})
|
||||
a = div.find('a', href=lambda x: x and x.startswith('/magazine/issue/'))
|
||||
url = a['href']
|
||||
self.log('Downloading issue:', url)
|
||||
self.timefmt = ' [' + self.tag_to_string(a) + ']'
|
||||
self.timefmt = ' [' + self.tag_to_string(a.find('p')).strip() + ']'
|
||||
self.log('Downloading issue:', url, self.timefmt)
|
||||
soup = self.index_to_soup('https://www.outlookindia.com' + url)
|
||||
cover = soup.find(**classes('listingPage_lead_story'))
|
||||
self.cover_url = cover.find('img', attrs={'src': True})['src']
|
||||
@ -42,7 +46,7 @@ class outlook(BasicNewsRecipe):
|
||||
|
||||
for h3 in soup.findAll(['h3', 'h4'],
|
||||
attrs={'class': 'tk-kepler-std-condensed-subhead'}):
|
||||
a = h3.find('a', href=lambda x: x)
|
||||
a = h3.find('a', href=True)
|
||||
url = a['href']
|
||||
title = self.tag_to_string(a)
|
||||
desc = ''
|
||||
@ -55,6 +59,11 @@ class outlook(BasicNewsRecipe):
|
||||
ans.append({'title': title, 'url': url, 'description': desc})
|
||||
return [('Articles', ans)]
|
||||
|
||||
def preprocess_html(self,soup):
|
||||
for fig in soup.findAll('figure'):
|
||||
fig['id'] = 'figcap'
|
||||
return soup
|
||||
|
||||
def preprocess_raw_html(self, raw, *a):
|
||||
return raw
|
||||
m = re.search('<!-- NewsArticle Schema -->.*?script.*?>', raw, flags=re.DOTALL)
|
||||
|
Loading…
x
Reference in New Issue
Block a user