Update India Today and Indian Express

This commit is contained in:
Kovid Goyal 2022-07-26 08:46:01 +05:30
parent 86dbab7283
commit f7b592023f
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 14 additions and 4 deletions

View File

@ -9,7 +9,6 @@ class IndiaToday(BasicNewsRecipe):
use_embedded_content = False
remove_attributes = ['style', 'height', 'width']
ignore_duplicate_articles = {'url'}
extra_css = '[itemprop^="description"] {font-size: small; font-style: italic;}'
description = (
'Indias Most Reputed, Credible and Popular news magazine.'
' Read the most preferred magazine of 9.5 million Indians to access highly researched and unbiased content.'
@ -19,6 +18,7 @@ class IndiaToday(BasicNewsRecipe):
extra_css = '''
.body_caption{font-size:small;}
.image-alt{font-size:small;}
[itemprop^="description"] {font-size: small; font-style: italic;}
'''
def get_cover_url(self):
@ -63,11 +63,19 @@ class IndiaToday(BasicNewsRecipe):
else:
url = 'https://www.indiatoday.in' + url
title = self.tag_to_string(a).strip()
try:
desc = self.tag_to_string(a.findParent(
'span', attrs={'class':'field-content'}).findNext(
'div', attrs={'class':'views-field'})).strip()
except Exception:
desc = self.tag_to_string(a.findParent(
('h3','p')).findNext('span', attrs={'class':'kicket-text'})).strip()
if not url or not title:
continue
self.log('\t', title)
self.log('\t', desc)
self.log('\t\t', url)
sections[section].append({'title': title, 'url': url})
sections[section].append({'title': title, 'url': url, 'description': desc})
def sort_key(x):
section = x[0]
@ -99,4 +107,6 @@ class IndiaToday(BasicNewsRecipe):
style.extract()
for img in soup.findAll('img', attrs={'data-src': True}):
img['src'] = img['data-src']
for h2 in soup.findAll('h2'):
h2.name = 'h5'
return str(soup)

View File

@ -62,7 +62,7 @@ class IndianExpress(BasicNewsRecipe):
classes(
'share-social appstext ie-int-campign-ad ie-breadcrumb custom_read_button unitimg copyright'
' storytags pdsc-related-modify news-guard premium-story append_social_share'
' digital-subscriber-only h-text-widget'
' digital-subscriber-only h-text-widget ie-premium'
)
]
@ -107,7 +107,7 @@ class IndianExpress(BasicNewsRecipe):
def preprocess_html(self, soup):
h2 = soup.findAll('h2')
for sub in h2:
sub.name = 'h4'
sub.name = 'h5'
for span in soup.findAll(
'span', attrs={'class': ['ie-custom-caption', 'custom-caption']}
):