Update India Today and Indian Express

This commit is contained in:
Kovid Goyal 2022-07-26 08:46:01 +05:30
parent 86dbab7283
commit f7b592023f
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 14 additions and 4 deletions

View File

@@ -9,7 +9,6 @@ class IndiaToday(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
remove_attributes = ['style', 'height', 'width'] remove_attributes = ['style', 'height', 'width']
ignore_duplicate_articles = {'url'} ignore_duplicate_articles = {'url'}
extra_css = '[itemprop^="description"] {font-size: small; font-style: italic;}'
description = ( description = (
'Indias Most Reputed, Credible and Popular news magazine.' 'Indias Most Reputed, Credible and Popular news magazine.'
' Read the most preferred magazine of 9.5 million Indians to access highly researched and unbiased content.' ' Read the most preferred magazine of 9.5 million Indians to access highly researched and unbiased content.'
@@ -19,6 +18,7 @@ class IndiaToday(BasicNewsRecipe):
extra_css = ''' extra_css = '''
.body_caption{font-size:small;} .body_caption{font-size:small;}
.image-alt{font-size:small;} .image-alt{font-size:small;}
[itemprop^="description"] {font-size: small; font-style: italic;}
''' '''
def get_cover_url(self): def get_cover_url(self):
@@ -63,11 +63,19 @@ class IndiaToday(BasicNewsRecipe):
else: else:
url = 'https://www.indiatoday.in' + url url = 'https://www.indiatoday.in' + url
title = self.tag_to_string(a).strip() title = self.tag_to_string(a).strip()
try:
desc = self.tag_to_string(a.findParent(
'span', attrs={'class':'field-content'}).findNext(
'div', attrs={'class':'views-field'})).strip()
except Exception:
desc = self.tag_to_string(a.findParent(
('h3','p')).findNext('span', attrs={'class':'kicket-text'})).strip()
if not url or not title: if not url or not title:
continue continue
self.log('\t', title) self.log('\t', title)
self.log('\t', desc)
self.log('\t\t', url) self.log('\t\t', url)
sections[section].append({'title': title, 'url': url}) sections[section].append({'title': title, 'url': url, 'description': desc})
def sort_key(x): def sort_key(x):
section = x[0] section = x[0]
@@ -99,4 +107,6 @@ class IndiaToday(BasicNewsRecipe):
style.extract() style.extract()
for img in soup.findAll('img', attrs={'data-src': True}): for img in soup.findAll('img', attrs={'data-src': True}):
img['src'] = img['data-src'] img['src'] = img['data-src']
for h2 in soup.findAll('h2'):
h2.name = 'h5'
return str(soup) return str(soup)

View File

@@ -62,7 +62,7 @@ class IndianExpress(BasicNewsRecipe):
classes( classes(
'share-social appstext ie-int-campign-ad ie-breadcrumb custom_read_button unitimg copyright' 'share-social appstext ie-int-campign-ad ie-breadcrumb custom_read_button unitimg copyright'
' storytags pdsc-related-modify news-guard premium-story append_social_share' ' storytags pdsc-related-modify news-guard premium-story append_social_share'
' digital-subscriber-only h-text-widget' ' digital-subscriber-only h-text-widget ie-premium'
) )
] ]
@@ -107,7 +107,7 @@ class IndianExpress(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
h2 = soup.findAll('h2') h2 = soup.findAll('h2')
for sub in h2: for sub in h2:
sub.name = 'h4' sub.name = 'h5'
for span in soup.findAll( for span in soup.findAll(
'span', attrs={'class': ['ie-custom-caption', 'custom-caption']} 'span', attrs={'class': ['ie-custom-caption', 'custom-caption']}
): ):