This commit is contained in:
Kovid Goyal 2023-06-20 17:05:15 +05:30
commit b440de92c9
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 28 additions and 9 deletions

View File

@@ -16,6 +16,13 @@ local_edition = None
# For past editions, set date to, for example, '2023-01-28'
past_edition = None
is_sunday = date.today().weekday() == 6
if past_edition:
year, month, day = (int(x) for x in past_edition.split('-'))
dt = date(year, month, day)
is_sunday = dt.weekday() == 6
class TheHindu(BasicNewsRecipe):
title = 'The Hindu'
__author__ = 'unkn0wn'
@@ -54,9 +61,11 @@ class TheHindu(BasicNewsRecipe):
if self.output_profile.short_name.startswith('kindle'):
if not past_edition:
self.title = 'The Hindu ' + date.today().strftime('%b %d, %Y')
else:
self.title = 'The Hindu ' + dt.strftime('%b %d, %Y')
def parse_index(self):
mag_url = None
global local_edition
if local_edition or past_edition:
if local_edition is None:
@@ -66,8 +75,12 @@ class TheHindu(BasicNewsRecipe):
today = past_edition
self.log('Downloading past edition of', local_edition + ' from ' + today)
url = absurl('/todays-paper/' + today + '/' + local_edition + '/')
if is_sunday:
mag_url = url + '?supplement=' + local_edition + '-sm'
else:
url = 'https://www.thehindu.com/todays-paper/'
if is_sunday:
mag_url = url + '?supplement=th_chennai-sm'
raw = self.index_to_soup(url, raw=True)
soup = self.index_to_soup(raw)
@@ -79,6 +92,12 @@ class TheHindu(BasicNewsRecipe):
raise ValueError(
'The Hindu Newspaper is not published Today.'
)
if mag_url:
self.log('\nFetching Sunday Magazine')
soup = self.index_to_soup(mag_url)
ans2 = self.hindu_parse_index(soup)
if ans2:
return ans + ans2
return ans
def hindu_parse_index(self, soup):

View File

@@ -39,7 +39,7 @@ class BusinessLine(BasicNewsRecipe):
]
remove_tags = [
classes('hide-mobile comments-shares share-page editiondetails') classes('hide-mobile comments-shares share-page editiondetails author-img')
]
def preprocess_html(self, soup):
@@ -50,13 +50,13 @@ class BusinessLine(BasicNewsRecipe):
return soup
def parse_index(self):
dt = date.today().strftime('%Y-%m-%d')
# For past editions, set date to, for example, '2023-01-28'
# dt = '2023-01-28'
if local_edition:
yr = str(date.today().year) url = absurl('/todays-paper/' + dt + '/' + local_edition + '/')
mn = date.today().strftime('%m')
dy = date.today().strftime('%d')
url = absurl('/todays-paper/' + yr + '-' + mn + '-' + dy + '/' + local_edition + '/')
else:
url = 'https://www.thehindubusinessline.com/todays-paper/' url = absurl('/todays-paper/' + dt + '/bl_chennai/')
raw = self.index_to_soup(url, raw=True)
soup = self.index_to_soup(raw)
ans = self.hindu_parse_index(soup)
@@ -74,8 +74,8 @@ class BusinessLine(BasicNewsRecipe):
if not self.tag_to_string(script).strip().startswith('let grouped_articles = {}'):
continue
if script is not None:
art = re.search(r'grouped_articles = ({\"[^<]+?]})', self.tag_to_string(script)) art = re.search(r'grouped_articles = ({\".*)', self.tag_to_string(script))
data = json.loads(art.group(1)) data = json.JSONDecoder().raw_decode(art.group(1))[0]
feeds_dict = defaultdict(list)