Update The Hindu

fetch yesterday's paper by default
This commit is contained in:
unkn0w7n 2025-04-06 18:13:46 +05:30
parent 25c7a4c871
commit 640236e09c
2 changed files with 12 additions and 8 deletions

View File

@ -5,6 +5,7 @@ import base64
import json import json
import re import re
from collections import defaultdict
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@ -102,8 +103,6 @@ class Barrons(BasicNewsRecipe):
raw=True, raw=True,
) )
feeds = []
scrn = json.loads(archive)['screens'][0]['frames'] scrn = json.loads(archive)['screens'][0]['frames']
self.log( self.log(
'Available Editions: ', 'Available Editions: ',
@ -123,20 +122,25 @@ class Barrons(BasicNewsRecipe):
self.timefmt = ' [' + nme[6:] + ']' self.timefmt = ' [' + nme[6:] + ']'
break break
feeds_dict = defaultdict(list)
data = json.loads(self.index_to_soup(index + bseurl, raw=True)) data = json.loads(self.index_to_soup(index + bseurl, raw=True))
for x in data['screens'][0]['frames']: for x in data['screens'][0]['frames']:
if x['type'] != 'article': if x['type'] != 'article':
continue continue
url = index + theatre + cid + '?screen_ids=' + x['articleId'] url = index + theatre + cid + '?screen_ids=' + x['articleId']
title = x['title']['text'] title = x['title']['text']
section = 'Articles'
if x.get('label'):
section = x['label'].get('text', 'Articles').split('|')[0].strip()
desc = '' desc = ''
if x.get('summary'): if x.get('summary'):
desc = x['summary']['text'] desc = x['summary']['text']
if x.get('byline'): if x.get('byline'):
desc = x['byline']['text'] + ' | ' + desc desc = x['byline']['text'] + ' | ' + desc
self.log(' ', title, '\n\t', desc) self.log(' ', title, '\n\t', desc)
feeds.append({'title': title, 'description': desc, 'url': url}) feeds_dict[section].append({'title': title, 'url': url, 'description': desc})
return [('Articles', feeds)] return list(feeds_dict.items())
def preprocess_raw_html(self, raw, url): def preprocess_raw_html(self, raw, url):
rdata = json.loads(raw) rdata = json.loads(raw)

View File

@ -3,7 +3,7 @@
import json import json
import re import re
from collections import defaultdict from collections import defaultdict
from datetime import date from datetime import date, timedelta
from calibre.web.feeds.news import BasicNewsRecipe, classes from calibre.web.feeds.news import BasicNewsRecipe, classes
@ -15,9 +15,9 @@ def absurl(url):
class TheHindu(BasicNewsRecipe): class TheHindu(BasicNewsRecipe):
title = 'The Hindu' title = 'The Hindu '
__author__ = 'unkn0wn' __author__ = 'unkn0wn'
description = "Articles from The Hindu, Today's Paper." description = "Articles from The Hindu, Today\'s Paper. This recipe now works only for yesterday\'s edition."
language = 'en_IN' language = 'en_IN'
no_stylesheets = True no_stylesheets = True
masthead_url = 'https://www.thehindu.com/theme/images/th-online/thehindu-logo.svg' masthead_url = 'https://www.thehindu.com/theme/images/th-online/thehindu-logo.svg'
@ -75,7 +75,7 @@ class TheHindu(BasicNewsRecipe):
past_edition = self.recipe_specific_options.get('date') past_edition = self.recipe_specific_options.get('date')
dt = date.today() dt = date.today() - timedelta(days=1)
if past_edition and isinstance(past_edition, str): if past_edition and isinstance(past_edition, str):
year, month, day = (int(x) for x in past_edition.split('-')) year, month, day = (int(x) for x in past_edition.split('-'))
dt = date(year, month, day) dt = date(year, month, day)