From 640236e09ca917cc204a0f36e15ae7221bf414f2 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sun, 6 Apr 2025 18:13:46 +0530 Subject: [PATCH 1/2] Update The Hindu fetch yesterdays paper by default --- recipes/barrons.recipe | 12 ++++++++---- recipes/hindu.recipe | 8 ++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/recipes/barrons.recipe b/recipes/barrons.recipe index e23958d88e..bf3dca9b2d 100644 --- a/recipes/barrons.recipe +++ b/recipes/barrons.recipe @@ -5,6 +5,7 @@ import base64 import json import re +from collections import defaultdict from calibre.web.feeds.news import BasicNewsRecipe @@ -102,8 +103,6 @@ class Barrons(BasicNewsRecipe): raw=True, ) - feeds = [] - scrn = json.loads(archive)['screens'][0]['frames'] self.log( 'Available Editions: ', @@ -123,20 +122,25 @@ class Barrons(BasicNewsRecipe): self.timefmt = ' [' + nme[6:] + ']' break + feeds_dict = defaultdict(list) + data = json.loads(self.index_to_soup(index + bseurl, raw=True)) for x in data['screens'][0]['frames']: if x['type'] != 'article': continue url = index + theatre + cid + '?screen_ids=' + x['articleId'] title = x['title']['text'] + section = 'Articles' + if x.get('label'): + section = x['label'].get('text', 'Articles').split('|')[0].strip() desc = '' if x.get('summary'): desc = x['summary']['text'] if x.get('byline'): desc = x['byline']['text'] + ' | ' + desc self.log(' ', title, '\n\t', desc) - feeds.append({'title': title, 'description': desc, 'url': url}) - return [('Articles', feeds)] + feeds_dict[section].append({'title': title, 'url': url, 'description': desc}) + return list(feeds_dict.items()) def preprocess_raw_html(self, raw, url): rdata = json.loads(raw) diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe index c9d0316ac6..a103ab5633 100644 --- a/recipes/hindu.recipe +++ b/recipes/hindu.recipe @@ -3,7 +3,7 @@ import json import re from collections import defaultdict -from datetime import date +from datetime import date, timedelta from calibre.web.feeds.news import BasicNewsRecipe, classes @@ -15,9 +15,9 @@ def absurl(url): class TheHindu(BasicNewsRecipe): - title = 'The Hindu' + title = 'The Hindu ' __author__ = 'unkn0wn' - description = "Articles from The Hindu, Today's Paper." + description = "Articles from The Hindu, Today\'s Paper. This recipe now works only for yesterday\'s edition." language = 'en_IN' no_stylesheets = True masthead_url = 'https://www.thehindu.com/theme/images/th-online/thehindu-logo.svg' @@ -75,7 +75,7 @@ class TheHindu(BasicNewsRecipe): past_edition = self.recipe_specific_options.get('date') - dt = date.today() + dt = date.today() - timedelta(days=1) if past_edition and isinstance(past_edition, str): year, month, day = (int(x) for x in past_edition.split('-')) dt = date(year, month, day) From a0edf08944793db72ea5fbdd4e7507574f38033a Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sun, 6 Apr 2025 18:15:14 +0530 Subject: [PATCH 2/2] ... --- recipes/hindu.recipe | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe index a103ab5633..88a0067a0c 100644 --- a/recipes/hindu.recipe +++ b/recipes/hindu.recipe @@ -15,7 +15,7 @@ def absurl(url): class TheHindu(BasicNewsRecipe): - title = 'The Hindu ' + title = 'The Hindu Print Edition' __author__ = 'unkn0wn' description = "Articles from The Hindu, Today\'s Paper. This recipe now works only for yesterday\'s edition." language = 'en_IN' @@ -68,6 +68,7 @@ class TheHindu(BasicNewsRecipe): return soup def parse_index(self): + self.title = 'The Hindu' local_edition = 'th_chennai' d = self.recipe_specific_options.get('location') if d and isinstance(d, str):