From 26af835e54cf5c4d56e88b793f48da2d5f7ec844 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Wed, 24 Jul 2024 11:30:08 +0530
Subject: [PATCH] ...

---
Review notes (ignored by git am):
- barrons.recipe: removed a stray trailing quote after strftime('%Y') on the
  added issue_url line; it left an unterminated string literal.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; confirm context lines against the target tree.
- NOTE(review): the hindustan_times_print hunk calls date(...); confirm that
  the recipe imports `date` from datetime (imports are not visible here).

 recipes/barrons.recipe               | 15 ++++++++++-
 recipes/business_today.recipe        |  6 ++---
 recipes/hindustan_times_print.recipe |  5 +++-
 recipes/horizons.recipe              | 37 +++++++++++++++++++---------
 recipes/lrb.recipe                   | 11 +++++++++
 recipes/wsj_news.recipe              |  6 ++++-
 6 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/recipes/barrons.recipe b/recipes/barrons.recipe
index bedea5cb3f..319b13c6b6 100644
--- a/recipes/barrons.recipe
+++ b/recipes/barrons.recipe
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import re
 from collections import defaultdict
 from datetime import date
@@ -77,12 +79,23 @@ class barrons(BasicNewsRecipe):
         ]
         return br
 
+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (YYYYMMDD format)',
+            'long': 'For example, 20240722.\nIf it didn\'t work, try again later.'
+        }
+    }
+
     def parse_index(self):
         self.log(
             '\n***\nif this recipe fails, report it on: '
             'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
         )
-        archive = self.index_to_soup('https://www.barrons.com/magazine?archives=' + date.today().strftime('%Y'))
+        issue_url = 'https://www.barrons.com/magazine?archives=' + date.today().strftime('%Y')
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            issue_url = 'https://www.barrons.com/magazine?archives=' + d
+        archive = self.index_to_soup(issue_url)
         issue = archive.find(**prefixed_classes('BarronsTheme--archive-box--'))
         self.timefmt = ' [' + self.tag_to_string(issue.find(**prefixed_classes('BarronsTheme--date--'))) + ']'
         self.description = self.tag_to_string(issue.find(**prefixed_classes('BarronsTheme--headline--')))
diff --git a/recipes/business_today.recipe b/recipes/business_today.recipe
index 881dc16fe0..b8188de06a 100644
--- a/recipes/business_today.recipe
+++ b/recipes/business_today.recipe
@@ -51,14 +51,14 @@ class BT(BasicNewsRecipe):
             '\n***\nif this recipe fails, report it on: '
             'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
         )
-        soup = self.index_to_soup('https://www.businesstoday.in')
-        a = soup.findAll('a', attrs={'class':'mag_sld_img'})[1]
-        url = a['href']
 
         d = self.recipe_specific_options.get('date')
         if d and isinstance(d, str):
             url = 'https://www.businesstoday.in/magazine/issue/' + d
         else:
+            soup = self.index_to_soup('https://www.businesstoday.in')
+            a = soup.findAll('a', attrs={'class':'mag_sld_img'})[1]
+            url = a['href']
             self.cover_url = a.img['data-src'].split('?')[0]
 
         self.log('issue =', url)
diff --git a/recipes/hindustan_times_print.recipe b/recipes/hindustan_times_print.recipe
index f6bc688aa1..a378f777fc 100644
--- a/recipes/hindustan_times_print.recipe
+++ b/recipes/hindustan_times_print.recipe
@@ -55,7 +55,10 @@ class ht(BasicNewsRecipe):
         if p and isinstance(p, str):
             today = p
 
-        self.timefmt = ' [%s]' % today
+        day, month, year = (int(x) for x in today.split('/'))
+        dt = date(year, month, day)
+
+        self.timefmt = ' [%s]' % dt.strftime('%b %d, %Y')
 
         today = today.replace('/', '%2F')
 
diff --git a/recipes/horizons.recipe b/recipes/horizons.recipe
index 5a28915076..098ff00406 100644
--- a/recipes/horizons.recipe
+++ b/recipes/horizons.recipe
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://www.cirsd.org/en/horizons
 '''
@@ -28,23 +30,34 @@ class horizons(BasicNewsRecipe):
         classes('back-link'),
         dict(name='div', attrs={'class':'single-post-footer'})
     ]
+
+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': 'For example, https://www.cirsd.org/en/horizons/horizons-winter-2024--issue-no-25',
+        }
+    }
 
     def get_browser(self):
         return BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
 
     def parse_index(self):
-        soup = self.index_to_soup('https://www.cirsd.org/en/horizons')
-        a = soup.findAll('a', href=True, attrs={'class':'horizon-gallery-box'})[0] #use 1 for previous edition
-        url = a['href']
-        if url.startswith('/'):
-            url = 'https://www.cirsd.org' + url
-        self.cover_url = a.find('img')['src']
-        self.log(self.cover_url)
-        issue = a.find('div', attrs={'class':'horizon-gallery-title'})
-        if issue:
-            self.title = self.tag_to_string(issue).strip()
-            self.timefmt = ' [' + self.tag_to_string(issue).strip().replace('Horizons ', '') + ']'
-            self.log('Downloading Issue: ', self.timefmt, self.title)
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            url = d
+        else:
+            soup = self.index_to_soup('https://www.cirsd.org/en/horizons')
+            a = soup.findAll('a', href=True, attrs={'class':'horizon-gallery-box'})[0] #use 1 for previous edition
+            url = a['href']
+            if url.startswith('/'):
+                url = 'https://www.cirsd.org' + url
+            self.cover_url = a.find('img')['src']
+            self.log(self.cover_url)
+            issue = a.find('div', attrs={'class':'horizon-gallery-title'})
+            if issue:
+                self.title = self.tag_to_string(issue).strip()
+                self.timefmt = ' [' + self.tag_to_string(issue).strip().replace('Horizons ', '') + ']'
+                self.log('Downloading Issue: ', self.timefmt, self.title)
 
         soup = self.index_to_soup(url)
         feeds = []
diff --git a/recipes/lrb.recipe b/recipes/lrb.recipe
index ec30147f2a..93c0d927a2 100644
--- a/recipes/lrb.recipe
+++ b/recipes/lrb.recipe
@@ -67,7 +67,18 @@ class LondonReviewOfBooksPayed(BasicNewsRecipe):
             img['src'] = 'https://www.lrb.co.uk/storage/400_filter/images/' + img['data-appsrc'].split('/images/')[-1]
         return soup
 
+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': 'For example, https://www.lrb.co.uk/the-paper/v46/n01',
+            'default': 'https://www.lrb.co.uk/the-paper/'
+        }
+    }
+
     def parse_index(self):
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            self.INDEX = d
         soup = self.index_to_soup(self.INDEX)
         container = soup.find('div', attrs={'class': 'article-issue-cover-image'})
         if container:
diff --git a/recipes/wsj_news.recipe b/recipes/wsj_news.recipe
index 3c8912b7de..9926ae906c 100644
--- a/recipes/wsj_news.recipe
+++ b/recipes/wsj_news.recipe
@@ -168,7 +168,11 @@ class WSJ(BasicNewsRecipe):
             sec_parse = json.loads(self.index_to_soup(index + v, raw=True))
             data = sec_parse['articles']
             for art in data:
-                dt = datetime.fromtimestamp(data[art]['pubdateNumber'] + time.timezone)
+                try:
+                    tme = data[art]['pubdateNumber']
+                except Exception:
+                    tme = data[art]['origPubdateNumber']
+                dt = datetime.fromtimestamp(tme + time.timezone)
                 if (datetime.now() - dt) > timedelta(self.oldest_article):
                     continue
                 title = data[art]['headline']