diff --git a/recipes/ancient_egypt.recipe b/recipes/ancient_egypt.recipe
index fdede7664d..c2116a00a4 100644
--- a/recipes/ancient_egypt.recipe
+++ b/recipes/ancient_egypt.recipe
@@ -46,13 +46,21 @@ class ancientegypt(BasicNewsRecipe):
             exp.name = 'p'
         return soup
 
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 136'
+        }
+    }
+
     def parse_index(self):
         soup = self.index_to_soup('https://the-past.com/category/magazines/ae/')
         art = soup.find('article', attrs={'class':lambda x: x and 'tag-magazines' in x.split()})
         url = art.h2.a['href']
-        # for past editions, add url
-        # url = ''
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            url = 'https://the-past.com/magazines/ae/ancient-egypt-magazine-' + d + '/'
 
         issue = self.index_to_soup(url)
         ti = issue.find('h1', attrs={'class':lambda x: x and 'post-title' in x.split()})
diff --git a/recipes/atlantic.recipe b/recipes/atlantic.recipe
index 9766ed9ae3..0733c23895 100644
--- a/recipes/atlantic.recipe
+++ b/recipes/atlantic.recipe
@@ -199,7 +199,9 @@ class TheAtlantic(BasicNewsRecipe):
             self.cover_url = img['src']
         current_section, current_articles = 'Cover Story', []
         feeds = []
-        for x in soup.findAll(**prefix_classes('TocFeaturedSection_heading__ TocSection_heading__ TocHeroGridItem_hedLink__ TocGridItem_hedLink__')):
+        for x in soup.findAll(**prefix_classes(
+            'TocFeaturedSection_heading__ TocSection_heading__ TocHeroGridItem_hedLink__ TocGridItem_hedLink__ RiverGridItem_hedLink__'
+        )):
             cls = x['class']
             if not isinstance(cls, str):
                 cls = ' '.join(cls)
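
The ancient_egypt hunk above is the template repeated throughout this patch: declare a recipe_specific_options dict on the recipe class, then read the value back with a presence and type guard where the URL or cutoff is computed. A minimal sketch of the pattern, using a hypothetical recipe and hypothetical URLs rather than anything from this patch:

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleMagazine(BasicNewsRecipe):
        title = 'Example Magazine'  # hypothetical, for illustration only

        # The 'short' and 'long' strings read as the help text surfaced
        # to the user when the option is offered at download time.
        recipe_specific_options = {
            'issue': {
                'short': 'Enter the Issue Number you want to download ',
                'long': 'For example, 136'
            }
        }

        def parse_index(self):
            url = 'https://example.com/latest/'  # default: newest issue
            d = self.recipe_specific_options.get('issue')
            # The value is user supplied, so guard both presence and type
            # before splicing it into a URL.
            if d and isinstance(d, str):
                url = 'https://example.com/issues/' + d + '/'
            ...  # build the feed list from url as usual
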
diff --git a/recipes/bloomberg-business-week.recipe b/recipes/bloomberg-business-week.recipe
index 3d94f6868a..d6bfa7f12d 100644
--- a/recipes/bloomberg-business-week.recipe
+++ b/recipes/bloomberg-business-week.recipe
@@ -5,9 +5,6 @@ from datetime import datetime
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.web.feeds.news import BasicNewsRecipe, classes
 
-# https://www.bloomberg.com/magazine/businessweek/24_12
-# Set past_edition to edition id, which is '24_12'.
-past_edition = None
 
 def get_contents(x):
     if x == '':
@@ -47,7 +44,7 @@ class Bloomberg(BasicNewsRecipe):
     title = 'Bloomberg Businessweek'
-    language = 'en'
+    language = 'en_US'
     __author__ = 'unkn0wn'
     no_stylesheets = True
     remove_attributes = ['style', 'height', 'width']
@@ -60,6 +57,13 @@ class Bloomberg(BasicNewsRecipe):
     )
     remove_empty_feeds = True
 
+    recipe_specific_options = {
+        'date': {
+            'short': 'The ID of the edition to download (YY_XX format)',
+            'long': 'For example, 24_17\nHint: Edition ID can be found at the end of its URL'
+        }
+    }
+
     remove_tags = [
         dict(name=['button', 'svg', 'meta']),
         dict(name='div', attrs={'id':['bb-that', 'bb-nav']}),
@@ -82,7 +86,8 @@ class Bloomberg(BasicNewsRecipe):
         inx = 'https://cdn-mobapi.bloomberg.com'
         sec = self.index_to_soup(inx + '/wssmobile/v1/bw/news/list?limit=1', raw=True)
         id = json.loads(sec)['magazines'][0]['id']
-        if past_edition:
+        past_edition = self.recipe_specific_options.get('date')
+        if past_edition and isinstance(past_edition, str):
             id = past_edition
         edit = self.index_to_soup(inx + '/wssmobile/v1/bw/news/week/' + id, raw=True)
         d = json.loads(edit)
diff --git a/recipes/bloomberg.recipe b/recipes/bloomberg.recipe
index 3a9557570b..656a9aa9fe 100644
--- a/recipes/bloomberg.recipe
+++ b/recipes/bloomberg.recipe
@@ -55,7 +55,7 @@ class Bloomberg(BasicNewsRecipe):
         'Bloomberg delivers business and markets news, data, analysis, and video'
         ' to the world, featuring stories from Businessweek and Bloomberg News.'
     )
-    oldest_article = 1  # days
+    oldest_article = 1.2  # days
     resolve_internal_links = True
     remove_empty_feeds = True
     cover_url = 'https://assets.bwbx.io/images/users/iqjWHBFdfxIU/ivUxvlPidC3M/v0/600x-1.jpg'
@@ -78,7 +78,18 @@ class Bloomberg(BasicNewsRecipe):
         .news-figure-credit {font-size:small; text-align:center; color:#202020;}
     '''
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article),
+        }
+    }
+
     def parse_index(self):
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
         inx = 'https://cdn-mobapi.bloomberg.com'
         sec = self.index_to_soup(inx + '/wssmobile/v1/navigation/bloomberg_app/search-v2', raw=True)
         sec_data = json.loads(sec)['searchNav']
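
Raising oldest_article to 1.2 days gives the feed a small overlap past 24 hours, and the new 'days' option accepts fractional values because the recipe converts it with float(): 0.5 days is 12 hours, as the help text says. A quick worked check in plain Python (not recipe code):

    from datetime import datetime, timedelta

    # What a fractional 'days' value means for the article cutoff:
    oldest_article = float('0.5')             # the option value arrives as a string
    cutoff = datetime.now() - timedelta(days=oldest_article)
    print(cutoff)                             # anything older than this is skipped
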

diff --git a/recipes/business_standard_print.recipe b/recipes/business_standard_print.recipe
index b7c772e0d9..ff7d63ff98 100644
--- a/recipes/business_standard_print.recipe
+++ b/recipes/business_standard_print.recipe
@@ -4,12 +4,6 @@ from datetime import datetime
 from calibre.web.feeds.news import BasicNewsRecipe
 from html5_parser import parse
 
-today = datetime.today().strftime('%d-%m-%Y')
-
-# today = '20-09-2023'
-
-day, month, year = (int(x) for x in today.split('-'))
-dt = datetime(year, month, day)
 
 class BusinessStandardPrint(BasicNewsRecipe):
     title = 'Business Standard Print Edition'
@@ -18,18 +12,12 @@ class BusinessStandardPrint(BasicNewsRecipe):
     language = 'en_IN'
     masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png'
     encoding = 'utf-8'
-    timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
     resolve_internal_links = True
     remove_empty_feeds = True
     no_stylesheets = True
     remove_javascript = True
-    remove_attributes = ['width', 'height', 'float', 'style']
-
-    def __init__(self, *args, **kwargs):
-        BasicNewsRecipe.__init__(self, *args, **kwargs)
-        if self.output_profile.short_name.startswith('kindle'):
-            self.title = 'Business Standard ' + dt.strftime('%b %d, %Y')
+    remove_attributes = ['width', 'height', 'style']
 
     def get_browser(self):
         return BasicNewsRecipe.get_browser(self, user_agent='common_words/based')
@@ -40,16 +28,35 @@ class BusinessStandardPrint(BasicNewsRecipe):
 
     extra_css = '''
         img {display:block; margin:0 auto;}
+        .sub { font-style:italic; color:#202020; }
         .auth, .cat { font-size:small; color:#202020; }
         .cap { font-size:small; text-align:center; }
     '''
 
+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the print edition to download (DD-MM-YYYY format)',
+            'long': 'For example, 20-09-2023'
+        }
+    }
+
     def get_cover_url(self):
-        soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/')
-        for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
-            return citem['content']
+        d = self.recipe_specific_options.get('date')
+        if not (d and isinstance(d, str)):
+            soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/')
+            for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
+                return citem['content']
 
     def parse_index(self):
+        today = datetime.today().strftime('%d-%m-%Y')
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            today = d
+
+        day, month, year = (int(x) for x in today.split('-'))
+        dt = datetime(year, month, day)
+        self.timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
+
         if dt.weekday() == 6:
             self.log.warn(
                 'Business Standard Does Not Have A Print Publication On Sunday. The Reports'
@@ -97,10 +104,10 @@ class BusinessStandardPrint(BasicNewsRecipe):
 
         if 'defaultArticleCat' in data and data['defaultArticleCat'] is not None:
             if 'h1_tag' in data['defaultArticleCat'] and data['defaultArticleCat']['h1_tag'] is not None:
-                cat = '<div><p class="cat">' + data['defaultArticleCat']['h1_tag'] + '</p></div>'
+                cat = '<div class="cat">' + data['defaultArticleCat']['h1_tag'] + '</div>'
 
         if 'metaDescription' in data and data['metaDescription'] is not None:
-            subhead = '<div><p>' + data['metaDescription'] + '</p></div>'
+            subhead = '<div><p class="sub">' + data['metaDescription'] + '</p></div>'
             self.art_desc = data['metaDescription']
 
         date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p')
@@ -120,6 +127,13 @@ class BusinessStandardPrint(BasicNewsRecipe):
             if 'alt_text' in data['featuredImageObj']:
                 caption = '<span>' + data['featuredImageObj']['alt_text'] + '</span></div>'
 
-        body = data['htmlContent']
+        body = data['htmlContent'].replace('<br>\r\n\t\t ', '<br>')
 
-        return '<html><body>' + cat + title + subhead + auth + lede + caption + '<div><p>' + body + '</p></div></body></html>'
+        return '<html><body>' + cat + title + subhead + auth + lede + caption + '<div><br>' + body + '</div></body></html>'
+
+    def preprocess_html(self, soup):
+        for img in soup.findAll('img'):
+            img.attrs = {'src': img.get('src', '')}
+        for x in soup.findAll('div'):
+            x.attrs = {'class': x.get('class', '')}
+        return soup
diff --git a/recipes/economist.recipe b/recipes/economist.recipe
index 3ecf3082f5..9139ef5e20 100644
--- a/recipes/economist.recipe
+++ b/recipes/economist.recipe
@@ -215,7 +215,7 @@ class Economist(BasicNewsRecipe):
 
     def publication_date(self):
         edition_date = self.recipe_specific_options.get('date')
-        if edition_date:
+        if edition_date and isinstance(edition_date, str):
             return parse_only_date(edition_date, as_utc=False)
         url = self.browser.open("https://www.economist.com/printedition").geturl()
         return parse_only_date(url.split("/")[-1], as_utc=False)
@@ -245,7 +245,7 @@ class Economist(BasicNewsRecipe):
             'operationName': 'LatestWeeklyAutoEditionQuery',
             'variables': '{"ref":"/content/d06tg8j85rifiq3oo544c6b9j61dno2n"}',
         }
-        if edition_date:
+        if edition_date and isinstance(edition_date, str):
             url = 'https://www.economist.com/weeklyedition/' + edition_date
             soup = self.index_to_soup(url)
             script_tag = soup.find("script", id="__NEXT_DATA__")
@@ -268,7 +268,7 @@ class Economist(BasicNewsRecipe):
 
     def economist_parse_index(self, raw):
         edition_date = self.recipe_specific_options.get('date')
-        if edition_date:
+        if edition_date and isinstance(edition_date, str):
             data = json.loads(raw)['data']['section']
         else:
             data = json.loads(raw)['data']['canonical']['hasPart']['parts'][0]
@@ -336,7 +336,7 @@ class Economist(BasicNewsRecipe):
     def parse_index(self):
         edition_date = self.recipe_specific_options.get('date')
         # return self.economist_test_article()
-        if edition_date:
+        if edition_date and isinstance(edition_date, str):
             url = 'https://www.economist.com/weeklyedition/' + edition_date
             self.timefmt = ' [' + edition_date + ']'
         else:
@@ -423,10 +423,11 @@ class Economist(BasicNewsRecipe):
                 x.set('style', 'color:#404040;')
         raw = etree.tostring(root, encoding='unicode')
         return raw
+
     def parse_index_from_printedition(self):
         # return self.economist_test_article()
         edition_date = self.recipe_specific_options.get('date')
-        if edition_date:
+        if edition_date and isinstance(edition_date, str):
             url = 'https://www.economist.com/weeklyedition/' + edition_date
             self.timefmt = ' [' + edition_date + ']'
         else:
diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe
index 3ecf3082f5..9139ef5e20 100644
--- a/recipes/economist_free.recipe
+++ b/recipes/economist_free.recipe
@@ -215,7 +215,7 @@ class Economist(BasicNewsRecipe):
 
     def publication_date(self):
         edition_date = self.recipe_specific_options.get('date')
-        if edition_date:
+        if edition_date and isinstance(edition_date, str):
             return parse_only_date(edition_date, as_utc=False)
         url = self.browser.open("https://www.economist.com/printedition").geturl()
         return parse_only_date(url.split("/")[-1], as_utc=False)
@@ -245,7 +245,7 @@ class Economist(BasicNewsRecipe):
             'operationName': 'LatestWeeklyAutoEditionQuery',
             'variables': '{"ref":"/content/d06tg8j85rifiq3oo544c6b9j61dno2n"}',
         }
-        if edition_date:
+        if edition_date and isinstance(edition_date, str):
             url = 'https://www.economist.com/weeklyedition/' + edition_date
             soup = self.index_to_soup(url)
             script_tag = soup.find("script", id="__NEXT_DATA__")
@@ -268,7 +268,7 @@ class Economist(BasicNewsRecipe):
 
     def economist_parse_index(self, raw):
         edition_date = self.recipe_specific_options.get('date')
-        if edition_date:
+        if edition_date and isinstance(edition_date, str):
             data = json.loads(raw)['data']['section']
         else:
             data = json.loads(raw)['data']['canonical']['hasPart']['parts'][0]
@@ -336,7 +336,7 @@ class Economist(BasicNewsRecipe):
     def parse_index(self):
         edition_date = self.recipe_specific_options.get('date')
         # return self.economist_test_article()
-        if edition_date:
+        if edition_date and isinstance(edition_date, str):
             url = 'https://www.economist.com/weeklyedition/' + edition_date
             self.timefmt = ' [' + edition_date + ']'
         else:
@@ -423,10 +423,11 @@ class Economist(BasicNewsRecipe):
                 x.set('style', 'color:#404040;')
         raw = etree.tostring(root, encoding='unicode')
         return raw
+
     def parse_index_from_printedition(self):
         # return self.economist_test_article()
         edition_date = self.recipe_specific_options.get('date')
-        if edition_date:
+        if edition_date and isinstance(edition_date, str):
             url = 'https://www.economist.com/weeklyedition/' + edition_date
             self.timefmt = ' [' + edition_date + ']'
         else:
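
economist.recipe and economist_free.recipe receive byte-identical changes (note the matching index hashes), and every hunk repeats the same `value and isinstance(value, str)` guard. If a later cleanup wanted to factor that repetition out, it could look like the sketch below; the helper name is hypothetical and not part of this patch:

    def str_option(recipe, name):
        # recipe_specific_options values are user input: they may be missing,
        # empty, or not a string at all, so normalise to 'str or None'.
        v = recipe.recipe_specific_options.get(name)
        return v if v and isinstance(v, str) else None

    # usage inside any of the hunks above:
    #     edition_date = str_option(self, 'date')
    #     if edition_date:
    #         ...
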
diff --git a/recipes/harpers.recipe b/recipes/harpers.recipe
index 0c7ac1250c..be5a87086e 100644
--- a/recipes/harpers.recipe
+++ b/recipes/harpers.recipe
@@ -54,11 +54,24 @@ class Harpers(BasicNewsRecipe):
                 img['src'] = src.split()[0]
         return soup
 
+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (YYYY/MM format)',
+            'long': 'For example, 2023/08',
+        }
+    }
+
    def parse_index(self):
         issues_soup = self.index_to_soup("https://harpers.org/issues/")
         a_ele = issues_soup.select_one("div.issue-card a")
         self.timefmt = ' [' + self.tag_to_string(a_ele.find(attrs={'class':'issue-title'})) + ']'
         url = a_ele['href']
+
+        edition = self.recipe_specific_options.get('date')
+        if edition and isinstance(edition, str):
+            url = 'https://harpers.org/archive/' + edition
+            self.timefmt = ' [' + edition + ']'
         soup = self.index_to_soup(url)
         cov_div = soup.find('div', attrs={'class':'issue-cover'})
         if cov_div:
diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe
index 616e123cdd..8b81d0b405 100644
--- a/recipes/hindu.recipe
+++ b/recipes/hindu.recipe
@@ -32,7 +32,8 @@ class TheHindu(BasicNewsRecipe):
     recipe_specific_options = {
         'location': {
             'short': 'The name of the local edition',
-            'long': 'If The Hindu is available in your local town/city,\nset this to your location, for example, hyderabad'
+            'long': 'If The Hindu is available in your local town/city,\nset this to your location, for example, hyderabad',
+            'default': 'international'
         },
         'date': {
             'short': 'The date of the edition to download (YYYY-MM-DD format)',
@@ -60,47 +61,32 @@ class TheHindu(BasicNewsRecipe):
         return soup
 
     def parse_index(self):
-        mag_url = None
-        local_edition = self.recipe_specific_options.get('location')
-        if local_edition:
-            local_edition = 'th_' + local_edition
+        local_edition = 'th_international'
+        d = self.recipe_specific_options.get('location')
+        if d and isinstance(d, str):
+            local_edition = 'th_' + d
+
         past_edition = self.recipe_specific_options.get('date')
 
         dt = date.today()
-        if past_edition:
-            year, month, day = (int(x) for x in past_edition.split('-'))
+        if past_edition and isinstance(past_edition, str):
+            year, month, day = (int(x) for x in past_edition.split('-'))
             dt = date(year, month, day)
 
-        is_monday = dt.weekday() == 0
-        is_friday = dt.weekday() == 4
-        is_saturday = dt.weekday() == 5
-        is_sunday = dt.weekday() == 6
+        today = dt.strftime('%Y-%m-%d')
+        self.log('Downloading The Hindu, ' + local_edition[3:] + ' edition, ' + today)
+        url = absurl('/todays-paper/' + today + '/' + local_edition + '/')
 
-        if local_edition or past_edition:
-            if local_edition is None:
-                local_edition = 'th_chennai'
-            today = date.today().strftime('%Y-%m-%d')
-            if past_edition:
-                today = past_edition
-                self.log('Downloading past edition of', local_edition + ' from ' + today)
-            url = absurl('/todays-paper/' + today + '/' + local_edition + '/')
-            if is_monday:
-                mag_url = url + '?supplement=' + local_edition + '-epbs'
-            if is_saturday:
-                mag_url = url + '?supplement=' + local_edition + '-mp'
-            if is_sunday:
-                mag_url = url + '?supplement=' + local_edition + '-sm'
-        else:
-            url = 'https://www.thehindu.com/todays-paper/'
-            if is_monday:
-                mag_url = url + '?supplement=th_chennai-epbs'
-            if is_friday:
-                mag_url = url + '?supplement=th_chennai-fr'
-            if is_saturday:
-                mag_url = url + '?supplement=th_chennai-mp'
-            if is_sunday:
-                mag_url = url + '?supplement=th_chennai-sm'
+        mag_url = None
+        if dt.weekday() == 0:
+            mag_url = url + '?supplement=' + local_edition + '-epbs'
+        if dt.weekday() == 4:
+            mag_url = url + '?supplement=' + local_edition + '-fr'
+        if dt.weekday() == 5:
+            mag_url = url + '?supplement=' + local_edition + '-mp'
+        if dt.weekday() == 6:
+            mag_url = url + '?supplement=' + local_edition + '-sm'
 
         raw = self.index_to_soup(url, raw=True)
         soup = self.index_to_soup(raw)
@@ -139,7 +125,7 @@ class TheHindu(BasicNewsRecipe):
                     title = item['articleheadline']
                     url = absurl(item['href'])
                     desc = 'Page no.' + item['pageno'] + ' | ' + item['teaser_text'] or ''
-                    self.log('\t', title, '\n\t\t', url)
+                    self.log(' ', title, '\n\t', url)
                     feeds_dict[section].append({"title": title, "url": url, "description": desc})
             return [(section, articles) for section, articles in feeds_dict.items()]
         else:
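
The Hindu hunk above replaces four is_* booleans with direct weekday() checks (Monday is 0, Sunday is 6) and now builds supplement URLs for every edition: epbs on Monday, fr on Friday, mp on Saturday, sm on Sunday. The chained ifs could equally be a lookup table; a sketch using the suffixes copied from the hunk (the function name is hypothetical):

    from datetime import date

    # Mon=0 ... Sun=6; suffixes as they appear in the patch.
    SUPPLEMENTS = {0: '-epbs', 4: '-fr', 5: '-mp', 6: '-sm'}

    def supplement_url(url, local_edition, dt):
        # Return the supplement URL for days that have one, else None.
        suffix = SUPPLEMENTS.get(dt.weekday())
        return url + '?supplement=' + local_edition + suffix if suffix else None

    print(supplement_url('https://www.thehindu.com/todays-paper/2024-06-21/th_international/',
                         'th_international', date(2024, 6, 21)))  # a Friday, so '-fr'
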
diff --git a/recipes/lex_fridman_podcast.recipe b/recipes/lex_fridman_podcast.recipe
index 814c7d7902..5fc387f036 100644
--- a/recipes/lex_fridman_podcast.recipe
+++ b/recipes/lex_fridman_podcast.recipe
@@ -19,6 +19,20 @@ class lexfridman(BasicNewsRecipe):
     timefmt = ' [%b, %Y]'
     cover_url = 'https://i.scdn.co/image/ab6765630000ba8a563ebb538d297875b10114b7'
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     extra_css = '''
         .ts-name { font-weight:bold; }
         .ts-timestamp { font-size:small; }
diff --git a/recipes/military_history.recipe b/recipes/military_history.recipe
index ae35606799..09d330a09b 100644
--- a/recipes/military_history.recipe
+++ b/recipes/military_history.recipe
@@ -45,13 +45,21 @@ class milthist(BasicNewsRecipe):
             exp.name = 'p'
         return soup
 
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 136'
+        }
+    }
+
     def parse_index(self):
         soup = self.index_to_soup('https://the-past.com/category/magazines/mhm/')
         art = soup.find('article', attrs={'class':lambda x: x and 'tag-magazines' in x.split()})
         url = art.h2.a['href']
-        # for past editions, add url
-        # url = ''
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            url = 'https://the-past.com/magazines/military-history-matters-' + d + '/'
 
         issue = self.index_to_soup(url)
         ti = issue.find('h1', attrs={'class':lambda x: x and 'post-title' in x.split()})
diff --git a/recipes/minerva_magazine.recipe b/recipes/minerva_magazine.recipe
index 181d5c88f9..203d9b3520 100644
--- a/recipes/minerva_magazine.recipe
+++ b/recipes/minerva_magazine.recipe
@@ -45,13 +45,21 @@ class minerva(BasicNewsRecipe):
             exp.name = 'p'
         return soup
 
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 136'
+        }
+    }
+
     def parse_index(self):
         soup = self.index_to_soup('https://the-past.com/category/magazines/minerva/')
         art = soup.find('article', attrs={'class':lambda x: x and 'tag-magazines' in x.split()})
         url = art.h2.a['href']
-        # for past editions, add url
-        # url = ''
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            url = 'https://the-past.com/magazines/minerva-magazine-' + d + '/'
 
         issue = self.index_to_soup(url)
         ti = issue.find('h1', attrs={'class':lambda x: x and 'post-title' in x.split()})
diff --git a/recipes/natgeomag.recipe b/recipes/natgeomag.recipe
index d8933ee1c5..c95b239d8f 100644
--- a/recipes/natgeomag.recipe
+++ b/recipes/natgeomag.recipe
@@ -10,9 +10,6 @@ from calibre import prepare_string_for_xml as escape
 from calibre.utils.iso8601 import parse_iso8601
 from calibre.web.feeds.news import BasicNewsRecipe
 
-edition = date.today().strftime('%B-%Y')
-
-# edition = 'March-2023'
 
 def classes(classes):
     q = frozenset(classes.split(' '))
@@ -175,7 +172,18 @@ class NatGeo(BasicNewsRecipe):
         .auth, .time, .sub { font-size:small; color:#5c5c5c; }
     '''
 
+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (Month-YYYY format)',
+            'long': 'For example, March-2023'
+        }
+    }
+
     def parse_index(self):
+        edition = date.today().strftime('%B-%Y')
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            edition = d
         url = 'https://www.nationalgeographic.com/magazine/issue/' + edition.lower()
         self.log('Downloading ', url)
         self.timefmt = ' [' + edition + ']'
@@ -185,7 +193,7 @@ class NatGeo(BasicNewsRecipe):
         self.cover_url = soup.find('meta', attrs={'property':'og:image'})['content'].split('?')[0] + '?w=1000'
 
         name = soup.find(attrs={'class':lambda x: x and 'Header__Description' in x.split()})
-        self.title = 'National Geographic ' + self.tag_to_string(name)
+        # self.title = 'National Geographic ' + self.tag_to_string(name)
         ans = {}
         if photoart := soup.find(attrs={'class':lambda x: x and 'BgImagePromo__Container__Text__Link' in x.split()}):
             section = 'Photo Essay'
diff --git a/recipes/psych.recipe b/recipes/psych.recipe
index 5cad8e823b..b5353a4f1e 100644
--- a/recipes/psych.recipe
+++ b/recipes/psych.recipe
@@ -53,7 +53,7 @@ class PsychologyToday(BasicNewsRecipe):
         a = soup.find(**classes('magazine-thumbnail')).a
         url = a['href']
         past_edition = self.recipe_specific_options.get('date')
-        if past_edition:
+        if past_edition and isinstance(past_edition, str):
             url = '/us/magazine/archive/' + past_edition
         soup = self.index_to_soup(absurl(url))
         cov = soup.find(**classes('content-header--cover-image'))
diff --git a/recipes/reuters.recipe b/recipes/reuters.recipe
index 417d40a9e2..e22b1a61f4 100644
--- a/recipes/reuters.recipe
+++ b/recipes/reuters.recipe
@@ -21,7 +21,7 @@ class Reuters(BasicNewsRecipe):
     masthead_url = 'https://www.reutersprofessional.com/wp-content/uploads/2024/03/primary-logo.svg'
     language = 'en'
     encoding = 'utf-8'
-    oldest_article = 2  # days
+    oldest_article = 1.2  # days
     no_javascript = True
     no_stylesheets = True
     remove_attributes = ['style', 'height', 'width']
@@ -30,10 +30,24 @@ class Reuters(BasicNewsRecipe):
 
     extra_css = '''
         .label, .auth { font-size:small; color:#202020; }
-        .figc { font-size:small; text-align:center; }
+        .figc { font-size:small; }
         img {display:block; margin:0 auto;}
     '''
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     def parse_index(self):
         index = 'https://www.reuters.com'
         today = datetime.now()
diff --git a/recipes/tls_mag.recipe b/recipes/tls_mag.recipe
index d29b798b4e..c8f8b239b6 100644
--- a/recipes/tls_mag.recipe
+++ b/recipes/tls_mag.recipe
@@ -48,9 +48,21 @@ class tls(BasicNewsRecipe):
         .det { font-size:small; color:#202020; font-weight:bold; }
     '''
 
+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download\nlower case Month-DD-YYYY format',
+            'long': 'For example, july-12-2024',
+            'default': 'current-issue'
+        }
+    }
+
     def parse_index(self):
-        # for past edition, change the issue link below
         issue = 'https://www.the-tls.co.uk/issues/current-issue/'
+
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            issue = 'https://www.the-tls.co.uk/issues/' + d + '/'
+
         url = 'https://www.the-tls.co.uk/wp-json/tls/v2/contents-page/' + get_id(issue)
         raw = self.index_to_soup(url, raw=True)
         data = json.loads(raw)
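
The next file, world_archeology.recipe, is the fourth the-past.com title in this patch to gain the same 'issue' option; across ancient_egypt, military_history, minerva, and world_archeology only the category page and the issue-URL prefix differ, so the per-title strings could be table-driven. A sketch using the prefixes copied from the patch (note ancient_egypt keeps an extra 'ae/' path segment):

    # Issue-URL prefixes as they appear in the four the-past.com recipes.
    ISSUE_PREFIXES = {
        'ae': 'https://the-past.com/magazines/ae/ancient-egypt-magazine-',
        'mhm': 'https://the-past.com/magazines/military-history-matters-',
        'minerva': 'https://the-past.com/magazines/minerva-magazine-',
        'cwa': 'https://the-past.com/magazines/current-world-archaeology-',
    }

    def issue_url(slug, issue_number):
        # issue_url('cwa', '136') -> the URL world_archeology.recipe builds below
        return ISSUE_PREFIXES[slug] + issue_number + '/'
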
diff --git a/recipes/world_archeology.recipe b/recipes/world_archeology.recipe
index 103f159eb5..31d467a3fd 100644
--- a/recipes/world_archeology.recipe
+++ b/recipes/world_archeology.recipe
@@ -47,13 +47,21 @@ class worldarch(BasicNewsRecipe):
             exp.name = 'p'
         return soup
 
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 136'
+        }
+    }
+
     def parse_index(self):
         soup = self.index_to_soup('https://the-past.com/category/magazines/cwa/')
         art = soup.find('article', attrs={'class':lambda x: x and 'tag-magazines' in x.split()})
         url = art.h2.a['href']
-        # for past editions, add url
-        # url = ''
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            url = 'https://the-past.com/magazines/current-world-archaeology-' + d + '/'
 
         issue = self.index_to_soup(url)
         ti = issue.find('h1', attrs={'class':lambda x: x and 'post-title' in x.split()})
diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe
index b8571040dc..35587f971d 100644
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@@ -107,7 +107,8 @@ class WSJ(BasicNewsRecipe):
         return soup
 
     def _download_cover(self):
-        if not self.recipe_specific_options.get('date'):
+        d = self.recipe_specific_options.get('date')
+        if not (d and isinstance(d, str)):
             import os
             from contextlib import closing
@@ -143,7 +144,7 @@ class WSJ(BasicNewsRecipe):
         past_edition = self.recipe_specific_options.get('date')
 
         for itm in catalog['items']:
-            if past_edition:
+            if past_edition and isinstance(past_edition, str):
                 if itm['key'] == 'ITPNEXTGEN' + past_edition:
                     key = itm['key']
                     manifest = itm['manifest']