diff --git a/recipes/mit_technology_review.recipe b/recipes/mit_technology_review.recipe
index 364fdc02a9..d9f00e42c4 100644
--- a/recipes/mit_technology_review.recipe
+++ b/recipes/mit_technology_review.recipe
@@ -65,9 +65,20 @@ class MitTechnologyReview(BasicNewsRecipe):
         ),
     ]
 
+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': 'For example, https://www.technologyreview.com/magazines/the-education-issue/',
+            'default': 'http://www.technologyreview.com/magazine/'
+        }
+    }
+
     def parse_index(self):
         # for past editions, change the issue link below
         issue = 'http://www.technologyreview.com/magazine/'
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
         soup = self.index_to_soup(issue)
         if script := soup.find('script', id='preload'):
             raw = script.contents[0]
diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe
index 93b65b4a08..fa3b7b0a6b 100644
--- a/recipes/nytimes_sub.recipe
+++ b/recipes/nytimes_sub.recipe
@@ -121,9 +121,19 @@ class NewYorkTimes(BasicNewsRecipe):
         tf.write(self.get_nyt_page(url))
         return tf.name
 
+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (YYYY/MM/DD format)',
+            'long': 'For example, 2024/07/16'
+        }
+    }
+
     def read_todays_paper(self):
         INDEX = 'https://www.nytimes.com/section/todayspaper'
         # INDEX = 'file:///t/raw.html'
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            INDEX = 'https://www.nytimes.com/issue/todayspaper/' + d + '/todays-new-york-times'
         return self.index_to_soup(self.get_nyt_page(INDEX, skip_wayback=True))
 
     def read_nyt_metadata(self):
diff --git a/recipes/people_daily.recipe b/recipes/people_daily.recipe
index 26881d67cb..4ad18a436c 100644
--- a/recipes/people_daily.recipe
+++ b/recipes/people_daily.recipe
@@ -24,6 +24,20 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
     conversion_options = {'linearize_tables': True}
     masthead_url = 'http://www.people.com.cn/img/2010wb/images/logo.gif'
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     feeds = [
         (u'时政', u'http://www.people.com.cn/rss/politics.xml'),
         (u'国际', u'http://www.people.com.cn/rss/world.xml'),
diff --git a/recipes/phillosophy_now.recipe b/recipes/phillosophy_now.recipe
index 2a46dcf0eb..14d044e489 100644
--- a/recipes/phillosophy_now.recipe
+++ b/recipes/phillosophy_now.recipe
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from collections import OrderedDict
 
 from calibre import browser
@@ -31,19 +33,29 @@ class PhilosophyNow(BasicNewsRecipe):
         .articleImageCaption { font-size:small; text-align:center; }
         em, blockquote { color:#202020; }
     '''
+
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 136'
+        }
+    }
 
     def parse_index(self):
         soup = self.index_to_soup('https://philosophynow.org/')
         div = soup.find('div', attrs={'id': 'aside_issue_cover'})
-        url = div.find('a', href=True)['href']
-        issue = div.find('div', attrs={'id':'aside_issue_text'})
-        if issue:
-            self.log('Downloading issue:', self.tag_to_string(issue).strip())
-            self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']'
-            self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'}))
+        url = 'https://philosophynow.org' + div.find('a', href=True)['href']
+
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            url = 'https://philosophynow.org/issues/' + d
+
+        soup = self.index_to_soup(url)
+
+        div = soup.find('div', attrs={'id': 'issue_contents_cover_div'})
         cov_url = div.find('img', src=True)['src']
         self.cover_url = 'https://philosophynow.org' + cov_url
-        soup = self.index_to_soup('https://philosophynow.org' + url)
+
         self.timefmt = ' [' + self.tag_to_string(soup.find('h1')) + ']'
 
         feeds = OrderedDict()
diff --git a/recipes/science_x.recipe b/recipes/science_x.recipe
index 9faaaa707b..1ae4dc8b07 100644
--- a/recipes/science_x.recipe
+++ b/recipes/science_x.recipe
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://sciencex.com/
 '''
@@ -26,6 +28,20 @@ class scix(BasicNewsRecipe):
         .article__info, .article-byline, .article-main__more, .d-print-block {font-size:small; color:#404040;}
     '''
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     resolve_internal_links = True
     remove_empty_feeds = True
 
diff --git a/recipes/scientific_american.recipe b/recipes/scientific_american.recipe
index bf13b4b945..a5bbd7d609 100644
--- a/recipes/scientific_american.recipe
+++ b/recipes/scientific_american.recipe
@@ -59,16 +59,29 @@ class ScientificAmerican(BasicNewsRecipe):
         br.submit()
         return br
 
+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': (
+                'For example, https://www.scientificamerican.com/issue/sa/2024/07-01/'
+                '\nYou can also download special-editions, physics, health, mind magazines by pasting the URL here.'
+            )
+        }
+    }
+
     def parse_index(self):
         # Get the cover, date and issue URL
-        fp_soup = self.index_to_soup("https://www.scientificamerican.com")
-        curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
-        if not curr_issue_link:
-            self.abort_recipe_processing("Unable to find issue link")
-        issue_url = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
-        # for past editions https://www.scientificamerican.com/archive/issues/
-        # issue_url = 'https://www.scientificamerican.com/issue/sa/2024/01-01/'
-        soup = self.index_to_soup(issue_url)
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
+        else:
+            fp_soup = self.index_to_soup("https://www.scientificamerican.com")
+            curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
+            if not curr_issue_link:
+                self.abort_recipe_processing("Unable to find issue link")
+            issue = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
+
+        soup = self.index_to_soup(issue)
         script = soup.find("script", id="__DATA__")
         if not script:
             self.abort_recipe_processing("Unable to find script")