commit ab9cb22eac
Author: Kovid Goyal
Date:   2024-07-23 17:38:04 +05:30

6 changed files with 91 additions and 15 deletions


@@ -65,9 +65,20 @@ class MitTechnologyReview(BasicNewsRecipe):
             ),
         ]
 
+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': 'For example, https://www.technologyreview.com/magazines/the-education-issue/',
+            'default': 'http://www.technologyreview.com/magazine/'
+        }
+    }
+
     def parse_index(self):
         # for past editions, change the issue link below
         issue = 'http://www.technologyreview.com/magazine/'
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
         soup = self.index_to_soup(issue)
         if script := soup.find('script', id='preload'):
             raw = script.contents[0]
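
All six recipes in this commit follow the same pattern: a class-level recipe_specific_options dict that describes each option (a 'short' label, a 'long' help text with an example, and an optional 'default'), plus a lookup at download time that only honours non-empty string values. A minimal standalone sketch of that lookup, with an illustrative helper name that is not calibre API:

    def resolve_option(options, key, default):
        # Same guard as in the hunk above: fall back to the hard-coded
        # value unless the user supplied a non-empty string.
        d = options.get(key)
        return d if d and isinstance(d, str) else default

    resolve_option({}, 'issue_url', 'http://www.technologyreview.com/magazine/')
    # -> 'http://www.technologyreview.com/magazine/'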


@@ -121,9 +121,19 @@ class NewYorkTimes(BasicNewsRecipe):
             tf.write(self.get_nyt_page(url))
         return tf.name
 
+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (YYYY/MM/DD format)',
+            'long': 'For example, 2024/07/16'
+        }
+    }
+
     def read_todays_paper(self):
         INDEX = 'https://www.nytimes.com/section/todayspaper'
         # INDEX = 'file:///t/raw.html'
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            INDEX = 'https://www.nytimes.com/issue/todayspaper/' + d + '/todays-new-york-times'
         return self.index_to_soup(self.get_nyt_page(INDEX, skip_wayback=True))
 
     def read_nyt_metadata(self):
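
The 'date' value is spliced verbatim into the past-edition URL, so it must already be in YYYY/MM/DD form; the recipe itself performs no validation. A hedged sketch of checking the value before building the URL (the strptime check is an addition here, not part of the recipe):

    from datetime import datetime

    def todays_paper_index(date_str=None):
        if date_str:
            datetime.strptime(date_str, '%Y/%m/%d')  # ValueError if not YYYY/MM/DD
            return 'https://www.nytimes.com/issue/todayspaper/' + date_str + '/todays-new-york-times'
        return 'https://www.nytimes.com/section/todayspaper'

    todays_paper_index('2024/07/16')
    # -> 'https://www.nytimes.com/issue/todayspaper/2024/07/16/todays-new-york-times'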


@@ -24,6 +24,20 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
     conversion_options = {'linearize_tables': True}
     masthead_url = 'http://www.people.com.cn/img/2010wb/images/logo.gif'
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     feeds = [
         (u'时政', u'http://www.people.com.cn/rss/politics.xml'),
         (u'国际', u'http://www.people.com.cn/rss/world.xml'),
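
Unlike the URL-style options above, 'days' is applied in __init__ rather than parse_index: oldest_article is consumed by BasicNewsRecipe's feed machinery, so the override has to be in place as soon as the recipe is instantiated. The value is parsed with float(), so fractional days work:

    d = '0.5'                      # a value a user might enter for 'days'
    if d and isinstance(d, str):
        oldest_article = float(d)  # raises ValueError on non-numeric input
    # 0.5 days == 12 hours of articles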


@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from collections import OrderedDict
 
 from calibre import browser
@@ -32,18 +34,28 @@ class PhilosophyNow(BasicNewsRecipe):
         em, blockquote { color:#202020; }
     '''
 
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 136'
+        }
+    }
+
     def parse_index(self):
         soup = self.index_to_soup('https://philosophynow.org/')
         div = soup.find('div', attrs={'id': 'aside_issue_cover'})
-        url = div.find('a', href=True)['href']
-        issue = div.find('div', attrs={'id':'aside_issue_text'})
-        if issue:
-            self.log('Downloading issue:', self.tag_to_string(issue).strip())
-            self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']'
-            self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'}))
+        url = 'https://philosophynow.org' + div.find('a', href=True)['href']
+
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            url = 'https://philosophynow.org/issues/' + d
+
+        soup = self.index_to_soup(url)
+        div = soup.find('div', attrs={'id': 'issue_contents_cover_div'})
         cov_url = div.find('img', src=True)['src']
         self.cover_url = 'https://philosophynow.org' + cov_url
-        soup = self.index_to_soup('https://philosophynow.org' + url)
+        self.timefmt = ' [' + self.tag_to_string(soup.find('h1')) + ']'
 
         feeds = OrderedDict()
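
Here the option is a bare issue number rather than a URL; the recipe expands it into the archive URL itself. A condensed sketch of the selection logic (function name and sample href are illustrative):

    def issue_page_url(option_value, homepage_href):
        # Default: follow the cover link scraped from the homepage.
        url = 'https://philosophynow.org' + homepage_href
        if option_value and isinstance(option_value, str):
            # Override: build the archive URL from the bare issue number.
            url = 'https://philosophynow.org/issues/' + option_value
        return url

    issue_page_url('136', '/issues/163')
    # -> 'https://philosophynow.org/issues/136'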


@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://sciencex.com/
 '''
@@ -26,6 +28,20 @@ class scix(BasicNewsRecipe):
         .article__info, .article-byline, .article-main__more, .d-print-block {font-size:small; color:#404040;}
     '''
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     resolve_internal_links = True
     remove_empty_feeds = True
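
This is the same 'days' override as in the People's Daily recipe above. Since the three-line idiom recurs verbatim, it could in principle be factored into a shared helper; a hypothetical sketch (calibre ships no such helper, each recipe currently inlines it):

    def apply_days_option(recipe):
        # Equivalent to the __init__ bodies added in this commit.
        d = recipe.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            recipe.oldest_article = float(d)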


@@ -59,16 +59,29 @@ class ScientificAmerican(BasicNewsRecipe):
         br.submit()
         return br
 
+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': (
+                'For example, https://www.scientificamerican.com/issue/sa/2024/07-01/'
+                '\nYou can also download special-editions, physics, health, mind magazines by pasting the URL here.'
+            )
+        }
+    }
+
     def parse_index(self):
         # Get the cover, date and issue URL
-        fp_soup = self.index_to_soup("https://www.scientificamerican.com")
-        curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
-        if not curr_issue_link:
-            self.abort_recipe_processing("Unable to find issue link")
-        issue_url = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
-        # for past editions https://www.scientificamerican.com/archive/issues/
-        # issue_url = 'https://www.scientificamerican.com/issue/sa/2024/01-01/'
-        soup = self.index_to_soup(issue_url)
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
+        else:
+            fp_soup = self.index_to_soup("https://www.scientificamerican.com")
+            curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
+            if not curr_issue_link:
+                self.abort_recipe_processing("Unable to find issue link")
+            issue = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
+        soup = self.index_to_soup(issue)
         script = soup.find("script", id="__DATA__")
         if not script:
             self.abort_recipe_processing("Unable to find script")
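
In this recipe the option short-circuits the homepage scrape entirely: an explicit issue_url wins, and only the fallback branch fetches the front page to find the latest-issue link. A condensed sketch of that precedence (function name is illustrative):

    def choose_issue(option_value, scraped_href):
        if option_value and isinstance(option_value, str):
            return option_value  # user-supplied issue URL wins
        # Fallback: latest-issue link scraped from the homepage.
        return 'https://www.scientificamerican.com' + scraped_href

    choose_issue(None, '/issue/sa/2024/07-01/')
    # -> 'https://www.scientificamerican.com/issue/sa/2024/07-01/'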