Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00

Commit ab9cb22eac: Merge branch 'master' of https://github.com/unkn0w7n/calibre
@@ -65,9 +65,20 @@ class MitTechnologyReview(BasicNewsRecipe):
         ),
     ]

+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': 'For example, https://www.technologyreview.com/magazines/the-education-issue/',
+            'default': 'http://www.technologyreview.com/magazine/'
+        }
+    }
+
     def parse_index(self):
         # for past editions, change the issue link below
         issue = 'http://www.technologyreview.com/magazine/'
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
         soup = self.index_to_soup(issue)
         if script := soup.find('script', id='preload'):
             raw = script.contents[0]
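Every hunk in this commit follows the same two-part pattern: a class-level recipe_specific_options dict declares the option (a 'short' label, 'long' help text, and an optional 'default'), and the recipe reads the user-supplied value back at run time, accepting it only when it is a non-empty string. A minimal standalone sketch of that pattern (plain Python, not calibre code; the resolve helper is hypothetical):

    # Resolve an option against a fallback, mirroring the guard used in all six recipes.
    def resolve(options, name, fallback):
        d = options.get(name)
        if d and isinstance(d, str):  # the user's value arrives as a string; anything else means "not set"
            return d
        return fallback

    issue = resolve({'issue_url': 'https://www.technologyreview.com/magazines/the-education-issue/'},
                    'issue_url', 'http://www.technologyreview.com/magazine/')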
@@ -121,9 +121,19 @@ class NewYorkTimes(BasicNewsRecipe):
             tf.write(self.get_nyt_page(url))
             return tf.name

+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (YYYY/MM/DD format)',
+            'long': 'For example, 2024/07/16'
+        }
+    }
+
     def read_todays_paper(self):
         INDEX = 'https://www.nytimes.com/section/todayspaper'
         # INDEX = 'file:///t/raw.html'
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            INDEX = 'https://www.nytimes.com/issue/todayspaper/' + d + '/todays-new-york-times'
         return self.index_to_soup(self.get_nyt_page(INDEX, skip_wayback=True))

     def read_nyt_metadata(self):
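The date option is spliced directly into the archive URL, so it must already be in YYYY/MM/DD form. A hedged sketch of the resulting URL construction; the strptime check is added for illustration only and is not part of the recipe:

    from datetime import datetime

    def todays_paper_index(d=None):
        if d and isinstance(d, str):
            datetime.strptime(d, '%Y/%m/%d')  # raises ValueError on a malformed date (illustrative validation)
            return 'https://www.nytimes.com/issue/todayspaper/' + d + '/todays-new-york-times'
        return 'https://www.nytimes.com/section/todayspaper'

    print(todays_paper_index('2024/07/16'))  # .../issue/todayspaper/2024/07/16/todays-new-york-times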
@@ -24,6 +24,20 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
     conversion_options = {'linearize_tables': True}
     masthead_url = 'http://www.people.com.cn/img/2010wb/images/logo.gif'

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     feeds = [
         (u'时政', u'http://www.people.com.cn/rss/politics.xml'),
         (u'国际', u'http://www.people.com.cn/rss/world.xml'),
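Unlike the URL overrides above, days is applied in __init__ rather than at class level, presumably because the user's value is only available on the instance once BasicNewsRecipe.__init__ has run; float() is what makes fractional values like 0.5 work. A standalone sketch of what the number means (the cutoff helper is hypothetical, not recipe code):

    from datetime import datetime, timedelta

    def cutoff(oldest_article: float) -> datetime:
        # oldest_article = 0.5 -> keep only articles from the past 12 hours
        return datetime.now() - timedelta(days=oldest_article)

    print(cutoff(float('0.5')))

The Science X recipe further down receives the identical days treatment.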
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from collections import OrderedDict

 from calibre import browser
@@ -31,19 +33,29 @@ class PhilosophyNow(BasicNewsRecipe):
         .articleImageCaption { font-size:small; text-align:center; }
         em, blockquote { color:#202020; }
     '''

+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 136'
+        }
+    }
+
     def parse_index(self):
         soup = self.index_to_soup('https://philosophynow.org/')
         div = soup.find('div', attrs={'id': 'aside_issue_cover'})
-        url = div.find('a', href=True)['href']
-        issue = div.find('div', attrs={'id':'aside_issue_text'})
-        if issue:
-            self.log('Downloading issue:', self.tag_to_string(issue).strip())
-            self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']'
-            self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'}))
+        url = 'https://philosophynow.org' + div.find('a', href=True)['href']
+
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            url = 'https://philosophynow.org/issues/' + d
+
+        soup = self.index_to_soup(url)
+
+        div = soup.find('div', attrs={'id': 'issue_contents_cover_div'})
         cov_url = div.find('img', src=True)['src']
         self.cover_url = 'https://philosophynow.org' + cov_url
-        soup = self.index_to_soup('https://philosophynow.org' + url)
+        self.timefmt = ' [' + self.tag_to_string(soup.find('h1')) + ']'

         feeds = OrderedDict()
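Besides wiring in the issue option, this hunk moves cover and date extraction onto the issue page that was actually fetched, so back issues pick up their own metadata rather than the front page's. A standalone sketch of the URL resolution, with hypothetical parameter names and example values:

    def issue_page_url(front_page_href, issue_number=None):
        # default: the issue currently linked from the front-page cover widget
        url = 'https://philosophynow.org' + front_page_href
        if issue_number and isinstance(issue_number, str):
            url = 'https://philosophynow.org/issues/' + issue_number  # any back issue by number
        return url

    print(issue_page_url('/issues/162', '136'))  # -> https://philosophynow.org/issues/136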
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://sciencex.com/
 '''
@@ -26,6 +28,20 @@ class scix(BasicNewsRecipe):
         .article__info, .article-byline, .article-main__more, .d-print-block {font-size:small; color:#404040;}
     '''

+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     resolve_internal_links = True
     remove_empty_feeds = True
@@ -59,16 +59,29 @@ class ScientificAmerican(BasicNewsRecipe):
             br.submit()
         return br

+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': (
+                'For example, https://www.scientificamerican.com/issue/sa/2024/07-01/'
+                '\nYou can also download special-editions, physics, health, mind magazines by pasting the URL here.'
+            )
+        }
+    }
+
     def parse_index(self):
         # Get the cover, date and issue URL
-        fp_soup = self.index_to_soup("https://www.scientificamerican.com")
-        curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
-        if not curr_issue_link:
-            self.abort_recipe_processing("Unable to find issue link")
-        issue_url = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
-        # for past editions https://www.scientificamerican.com/archive/issues/
-        # issue_url = 'https://www.scientificamerican.com/issue/sa/2024/01-01/'
-        soup = self.index_to_soup(issue_url)
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
+        else:
+            fp_soup = self.index_to_soup("https://www.scientificamerican.com")
+            curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
+            if not curr_issue_link:
+                self.abort_recipe_processing("Unable to find issue link")
+            issue = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
+
+        soup = self.index_to_soup(issue)
         script = soup.find("script", id="__DATA__")
         if not script:
             self.abort_recipe_processing("Unable to find script")
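A small Python detail in this hunk: the 'long' help text is two adjacent string literals inside parentheses, which the parser joins into a single string, so the embedded \n yields a two-line help message. A standalone illustration, not recipe code:

    long_help = (
        'For example, https://www.scientificamerican.com/issue/sa/2024/07-01/'
        '\nYou can also download special-editions, physics, health, mind magazines by pasting the URL here.'
    )
    assert long_help.count('\n') == 1  # one string, two lines
    print(long_help)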