mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
71e8654c80
@ -1,3 +1,4 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
'''
|
'''
|
||||||
nautil.us
|
nautil.us
|
||||||
'''
|
'''
|
||||||
@ -5,8 +6,8 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes
|
|||||||
|
|
||||||
|
|
||||||
class Nautilus(BasicNewsRecipe):
|
class Nautilus(BasicNewsRecipe):
|
||||||
title = u'Nautilus'
|
title = u'Nautilus Magazine'
|
||||||
language = 'en'
|
language = 'en_US'
|
||||||
__author__ = 'unkn0wn'
|
__author__ = 'unkn0wn'
|
||||||
oldest_article = 45 # days
|
oldest_article = 45 # days
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
@ -16,7 +17,7 @@ class Nautilus(BasicNewsRecipe):
|
|||||||
' no matter how complex, can be explained with clarity and vitality.')
|
' no matter how complex, can be explained with clarity and vitality.')
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
masthead_url = 'https://nautil.us/wp-content/themes/nautilus/images/logo/light.svg'
|
masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/1b/Nautilus.svg/640px-Nautilus.svg.png'
|
||||||
remove_attributes = ['height', 'width']
|
remove_attributes = ['height', 'width']
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
@ -28,6 +29,21 @@ class Nautilus(BasicNewsRecipe):
|
|||||||
.breadcrumb{color:gray; font-size:small;}
|
.breadcrumb{color:gray; font-size:small;}
|
||||||
.article-author{font-size:small;}
|
.article-author{font-size:small;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'days': {
|
||||||
|
'short': 'Oldest article to download from this news source. In days ',
|
||||||
|
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||||
|
'default': str(oldest_article)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
BasicNewsRecipe.__init__(self, *args, **kwargs)
|
||||||
|
d = self.recipe_specific_options.get('days')
|
||||||
|
if d and isinstance(d, str):
|
||||||
|
self.oldest_article = float(d)
|
||||||
|
|
||||||
keep_only_tags = [classes('article-left-col feature-image article-content')]
|
keep_only_tags = [classes('article-left-col feature-image article-content')]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -85,4 +101,6 @@ class Nautilus(BasicNewsRecipe):
|
|||||||
ul.name = 'span'
|
ul.name = 'span'
|
||||||
for li in ul.findAll('li'):
|
for li in ul.findAll('li'):
|
||||||
li.name = 'p'
|
li.name = 'p'
|
||||||
|
for img in soup.findAll('img', attrs={'srcset':True}):
|
||||||
|
img['src'] = img['srcset'].split(',')[-1].split()[0]
|
||||||
return soup
|
return soup
|
||||||
|
@ -13,15 +13,6 @@ def re_html(y):
|
|||||||
soup = BeautifulSoup(y.rstrip())
|
soup = BeautifulSoup(y.rstrip())
|
||||||
return soup.text
|
return soup.text
|
||||||
|
|
||||||
def get_cont(x):
|
|
||||||
url = x['url']
|
|
||||||
title = x['headline']
|
|
||||||
desc = x['standfirst']
|
|
||||||
if x['byline']['text']:
|
|
||||||
desc = 'By ' + x['byline']['text'] + ' | ' + desc
|
|
||||||
print(' ', re_html(title), '\n\t', re_html(desc), '\n\t', url)
|
|
||||||
return ({ 'title': re_html(title), 'description': re_html(desc), 'url': url })
|
|
||||||
|
|
||||||
def get_id(url):
|
def get_id(url):
|
||||||
rq = browser().open(url)
|
rq = browser().open(url)
|
||||||
return re.search('\?p=(\S+)>', str(rq.info())).group(1)
|
return re.search('\?p=(\S+)>', str(rq.info())).group(1)
|
||||||
@ -77,7 +68,7 @@ class tls(BasicNewsRecipe):
|
|||||||
|
|
||||||
if data['featuredarticle']:
|
if data['featuredarticle']:
|
||||||
self.log('A note from the Editor')
|
self.log('A note from the Editor')
|
||||||
feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])]))
|
feeds.append(('A note from the Editor', [self.get_cont(data['featuredarticle'])]))
|
||||||
|
|
||||||
cont = data['contents']
|
cont = data['contents']
|
||||||
for c in cont:
|
for c in cont:
|
||||||
@ -87,11 +78,20 @@ class tls(BasicNewsRecipe):
|
|||||||
self.log(section)
|
self.log(section)
|
||||||
articles = []
|
articles = []
|
||||||
for arts in cont[c]['articleslist']:
|
for arts in cont[c]['articleslist']:
|
||||||
articles.append(get_cont(arts))
|
articles.append(self.get_cont(arts))
|
||||||
if articles:
|
if articles:
|
||||||
feeds.append((section, articles))
|
feeds.append((section, articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
def get_cont(self, x):
|
||||||
|
url = x['url']
|
||||||
|
title = re_html(x['headline'])
|
||||||
|
desc = re_html(x['standfirst'])
|
||||||
|
if x['byline']['text']:
|
||||||
|
desc = 'By ' + re_html(x['byline']['text']) + ' | ' + desc
|
||||||
|
self.log(' ', title, '\n\t', desc, '\n\t', url)
|
||||||
|
return ({ 'title': title, 'description': desc, 'url': url })
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
return 'https://www.the-tls.co.uk/wp-json/tls/v2/single-article/' + get_id(url)
|
return 'https://www.the-tls.co.uk/wp-json/tls/v2/single-article/' + get_id(url)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user