This commit is contained in:
Kovid Goyal 2024-08-16 12:47:12 +05:30
commit 71e8654c80
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 32 additions and 14 deletions

View File

@ -1,3 +1,4 @@
#!/usr/bin/env python
'''
nautil.us
'''
@ -5,8 +6,8 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes
class Nautilus(BasicNewsRecipe):
title = u'Nautilus'
language = 'en'
title = u'Nautilus Magazine'
language = 'en_US'
__author__ = 'unkn0wn'
oldest_article = 45 # days
max_articles_per_feed = 50
@ -16,7 +17,7 @@ class Nautilus(BasicNewsRecipe):
' no matter how complex, can be explained with clarity and vitality.')
no_stylesheets = True
use_embedded_content = False
masthead_url = 'https://nautil.us/wp-content/themes/nautilus/images/logo/light.svg'
masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/1b/Nautilus.svg/640px-Nautilus.svg.png'
remove_attributes = ['height', 'width']
ignore_duplicate_articles = {'title', 'url'}
remove_empty_feeds = True
@ -28,6 +29,21 @@ class Nautilus(BasicNewsRecipe):
.breadcrumb{color:gray; font-size:small;}
.article-author{font-size:small;}
'''
# Recipe-specific option 'days'. Its default is the string form of the
# class-level oldest_article; __init__ reads the 'days' value and, when it
# is a non-empty string, stores float(value) in self.oldest_article.
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
    """Initialise the recipe, then apply the 'days' recipe-specific option.

    All arguments are forwarded unchanged to ``BasicNewsRecipe.__init__``.
    Afterwards the 'days' entry of ``self.recipe_specific_options`` is
    looked up; when it is a non-empty string it is converted with
    ``float`` and stored as ``self.oldest_article``.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    days_option = self.recipe_specific_options.get('days')
    # Anything that is not a non-empty string leaves oldest_article
    # untouched.
    use_option = bool(days_option) and isinstance(days_option, str)
    if use_option:
        self.oldest_article = float(days_option)
keep_only_tags = [classes('article-left-col feature-image article-content')]
remove_tags = [
@ -85,4 +101,6 @@ class Nautilus(BasicNewsRecipe):
ul.name = 'span'
for li in ul.findAll('li'):
li.name = 'p'
for img in soup.findAll('img', attrs={'srcset':True}):
img['src'] = img['srcset'].split(',')[-1].split()[0]
return soup

View File

@ -13,15 +13,6 @@ def re_html(y):
soup = BeautifulSoup(y.rstrip())
return soup.text
def get_cont(x):
    """Convert one article record into a feed entry dict.

    ``x`` is indexed with the keys 'url', 'headline', 'standfirst' and
    'byline'. When ``x['byline']['text']`` is truthy, the description is
    prefixed with "By <byline> | ". Title and description are passed
    through ``re_html``, printed together with the URL, and returned.

    Returns a dict with the keys 'title', 'description' and 'url'.
    """
    url = x['url']
    raw_title = x['headline']
    raw_desc = x['standfirst']
    byline = x['byline']['text']
    if byline:
        raw_desc = 'By ' + byline + ' | ' + raw_desc
    # Convert each field once and reuse the result for both the progress
    # output and the returned entry, instead of running re_html twice per
    # field.
    title = re_html(raw_title)
    desc = re_html(raw_desc)
    print(' ', title, '\n\t', desc, '\n\t', url)
    return {'title': title, 'description': desc, 'url': url}
def get_id(url):
    """Return the id found after '?p=' in the response info for ``url``.

    Opens ``url`` via ``browser()`` and searches the string form of the
    response's ``info()`` for '?p=' followed by a run of non-whitespace
    characters terminated by '>'; that run is returned.

    Raises AttributeError when the pattern is absent, because
    ``re.search`` then returns None.
    """
    rq = browser().open(url)
    # Raw string: '\?' and '\S' are regex escapes, not string escapes. In a
    # plain literal they are invalid escape sequences and trigger a warning
    # on current Python versions. The pattern value itself is unchanged.
    return re.search(r'\?p=(\S+)>', str(rq.info())).group(1)
@ -77,7 +68,7 @@ class tls(BasicNewsRecipe):
if data['featuredarticle']:
self.log('A note from the Editor')
feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])]))
feeds.append(('A note from the Editor', [self.get_cont(data['featuredarticle'])]))
cont = data['contents']
for c in cont:
@ -87,11 +78,20 @@ class tls(BasicNewsRecipe):
self.log(section)
articles = []
for arts in cont[c]['articleslist']:
articles.append(get_cont(arts))
articles.append(self.get_cont(arts))
if articles:
feeds.append((section, articles))
return feeds
def get_cont(self, x):
    """Build the feed entry dict for a single article record.

    Reads 'url', 'headline', 'standfirst' and the 'text' of 'byline' from
    ``x``. Headline and standfirst are passed through ``re_html``; a truthy
    byline text is also passed through ``re_html`` and prefixed to the
    description as "By <byline> | ". The result is logged via ``self.log``
    and returned as a dict with 'title', 'description' and 'url' keys.
    """
    link = x['url']
    headline = re_html(x['headline'])
    summary = re_html(x['standfirst'])
    author = x['byline']['text']
    if author:
        summary = 'By ' + re_html(author) + ' | ' + summary
    self.log(' ', headline, '\n\t', summary, '\n\t', link)
    entry = {'title': headline, 'description': summary, 'url': link}
    return entry
def print_version(self, url):
    """Return the single-article JSON endpoint URL for the given article URL.

    The article id is obtained from ``get_id(url)`` and appended to the
    fixed wp-json single-article prefix.
    """
    article_id = get_id(url)
    endpoint = 'https://www.the-tls.co.uk/wp-json/tls/v2/single-article/'
    return endpoint + article_id