diff --git a/recipes/nautilus.recipe b/recipes/nautilus.recipe
index 2c8e4138b4..be056573ef 100644
--- a/recipes/nautilus.recipe
+++ b/recipes/nautilus.recipe
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 nautil.us
 '''
@@ -5,8 +6,8 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes
 
 
 class Nautilus(BasicNewsRecipe):
-    title = u'Nautilus'
-    language = 'en'
+    title = u'Nautilus Magazine'
+    language = 'en_US'
     __author__ = 'unkn0wn'
     oldest_article = 45  # days
     max_articles_per_feed = 50
@@ -16,7 +17,7 @@ class Nautilus(BasicNewsRecipe):
         ' no matter how complex, can be explained with clarity and vitality.')
     no_stylesheets = True
     use_embedded_content = False
-    masthead_url = 'https://nautil.us/wp-content/themes/nautilus/images/logo/light.svg'
+    masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/1b/Nautilus.svg/640px-Nautilus.svg.png'
     remove_attributes = ['height', 'width']
     ignore_duplicate_articles = {'title', 'url'}
     remove_empty_feeds = True
@@ -28,6 +29,21 @@ class Nautilus(BasicNewsRecipe):
         .breadcrumb{color:gray; font-size:small;}
         .article-author{font-size:small;}
     '''
+
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     keep_only_tags = [classes('article-left-col feature-image article-content')]
 
     remove_tags = [
@@ -85,4 +101,6 @@ class Nautilus(BasicNewsRecipe):
             ul.name = 'span'
             for li in ul.findAll('li'):
                 li.name = 'p'
+        for img in soup.findAll('img', attrs={'srcset':True}):
+            img['src'] = img['srcset'].split(',')[-1].split()[0]
         return soup
diff --git a/recipes/tls_mag.recipe b/recipes/tls_mag.recipe
index 9cd10dcf51..f888c47a08 100644
--- a/recipes/tls_mag.recipe
+++ b/recipes/tls_mag.recipe
@@ -13,15 +13,6 @@ def re_html(y):
     soup = BeautifulSoup(y.rstrip())
     return soup.text
 
-def get_cont(x):
-    url = x['url']
-    title = x['headline']
-    desc = x['standfirst']
-    if x['byline']['text']:
-        desc = 'By ' + x['byline']['text'] + ' | ' + desc
-    print(' ', re_html(title), '\n\t', re_html(desc), '\n\t', url)
-    return ({ 'title': re_html(title), 'description': re_html(desc), 'url': url })
-
 def get_id(url):
     rq = browser().open(url)
     return re.search('\?p=(\S+)>', str(rq.info())).group(1)
@@ -77,7 +68,7 @@
 
         if data['featuredarticle']:
             self.log('A note from the Editor')
-            feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])]))
+            feeds.append(('A note from the Editor', [self.get_cont(data['featuredarticle'])]))
 
         cont = data['contents']
         for c in cont:
@@ -87,11 +78,20 @@
             self.log(section)
             articles = []
            for arts in cont[c]['articleslist']:
-                articles.append(get_cont(arts))
+                articles.append(self.get_cont(arts))
             if articles:
                 feeds.append((section, articles))
 
         return feeds
 
+    def get_cont(self, x):
+        url = x['url']
+        title = re_html(x['headline'])
+        desc = re_html(x['standfirst'])
+        if x['byline']['text']:
+            desc = 'By ' + re_html(x['byline']['text']) + ' | ' + desc
+        self.log(' ', title, '\n\t', desc, '\n\t', url)
+        return ({ 'title': title, 'description': desc, 'url': url })
+
     def print_version(self, url):
         return 'https://www.the-tls.co.uk/wp-json/tls/v2/single-article/' + get_id(url)
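
Not part of the patch: a standalone sketch of the srcset fallback the last Nautilus hunk adds, shown against plain bs4 rather than calibre's bundled BeautifulSoup, and assuming the last srcset candidate is the image worth keeping (which is what split(',')[-1] selects). The sample markup and file names are made up for illustration.

# Illustration only, not part of the patch. Assumes bs4 is installed and that
# the last srcset candidate is the preferred image, mirroring the recipe change.
from bs4 import BeautifulSoup

html = '<img src="placeholder.gif" srcset="a-320w.jpg 320w, a-640w.jpg 640w, a-1280w.jpg 1280w"/>'
soup = BeautifulSoup(html, 'html.parser')

for img in soup.findAll('img', attrs={'srcset': True}):
    # 'a-1280w.jpg 1280w' -> keep only the URL part of the last candidate
    img['src'] = img['srcset'].split(',')[-1].split()[0]

print(soup.img['src'])  # a-1280w.jpg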
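
Likewise for reference, a minimal sketch of the option-coercion pattern in the new __init__ override: the user-supplied 'days' value may arrive as a string (hence the isinstance check), so it is converted to a float before replacing oldest_article. FakeRecipe and the hard-coded option value below are hypothetical stand-ins for BasicNewsRecipe and whatever the user actually passes.

# Illustration only; FakeRecipe stands in for BasicNewsRecipe and the hard-coded
# 'days' value for user input. Mirrors the coercion added to the Nautilus recipe.
class FakeRecipe:
    oldest_article = 45  # recipe default, in days

    # calibre would populate this from the user's input; hard-coded here
    recipe_specific_options = {'days': '0.5'}

    def __init__(self):
        d = self.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            # string values are coerced before overriding the default
            self.oldest_article = float(d)

print(FakeRecipe().oldest_article)  # 0.5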