Merge branch 'master' of https://github.com/unkn0w7n/calibre

2025-12-10 15:15:03 -05:00 · 2024-08-16 12:47:12 +05:30 · 2024-08-16 12:47:12 +05:30 · 71e8654c80
commit 71e8654c80
parent aad7f706d5 63ca3ff11a
2 changed files with 32 additions and 14 deletions
--- a/recipes/nautilus.recipe
+++ b/recipes/nautilus.recipe
@@ -1,3 +1,4 @@
+#!/usr/bin/env  python
 '''
 nautil.us
 '''
@@ -5,8 +6,8 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes


 class Nautilus(BasicNewsRecipe):
-    title = u'Nautilus'
-    language = 'en'
+    title = u'Nautilus Magazine'
+    language = 'en_US'
    __author__ = 'unkn0wn'
    oldest_article = 45  # days
    max_articles_per_feed = 50
@@ -16,7 +17,7 @@ class Nautilus(BasicNewsRecipe):
        ' no matter how complex, can be explained with clarity and vitality.')
    no_stylesheets = True
    use_embedded_content = False
-    masthead_url = 'https://nautil.us/wp-content/themes/nautilus/images/logo/light.svg'
+    masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/1b/Nautilus.svg/640px-Nautilus.svg.png'
    remove_attributes = ['height', 'width']
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True
@@ -28,6 +29,21 @@ class Nautilus(BasicNewsRecipe):
        .breadcrumb{color:gray; font-size:small;}
        .article-author{font-size:small;}
    '''
+
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
    keep_only_tags = [classes('article-left-col feature-image article-content')]

    remove_tags = [
@@ -85,4 +101,6 @@ class Nautilus(BasicNewsRecipe):
            ul.name = 'span'
            for li in ul.findAll('li'):
                li.name = 'p'
+        for img in soup.findAll('img', attrs={'srcset':True}):
+            img['src'] = img['srcset'].split(',')[-1].split()[0]
        return soup
--- a/recipes/tls_mag.recipe
+++ b/recipes/tls_mag.recipe
@@ -13,15 +13,6 @@ def re_html(y):
        soup = BeautifulSoup(y.rstrip())
        return soup.text

-def get_cont(x):
-    url = x['url']
-    title = x['headline']
-    desc = x['standfirst']
-    if x['byline']['text']:
-        desc = 'By ' + x['byline']['text'] + ' | ' + desc
-    print('              ', re_html(title), '\n\t', re_html(desc), '\n\t', url)
-    return ({ 'title': re_html(title), 'description': re_html(desc), 'url': url })
-
 def get_id(url):
    rq = browser().open(url)
    return re.search('\?p=(\S+)>', str(rq.info())).group(1)
@@ -77,7 +68,7 @@ class tls(BasicNewsRecipe):

        if data['featuredarticle']:
            self.log('A note from the Editor')
-            feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])]))
+            feeds.append(('A note from the Editor', [self.get_cont(data['featuredarticle'])]))

        cont = data['contents']
        for c in cont:
@@ -87,11 +78,20 @@ class tls(BasicNewsRecipe):
            self.log(section)
            articles = []
            for arts in cont[c]['articleslist']:
-                articles.append(get_cont(arts))
+                articles.append(self.get_cont(arts))
            if articles:
                feeds.append((section, articles))
        return feeds

+    def get_cont(self, x):
+        url = x['url']
+        title = re_html(x['headline'])
+        desc = re_html(x['standfirst'])
+        if x['byline']['text']:
+            desc = 'By ' + re_html(x['byline']['text']) + ' | ' + desc
+        self.log('              ', title, '\n\t', desc, '\n\t', url)
+        return ({ 'title': title, 'description': desc, 'url': url })
+
    def print_version(self, url):
        return 'https://www.the-tls.co.uk/wp-json/tls/v2/single-article/' + get_id(url)