Merge branch 'master' of https://github.com/unkn0w7n/calibre

2025-08-11 09:13:57 -04:00 · 2024-06-28 10:43:38 +05:30 · 2024-06-28 10:43:38 +05:30 · 11e3d27768
commit 11e3d27768
parent c721338b43 7e8d6a6403
2 changed files with 20 additions and 12 deletions
--- a/recipes/harpers.recipe
+++ b/recipes/harpers.recipe
@@ -51,7 +51,7 @@ class Harpers(BasicNewsRecipe):
        for img in soup.findAll('img', attrs={'srcset':True}):
            for src in img['srcset'].split(','):
                if '768w' in src:
-                    img['src'] = img['src'].split()[0]
+                    img['src'] = src.split()[0]
        return soup

    def parse_index(self):
@@ -67,12 +67,12 @@ class Harpers(BasicNewsRecipe):
        for a in soup.findAll('a', attrs={'href':lambda x: x and x.startswith(url + '/')}):
            if not a.find('img') and a.find(['h1', 'h2', 'h3', 'h4']):
                url = a['href']
-                title = self.tag_to_string(a)
+                title = self.tag_to_string(a).strip()
                desc = ''
                div = a.findParent('div').find('div', attrs={'class':'byline'})
                if div:
-                    desc = self.tag_to_string(div)
-                self.log('\t', title, '\n\t', desc, '\n\t', url)
+                    desc = self.tag_to_string(div).strip()
+                self.log('      ', title, '\n\t', desc[:-1], '\n\t', url)
                ans.append({'title': title, 'description': desc, 'url': url})
        return [('Articles', ans)]

--- a/recipes/tls_mag.recipe
+++ b/recipes/tls_mag.recipe
@@ -7,8 +7,9 @@ from calibre.web.feeds.news import BasicNewsRecipe


 def re_html(y):
-    soup = BeautifulSoup(y.rstrip(), "html.parser")
-    return soup.text
+    if y:
+        soup = BeautifulSoup(y.rstrip(), "html.parser")
+        return soup.text

 def get_cont(x):
    url = x['url']
@@ -36,6 +37,7 @@ class tls(BasicNewsRecipe):
    encoding = 'utf-8'
    language = 'en_GB'
    masthead_url = 'https://www.the-tls.co.uk/wp-content/uploads/sites/7/2019/11/Smaller-Logo.jpg'
+    remove_empty_feeds = True

    extra_css = '''
        .label { font-size:small; color:#404040; }
@@ -58,12 +60,15 @@ class tls(BasicNewsRecipe):

        feeds = []

-        self.log('A note from the Editor')
-        feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])]))
+        if data['featuredarticle']:
+            self.log('A note from the Editor')
+            feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])]))

        cont = data['contents']
        for c in cont:
            section = re_html(cont[c]['articleheader']['title'])
+            if not section:
+                continue
            self.log(section)
            articles = []
            for arts in cont[c]['articleslist']:
@@ -84,10 +89,13 @@ class tls(BasicNewsRecipe):
        auth = lede = ''

        label = '<div class="label">{}</div>\n'
-        if prim['label']['category']['text']:
-            label = label.format(prim['label']['articletype'] + ' | ' + prim['label']['category']['text'])
-        else:
-            label = label.format(prim['label']['articletype'])
+        l = prim['label']
+        if l['category']['text'] and l['articletype']:
+            label = label.format(l['articletype'] + ' | ' + l['category']['text'])
+        elif l['articletype']:
+            label = label.format(l['articletype'])
+        elif l['category']['text']:
+            label = label.format(l['category']['text'])

        if prim['byline']['text']:
            auth = '<p class="auth"><a href="{}">'.format(prim['byline']['link']) + prim['byline']['text'] + '</a></p>\n'