Merge branch 'master' of https://github.com/unkn0w7n/calibre

2025-08-11 09:13:57 -04:00 · 2024-02-01 19:13:12 +05:30 · 2024-02-01 19:13:12 +05:30 · c48d0700a4
commit c48d0700a4
parent e1956f3cff 298ffba0b7
3 changed files with 25 additions and 9 deletions
--- a/recipes/barrons.recipe
+++ b/recipes/barrons.recipe
@@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes
+from collections import defaultdict
 from datetime import date
 import re
@@ -24,17 +25,17 @@ class barrons(BasicNewsRecipe):
        img {display:block; margin:0 auto;}
        .figc { font-size:small; text-align:center; }
        .imageCredit { color:#404040; font-size:x-small; }
-        .headline__category { font-size:small; color:#404040; }
+        .headline__category, .article-prebody { font-size:small; color:#404040; }
        .sub-head { color:#202020; }
    '''
    keep_only_tags = [
-        classes('headline articleLead'),
+        classes('headline articleLead article-prebody'),
        dict(name='section', attrs={'subscriptions-section':'content'})
    ]
    remove_tags = [
        dict(name=['meta', 'link', 'svg', 'button', 'i-amphtml-sizer']),
-        classes('wsj-ad dynamic-inset-overflow')
+        classes('wsj-ad dynamic-inset-overflow newsletter-inset')
    ]
    def preprocess_html(self, soup):
@@ -42,6 +43,9 @@ class barrons(BasicNewsRecipe):
            figc['class'] = 'figc'
            for p in figc.findAll('p'):
                p.name = 'div'
+        for by in soup.findAll(**classes('byline')):
+            for p in by.findAll('p'):
+                p.name = 'span'
        for h2 in soup.findAll('h2'):
            h2.name = 'h4'
        for iframe in soup.findAll('amp-iframe'):
@@ -54,7 +58,11 @@ class barrons(BasicNewsRecipe):
                iframe['src'] = 'https://datawrapper.dwcdn.net/' + data.group(1) + '/full.png'
        for amp in soup.findAll('amp-img'):
            if not amp.find('img', attrs={'src':True}):
-                amp.name = 'img'
+                if amp.has_attr('src'):
+                    amp['src'] = amp['src'] + '&pixel_ratio=1.5'
+                    amp.name = 'img'
+            else:
+                amp.img['src'] = amp.img['src'] + '&pixel_ratio=1.5'
        return soup
    def get_browser(self, *args, **kwargs):
@@ -73,9 +81,15 @@ class barrons(BasicNewsRecipe):
        self.log(self.timefmt)
        self.cover_url = issue.img['src'].split('?')[0]
-        ans = []
+        ans = defaultdict(list)
        for articles in archive.findAll(**prefixed_classes('BarronsTheme--story--')):
+            section = 'Magazine'
+            strap = articles.find_previous_sibling(**prefixed_classes('BarronsTheme--strap--'))
+            if strap:
+                label = strap.find(**prefixed_classes('BarronsTheme--label--'))
+                if label:
+                    section = self.tag_to_string(label).strip()
            a = articles.find(**prefixed_classes('BarronsTheme--heading'))
            title = self.tag_to_string(a).strip()
            url = a.a['href']
@@ -90,8 +104,8 @@ class barrons(BasicNewsRecipe):
            if summ:
                desc += ' | ' + self.tag_to_string(summ)
            self.log('\t', title, ' ', url, '\n\t', desc)
-            ans.append({'title': title, 'url': url, 'description': desc})
+            ans[section].append({'title': title, 'url': url, 'description': desc})
-        return [('Articles', ans)]
+        return [(section, articles) for section, articles in ans.items()]
    def print_version(self, url):
        return url.split('?')[0].replace('/articles/', '/amp/articles/')
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@@ -102,7 +102,7 @@ class LiveMint(BasicNewsRecipe):
            dict(name=['meta', 'link', 'svg', 'button', 'iframe']),
            classes(
                'trendingSimilarHeight moreNews mobAppDownload label msgError msgOk taboolaHeight'
-                ' socialHolder imgbig disclamerText disqus-comment-count openinApp2 lastAdSlot'
+                ' socialHolder imgbig disclamerText disqus-comment-count openinApp2 lastAdSlot bs_logo'
                ' datePublish sepStory premiumSlider moreStory Joinus moreAbout milestone benefitText'
            )
        ]
--- a/recipes/natgeomag.recipe
+++ b/recipes/natgeomag.recipe
@@ -154,7 +154,9 @@ class NatGeo(BasicNewsRecipe):
        if photoart := soup.find(attrs={'class':lambda x: x and 'BgImagePromo__Container__Text__Link' in x.split()}):
            ans2 = []
            title = self.tag_to_string(photoart)
-            url = 'https://www.nationalgeographic.com' + photoart['href']
+            url = photoart['href']
+            if url.startswith('/'):
+                url = 'https://www.nationalgeographic.com' + photoart['href']
            ans2.append(('Photo Essay', [{'title': title, 'url': url}]))
        for gird in soup.findAll(attrs={'class':'GridPromoTile'}):
            for article in soup.findAll('article'):