Merge branch 'master' of https://github.com/unkn0w7n/calibre

2025-08-11 09:13:57 -04:00 · 2024-04-13 11:40:03 +05:30 · 2024-04-13 11:40:03 +05:30 · 5ecafc7243
commit 5ecafc7243
parent b6812939c9 fae7481849
6 changed files with 23 additions and 116 deletions
--- a/recipes/business_today.recipe
+++ b/recipes/business_today.recipe
@ -74,7 +74,7 @@ class BT(BasicNewsRecipe):

        # Insert feeds in specified order, if available

-        feedSort = ['Editor\'s Note']
+        feedSort = ["Editor's Note", "Editors note"]
        for i in feedSort:
            if i in sections:
                feeds.append((i, sections[i]))
@ -82,7 +82,8 @@ class BT(BasicNewsRecipe):
        # Done with the sorted feeds

        for i in feedSort:
-            del sections[i]
+            # pop() with a default skips names that are not in sections
+            sections.pop(i, None)

        # Append what is left over...

--- a/recipes/harpers.recipe
+++ b/recipes/harpers.recipe
@ -79,5 +79,18 @ class Harpers(BasicNewsRecipe):
    .index-statement .index-tooltip { font-size: small; }
    """

+    def get_cover_url(self):
+        """Return the cover image URL found on the Harper's issues page.
+
+        Returns None when the page has no matching issue-card link or that
+        link carries no <img> with a src, instead of raising.
+        """
+        issues_soup = self.index_to_soup("https://harpers.org/issues/")
+        # select_one() yields the first matching link, or None when nothing matches.
+        curr_issue_a_ele = issues_soup.select_one("div.issue-card a")
+        if curr_issue_a_ele is None:
+            return None
+        cover_img = curr_issue_a_ele.find("img")
+        if cover_img is None:
+            return None
+        return cover_img.get("src")

    feeds = [(u"Harper's Magazine", u'https://harpers.org/feed/')]
--- a/recipes/harpers_full.recipe
+++ b/recipes/harpers_full.recipe
@ -131,7 +131,10 @ class Harpers_full(BasicNewsRecipe):
        if not _issue_url:
            issues_soup = self.index_to_soup("https://harpers.org/issues/")
            curr_issue_a_ele = issues_soup.select_one("div.issue-card a")
            curr_issue_url = urljoin(self.base_url, curr_issue_a_ele["href"])
+            # Take the cover from the same issue card when it carries an <img>.
+            if curr_issue_a_ele.find("img"):
+                self.cover_url = curr_issue_a_ele.img["src"]
        else:
            curr_issue_url = _issue_url

--- a/recipes/hindu_business_line_print_edition.recipe
+++ b/recipes/hindu_business_line_print_edition.recipe
@ -1,94 +0,0 @@
-import json
-import re
-from collections import defaultdict
-from datetime import date
-
-from calibre.web.feeds.news import BasicNewsRecipe, classes
-
-
-def absurl(url):
-    if url.startswith('/'):
-        url = 'https://www.thehindubusinessline.com' + url
-    return url
-
-
-local_edition = None
-# Chennai is default edition, for other editions use 'bl_hyderabad', 'bl_bangalore', 'bl_mumbai'
-
-
-class BusinessLine(BasicNewsRecipe):
-    title = 'The Hindu BusinessLine | Print Edition'
-    __author__ = 'unkn0wn'
-    description = (
-        'The Hindu BusinessLine is known for its credibility, accuracy, in-depth analysis of markets and sober coverage'
-        ' of business news. BusinessLine reduces the daily grind of business to relevant, readable, byte-sized stories.'
-        ' The newspaper is extensively followed by the decision makers and change leaders from the world of business.'
-    )
-    language = 'en_IN'
-    no_stylesheets = True
-    masthead_url = 'https://www.thehindubusinessline.com/theme/images/bl-online/bllogo.png'
-    remove_attributes = ['style', 'height', 'width']
-    extra_css = '.caption{font-size:small; text-align:center;}'\
-        '.author{font-size:small; font-weight:bold;}'\
-        '.subhead, .subhead_lead {font-weight:bold;}'\
-        'img {display:block; margin:0 auto;}'
-
-    ignore_duplicate_articles = {'url'}
-
-    keep_only_tags = [
-        classes('articlepage')
-    ]
-
-    remove_tags = [
-        classes('hide-mobile comments-shares share-page editiondetails author-img')
-    ]
-
-    def preprocess_html(self, soup):
-        for cap in soup.findAll('p', attrs={'class':'caption'}):
-            cap.name = 'figcaption'
-        for img in soup.findAll('img', attrs={'data-original':True}):
-            img['src'] = img['data-original']
-        return soup
-
-    def parse_index(self):
-        dt = date.today().strftime('%Y-%m-%d')
-        # For past editions, set date to, for example, '2023-01-28'
-        # dt = '2023-01-28'
-        if local_edition:
-            url = absurl('/todays-paper/' + dt + '/' + local_edition + '/')
-        else:
-            url = absurl('/todays-paper/' + dt + '/bl_chennai/')
-        raw = self.index_to_soup(url, raw=True)
-        soup = self.index_to_soup(raw)
-        ans = self.hindu_parse_index(soup)
-        if not ans:
-            raise ValueError(
-                    'The Hindu BusinessLine Newspaper is not published Today.'
-                )
-        cover = soup.find(attrs={'class':'hindu-ad'})
-        if cover:
-            self.cover_url = cover.img['src']
-        return ans
-
-    def hindu_parse_index(self, soup):
-        for script in soup.findAll('script'):
-            if not self.tag_to_string(script).strip().startswith('let grouped_articles = {}'):
-                continue
-            if script is not None:
-                art = re.search(r'grouped_articles = ({\".*)', self.tag_to_string(script))
-                data = json.JSONDecoder().raw_decode(art.group(1))[0]
-
-                feeds_dict = defaultdict(list)
-
-                a = json.dumps(data)
-                for sec in json.loads(a):
-                    for item in data[sec]:
-                        section = sec.replace('BL_', '')
-                        title = item['articleheadline']
-                        url = absurl(item['href'])
-                        desc = 'Page no.' + item['pageno'] + ' | ' + item['teaser_text'] or ''
-                        self.log('\t', title, '\n\t\t', url)
-                        feeds_dict[section].append({"title": title, "url": url, "description": desc})
-                return [(section, articles) for section, articles in feeds_dict.items()]
-            else:
-                return []
--- a/recipes/icons/hindu_business_line_print_edition.png
+++ b/recipes/icons/hindu_business_line_print_edition.png
--- a/recipes/new_yorker.recipe
+++ b/recipes/new_yorker.recipe
@ -80,27 +80,21 @@ class NewYorker(BasicNewsRecipe):
        # return buf.getvalue()

    def parse_index(self):
-        # Get cover
-        cover_soup = self.index_to_soup('https://www.newyorker.com/archive')
-        cover_img = cover_soup.find(
-            attrs={'class': lambda x: x and 'MagazineSection__cover___' in x})
-        if cover_img is not None:
-            cover_img = cover_img.find('img')
-            if cover_img is not None:
-                self.cover_url = cover_img.get('src')
-                try:
-                    # the src original resolution w_280 was too low, replace w_280 with w_560
-                    cover_url_width_index = self.cover_url.find("w_")
-                    old_width = self.cover_url[cover_url_width_index:cover_url_width_index+5]
-                    self.cover_url = self.cover_url.replace(old_width, "w_560")
-                except Exception:
-                    self.log('Failed enlarging cover img, using the original one')
-                self.log('Found cover:', self.cover_url)
-
-        # Get content
-
        soup = self.index_to_soup(
            'https://www.newyorker.com/magazine?intcid=magazine')
+        cover_img = soup.find('picture',
+            attrs={'class': lambda x: x and 'asset-embed__responsive-asset' in x})
+        if cover_img is not None:
+            self.cover_url = cover_img.img['src']
+            # src is low-res (e.g. w_280): swap its width token for w_640. find() gives -1 if absent.
+            cover_url_width_index = self.cover_url.find("w_")
+            if cover_url_width_index == -1:
+                self.log('Failed enlarging cover img, using the original one')
+            else:
+                old_width = self.cover_url[cover_url_width_index:cover_url_width_index+5]
+                self.cover_url = self.cover_url.replace(old_width, "w_640")
+            self.log('Found cover:', self.cover_url)
+
        feeds_dict = defaultdict(list)
        for section in soup.findAll('section',
            attrs={'class': lambda x: x and 'SummaryRiverSection-' in x}):