Kovid Goyal 2024-07-27 13:25:16 +05:30
commit f3420c6b15
GPG Key ID: 06BC317B515ACE7C
4 changed files with 53 additions and 14 deletions
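
The diffs below wire calibre's recipe_specific_options hook into three news recipes, so a user can override the issue number or article age at download time instead of editing a module-level variable. A minimal sketch of the pattern (the recipe name and option key are illustrative, not taken from any one file below; it assumes a calibre environment for the import):

    from calibre.web.feeds.news import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        title = 'Example'
        # Declare the knobs a user may set when downloading the recipe.
        recipe_specific_options = {
            'issue': {
                'short': 'Issue number to download',
                'long': 'For example, 2403',
            },
        }

        def parse_index(self):
            # calibre exposes the user-supplied values as strings at run time.
            issue = self.recipe_specific_options.get('issue')
            if issue and isinstance(issue, str):
                self.log('User requested issue', issue)
            return []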

View File

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 import json
 import re
 from collections import OrderedDict
@@ -7,8 +9,6 @@ from calibre import browser, random_user_agent
 from calibre.web.feeds.news import BasicNewsRecipe, classes
 from mechanize import Request
 
-_issue_url = ""  # custom issue url
-
 
 class HBR(BasicNewsRecipe):
     title = "Harvard Business Review"
@@ -129,15 +129,23 @@ class HBR(BasicNewsRecipe):
         content_ele.append(new_soup.body)
         return str(soup)
 
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 2403'
+        }
+    }
+
     def parse_index(self):
-        if not _issue_url:
+        d = self.recipe_specific_options.get('issue')
+        if not (d and isinstance(d, str)):
             soup = self.index_to_soup(f"{self.base_url}/magazine")
             a = soup.find("a", href=lambda x: x and x.startswith("/archive-toc/"))
             cov_url = a.find("img", attrs={"src": True})["src"]
             self.cover_url = urljoin(self.base_url, cov_url)
             issue_url = urljoin(self.base_url, a["href"])
         else:
-            issue_url = _issue_url
+            issue_url = 'https://hbr.org/archive-toc/BR' + d
             mobj = re.search(r"archive-toc/(?P<issue>(BR)?\d+)\b", issue_url)
             if mobj:
                 self.cover_url = f'https://hbr.org/resources/images/covers/{mobj.group("issue")}_500.png'
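
With the 'issue' option above, a value such as 2403 is appended to the BR archive prefix, and the 500 px cover follows from the same number via the existing regex. A rough standalone sketch of that URL construction (the helper name is hypothetical, not part of the recipe):

    import re

    def hbr_issue_urls(issue='2403', base_url='https://hbr.org'):
        # Mirrors the recipe: a bare issue number becomes an archive-toc URL ...
        issue_url = base_url + '/archive-toc/BR' + issue
        cover_url = None
        mobj = re.search(r'archive-toc/(?P<issue>(BR)?\d+)\b', issue_url)
        if mobj:
            # ... and the matched BR number selects the cover image.
            cover_url = f'{base_url}/resources/images/covers/{mobj.group("issue")}_500.png'
        return issue_url, cover_url

    # ('https://hbr.org/archive-toc/BR2403',
    #  'https://hbr.org/resources/images/covers/BR2403_500.png')
    print(hbr_issue_urls())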

View File

@@ -20,12 +20,28 @@ class OpenMagazine(BasicNewsRecipe):
         'blockquote{color:#404040;}'
         '.about-author{font-size:small;}'
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     def get_cover_url(self):
-        soup = self.index_to_soup('https://openthemagazine.com/')
-        tag = soup.find(attrs={'class': 'magazine-item mr-1'})
-        if tag:
-            self.cover_url = tag.find('img')['src']
-        return getattr(self, 'cover_url', None)
+        d = self.recipe_specific_options.get('days')
+        if not (d and isinstance(d, str)):
+            soup = self.index_to_soup('https://openthemagazine.com/')
+            tag = soup.find(attrs={'class': 'magazine-item mr-1'})
+            if tag:
+                self.cover_url = tag.find('img')['src']
+        return getattr(self, 'cover_url', None)
 
     keep_only_tags = [
         classes('post-data post-thumb post-meta post-excerp'),
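
The 'days' value arrives as a string and is converted with float() in __init__, so fractional values work: '0.5' narrows oldest_article to the past 12 hours. A small standalone sketch of that guard (the helper name and the default of 7 days are illustrative, not taken from the recipe):

    def resolve_oldest_article(option_value, default=7.0):
        # Only a non-empty string overrides the default, matching the recipe's check.
        if option_value and isinstance(option_value, str):
            return float(option_value)  # '0.5' -> 0.5 days, i.e. the past 12 hours
        return default

    print(resolve_oldest_article('0.5'))  # 0.5
    print(resolve_oldest_article(None))   # 7.0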

View File

@@ -20,7 +20,8 @@ class Reuters(BasicNewsRecipe):
         'reaching billions of people worldwide every day. Reuters provides business, financial, national and international '
         'news to professionals via desktop terminals, the worlds media organizations, industry events and directly to consumers.'
     )
-    masthead_url = 'https://www.reutersprofessional.com/wp-content/uploads/2024/03/primary-logo.svg'
+    masthead_url = 'https://www.reutersagency.com/wp-content/uploads/2024/06/reuters-logo.png'
+    cover_url = 'https://yt3.googleusercontent.com/ytc/AIdro_mk43b9eQwN15ZBDyMPDaElxvw4V-oUS9XDUvVnYB3gA9yA=s1024'
     language = 'en'
     encoding = 'utf-8'
     oldest_article = 1.2  # days

View File

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from collections import OrderedDict
 
 from calibre.web.feeds.news import BasicNewsRecipe, classes
@@ -25,6 +27,13 @@ class Sportstar(BasicNewsRecipe):
         .author, .publish-time {font-size:small;}
     '''
 
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download\n(Volume-Issue format)',
+            'long': 'For example, 47-16'
+        }
+    }
+
     keep_only_tags = [
         dict(name='h1', attrs={'class':'title'}),
         dict(name='h2', attrs={'class':'sub-title'}),
@@ -39,10 +48,15 @@ class Sportstar(BasicNewsRecipe):
     ]
 
     def parse_index(self):
-        soup = self.index_to_soup('https://sportstar.thehindu.com/magazine/')
-        url = soup.find('a', href=lambda x: x and x.startswith('https://sportstar.thehindu.com/magazine/issue/'))['href']
-        self.log('Downloading Issue: ', url)
-        soup = self.index_to_soup(url)
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            issue_url = 'https://sportstar.thehindu.com/magazine/issue/vol' + d
+        else:
+            soup = self.index_to_soup('https://sportstar.thehindu.com/magazine/')
+            issue_url = soup.find('a', href=lambda x: x and x.startswith('https://sportstar.thehindu.com/magazine/issue/'))['href']
+        self.log('Downloading Issue: ', issue_url)
+        soup = self.index_to_soup(issue_url)
 
         feeds = OrderedDict()
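
For Sportstar the option is the Volume-Issue pair, e.g. 47-16, which is appended to the .../magazine/issue/vol prefix; without it, the recipe scrapes the newest issue link from the magazine index page. A quick sketch of that selection (the helper and its second argument are illustrative, not part of the recipe):

    def sportstar_issue_url(option_value=None, newest_from_index=None):
        # A user-supplied 'issue' string such as '47-16' wins; otherwise fall
        # back to whatever link was scraped from the magazine index page.
        if option_value and isinstance(option_value, str):
            return 'https://sportstar.thehindu.com/magazine/issue/vol' + option_value
        return newest_from_index

    print(sportstar_issue_url('47-16'))
    # https://sportstar.thehindu.com/magazine/issue/vol47-16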