commit ab9cb22eac
Author: Kovid Goyal
Date:   2024-07-23 17:38:04 +05:30

6 changed files with 91 additions and 15 deletions


@@ -65,9 +65,20 @@ class MitTechnologyReview(BasicNewsRecipe):
             ),
         ]
 
+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': 'For example, https://www.technologyreview.com/magazines/the-education-issue/',
+            'default': 'http://www.technologyreview.com/magazine/'
+        }
+    }
+
     def parse_index(self):
         # for past editions, change the issue link below
         issue = 'http://www.technologyreview.com/magazine/'
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
         soup = self.index_to_soup(issue)
         if script := soup.find('script', id='preload'):
             raw = script.contents[0]
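
All six recipes in this commit follow the same pattern: a class-level recipe_specific_options dict that describes each option (a 'short' label, a 'long' help text with an example, and an optional 'default'), plus a lookup at download time that only honours non-empty string values. A minimal standalone sketch of that lookup, with an illustrative helper name that is not calibre API:

    def resolve_option(options, key, default):
        # Same guard as in the hunk above: fall back to the hard-coded
        # value unless the user supplied a non-empty string.
        d = options.get(key)
        return d if d and isinstance(d, str) else default

    resolve_option({}, 'issue_url', 'http://www.technologyreview.com/magazine/')
    # -> 'http://www.technologyreview.com/magazine/'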


@@ -121,9 +121,19 @@ class NewYorkTimes(BasicNewsRecipe):
             tf.write(self.get_nyt_page(url))
         return tf.name
 
+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (YYYY/MM/DD format)',
+            'long': 'For example, 2024/07/16'
+        }
+    }
+
     def read_todays_paper(self):
         INDEX = 'https://www.nytimes.com/section/todayspaper'
         # INDEX = 'file:///t/raw.html'
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            INDEX = 'https://www.nytimes.com/issue/todayspaper/' + d + '/todays-new-york-times'
         return self.index_to_soup(self.get_nyt_page(INDEX, skip_wayback=True))
 
     def read_nyt_metadata(self):
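
The 'date' value is spliced verbatim into the past-edition URL, so it must already be in YYYY/MM/DD form; the recipe itself performs no validation. A hedged sketch of checking the value before building the URL (the strptime check is an addition here, not part of the recipe):

    from datetime import datetime

    def todays_paper_index(date_str=None):
        if date_str:
            datetime.strptime(date_str, '%Y/%m/%d')  # ValueError if not YYYY/MM/DD
            return 'https://www.nytimes.com/issue/todayspaper/' + date_str + '/todays-new-york-times'
        return 'https://www.nytimes.com/section/todayspaper'

    todays_paper_index('2024/07/16')
    # -> 'https://www.nytimes.com/issue/todayspaper/2024/07/16/todays-new-york-times'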


@@ -24,6 +24,20 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
     conversion_options = {'linearize_tables': True}
     masthead_url = 'http://www.people.com.cn/img/2010wb/images/logo.gif'
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     feeds = [
         (u'时政', u'http://www.people.com.cn/rss/politics.xml'),
         (u'国际', u'http://www.people.com.cn/rss/world.xml'),
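
Unlike the URL-style options above, 'days' is applied in __init__ rather than parse_index: oldest_article is consumed by BasicNewsRecipe's feed machinery, so the override has to be in place as soon as the recipe is instantiated. The value is parsed with float(), so fractional days work:

    d = '0.5'                      # a value a user might enter for 'days'
    if d and isinstance(d, str):
        oldest_article = float(d)  # raises ValueError on non-numeric input
    # 0.5 days == 12 hours of articles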


@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from collections import OrderedDict
 
 from calibre import browser
@@ -32,18 +34,28 @@ class PhilosophyNow(BasicNewsRecipe):
         em, blockquote { color:#202020; }
     '''
 
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 136'
+        }
+    }
+
     def parse_index(self):
         soup = self.index_to_soup('https://philosophynow.org/')
         div = soup.find('div', attrs={'id': 'aside_issue_cover'})
-        url = div.find('a', href=True)['href']
-        issue = div.find('div', attrs={'id':'aside_issue_text'})
-        if issue:
-            self.log('Downloading issue:', self.tag_to_string(issue).strip())
-            self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']'
-            self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'}))
+        url = 'https://philosophynow.org' + div.find('a', href=True)['href']
+
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            url = 'https://philosophynow.org/issues/' + d
+
+        soup = self.index_to_soup(url)
+        div = soup.find('div', attrs={'id': 'issue_contents_cover_div'})
         cov_url = div.find('img', src=True)['src']
         self.cover_url = 'https://philosophynow.org' + cov_url
-        soup = self.index_to_soup('https://philosophynow.org' + url)
+        self.timefmt = ' [' + self.tag_to_string(soup.find('h1')) + ']'
 
         feeds = OrderedDict()
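
Here the option is a bare issue number rather than a URL; the recipe expands it into the archive URL itself. A condensed sketch of the selection logic (function name and sample href are illustrative):

    def issue_page_url(option_value, homepage_href):
        # Default: follow the cover link scraped from the homepage.
        url = 'https://philosophynow.org' + homepage_href
        if option_value and isinstance(option_value, str):
            # Override: build the archive URL from the bare issue number.
            url = 'https://philosophynow.org/issues/' + option_value
        return url

    issue_page_url('136', '/issues/163')
    # -> 'https://philosophynow.org/issues/136'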


@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://sciencex.com/
 '''
@@ -26,6 +28,20 @@ class scix(BasicNewsRecipe):
         .article__info, .article-byline, .article-main__more, .d-print-block {font-size:small; color:#404040;}
     '''
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     resolve_internal_links = True
     remove_empty_feeds = True
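
This is the same 'days' override as in the People's Daily recipe above. Since the three-line idiom recurs verbatim, it could in principle be factored into a shared helper; a hypothetical sketch (calibre ships no such helper, each recipe currently inlines it):

    def apply_days_option(recipe):
        # Equivalent to the __init__ bodies added in this commit.
        d = recipe.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            recipe.oldest_article = float(d)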


@@ -59,16 +59,29 @@ class ScientificAmerican(BasicNewsRecipe):
         br.submit()
         return br
 
+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': (
+                'For example, https://www.scientificamerican.com/issue/sa/2024/07-01/'
+                '\nYou can also download special-editions, physics, health, mind magazines by pasting the URL here.'
+            )
+        }
+    }
+
     def parse_index(self):
         # Get the cover, date and issue URL
-        fp_soup = self.index_to_soup("https://www.scientificamerican.com")
-        curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
-        if not curr_issue_link:
-            self.abort_recipe_processing("Unable to find issue link")
-        issue_url = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
-        # for past editions https://www.scientificamerican.com/archive/issues/
-        # issue_url = 'https://www.scientificamerican.com/issue/sa/2024/01-01/'
-        soup = self.index_to_soup(issue_url)
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
+        else:
+            fp_soup = self.index_to_soup("https://www.scientificamerican.com")
+            curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
+            if not curr_issue_link:
+                self.abort_recipe_processing("Unable to find issue link")
+            issue = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
+        soup = self.index_to_soup(issue)
         script = soup.find("script", id="__DATA__")
         if not script:
             self.abort_recipe_processing("Unable to find script")
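
In this recipe the option short-circuits the homepage scrape entirely: an explicit issue_url wins, and only the fallback branch fetches the front page to find the latest-issue link. A condensed sketch of that precedence (function name is illustrative):

    def choose_issue(option_value, scraped_href):
        if option_value and isinstance(option_value, str):
            return option_value  # user-supplied issue URL wins
        # Fallback: latest-issue link scraped from the homepage.
        return 'https://www.scientificamerican.com' + scraped_href

    choose_issue(None, '/issue/sa/2024/07-01/')
    # -> 'https://www.scientificamerican.com/issue/sa/2024/07-01/'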