Kovid Goyal 2024-07-23 17:38:04 +05:30
commit ab9cb22eac
6 changed files with 91 additions and 15 deletions


@@ -65,9 +65,20 @@ class MitTechnologyReview(BasicNewsRecipe):
         ),
     ]
 
+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': 'For example, https://www.technologyreview.com/magazines/the-education-issue/',
+            'default': 'http://www.technologyreview.com/magazine/'
+        }
+    }
+
     def parse_index(self):
         # for past editions, change the issue link below
         issue = 'http://www.technologyreview.com/magazine/'
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
         soup = self.index_to_soup(issue)
         if script := soup.find('script', id='preload'):
             raw = script.contents[0]
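Note: this hunk is the template for the whole commit. Every recipe below reads a user-supplied value out of recipe_specific_options and applies it only when it is a non-empty string. A minimal standalone sketch of that guard (here opts stands in for self.recipe_specific_options; dict-like .get() access is the only calibre behaviour assumed):

    # 'opts' stands in for self.recipe_specific_options as calibre populates it.
    opts = {'issue_url': 'https://www.technologyreview.com/magazines/the-education-issue/'}

    issue = 'http://www.technologyreview.com/magazine/'  # default: current issue
    d = opts.get('issue_url')
    if d and isinstance(d, str):  # ignore empty or non-string values
        issue = d
    print(issue)  # the URL parse_index() will fetch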


@@ -121,9 +121,19 @@ class NewYorkTimes(BasicNewsRecipe):
             tf.write(self.get_nyt_page(url))
             return tf.name
 
+    recipe_specific_options = {
+        'date': {
+            'short': 'The date of the edition to download (YYYY/MM/DD format)',
+            'long': 'For example, 2024/07/16'
+        }
+    }
+
     def read_todays_paper(self):
         INDEX = 'https://www.nytimes.com/section/todayspaper'
         # INDEX = 'file:///t/raw.html'
+        d = self.recipe_specific_options.get('date')
+        if d and isinstance(d, str):
+            INDEX = 'https://www.nytimes.com/issue/todayspaper/' + d + '/todays-new-york-times'
         return self.index_to_soup(self.get_nyt_page(INDEX, skip_wayback=True))
 
     def read_nyt_metadata(self):
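The date value is interpolated directly into the issue URL. If stricter input checking were ever wanted, a strptime round-trip would reject malformed dates before any network request; this is a hypothetical addition for illustration, not something the commit does:

    # Hypothetical validation of the 'date' option (YYYY/MM/DD).
    from datetime import datetime

    INDEX = 'https://www.nytimes.com/section/todayspaper'
    d = '2024/07/16'  # example value of recipe_specific_options.get('date')
    if d and isinstance(d, str):
        datetime.strptime(d, '%Y/%m/%d')  # raises ValueError on a malformed date
        INDEX = 'https://www.nytimes.com/issue/todayspaper/' + d + '/todays-new-york-times'
    print(INDEX)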


@@ -24,6 +24,20 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe):
     conversion_options = {'linearize_tables': True}
     masthead_url = 'http://www.people.com.cn/img/2010wb/images/logo.gif'
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     feeds = [
         (u'时政', u'http://www.people.com.cn/rss/politics.xml'),
         (u'国际', u'http://www.people.com.cn/rss/world.xml'),
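This RSS-based recipe has no parse_index to hook into, so the override happens at construction time instead, after the base class has initialised. Reduced to its essentials:

    # The 'days' override in isolation. float() accepts fractional
    # days, so '0.5' means articles from the past 12 hours.
    oldest_article = 2    # class-level default (value assumed for this sketch)
    d = '0.5'             # example value of recipe_specific_options.get('days')
    if d and isinstance(d, str):
        oldest_article = float(d)
    print(oldest_article)  # 0.5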


@ -1,3 +1,5 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from collections import OrderedDict from collections import OrderedDict
from calibre import browser from calibre import browser
@@ -31,19 +33,29 @@ class PhilosophyNow(BasicNewsRecipe):
         .articleImageCaption { font-size:small; text-align:center; }
         em, blockquote { color:#202020; }
     '''
 
+    recipe_specific_options = {
+        'issue': {
+            'short': 'Enter the Issue Number you want to download ',
+            'long': 'For example, 136'
+        }
+    }
+
     def parse_index(self):
         soup = self.index_to_soup('https://philosophynow.org/')
         div = soup.find('div', attrs={'id': 'aside_issue_cover'})
-        url = div.find('a', href=True)['href']
-        issue = div.find('div', attrs={'id':'aside_issue_text'})
-        if issue:
-            self.log('Downloading issue:', self.tag_to_string(issue).strip())
-            self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']'
-            self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'}))
+        url = 'https://philosophynow.org' + div.find('a', href=True)['href']
+
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            url = 'https://philosophynow.org/issues/' + d
+
+        soup = self.index_to_soup(url)
+        div = soup.find('div', attrs={'id': 'issue_contents_cover_div'})
         cov_url = div.find('img', src=True)['src']
         self.cover_url = 'https://philosophynow.org' + cov_url
-        soup = self.index_to_soup('https://philosophynow.org' + url)
+        self.timefmt = ' [' + self.tag_to_string(soup.find('h1')) + ']'
 
         feeds = OrderedDict()
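Besides the new option, two behaviours change in this hunk: the scraped issue URL is made absolute immediately, and timefmt is now taken from the issue page's <h1> rather than the front-page sidebar, which only describes the current issue and would mislabel a back number. The URL selection reduces to:

    # Sketch: choosing the Philosophy Now issue URL.
    front_page_href = '/issues/163'  # example href scraped from the cover link
    url = 'https://philosophynow.org' + front_page_href

    d = '136'  # example value of recipe_specific_options.get('issue')
    if d and isinstance(d, str):
        url = 'https://philosophynow.org/issues/' + d
    print(url)  # https://philosophynow.org/issues/136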


@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 https://sciencex.com/
 '''
@@ -26,6 +28,20 @@ class scix(BasicNewsRecipe):
         .article__info, .article-byline, .article-main__more, .d-print-block {font-size:small; color:#404040;}
     '''
 
+    recipe_specific_options = {
+        'days': {
+            'short': 'Oldest article to download from this news source. In days ',
+            'long': 'For example, 0.5, gives you articles from the past 12 hours',
+            'default': str(oldest_article)
+        }
+    }
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        d = self.recipe_specific_options.get('days')
+        if d and isinstance(d, str):
+            self.oldest_article = float(d)
+
     resolve_internal_links = True
     remove_empty_feeds = True
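This is the same 'days' option and __init__ override as in the people.com.cn recipe above. A possible hardening, not in the commit, would be to keep the default when the value does not parse instead of raising:

    # Hypothetical defensive variant of the float conversion.
    oldest_article = 7       # this recipe's default (value assumed)
    d = 'not-a-number'       # example bad value for the 'days' option
    if d and isinstance(d, str):
        try:
            oldest_article = float(d)
        except ValueError:
            pass  # keep the default rather than abort the download
    print(oldest_article)  # 7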


@@ -59,16 +59,29 @@ class ScientificAmerican(BasicNewsRecipe):
         br.submit()
         return br
 
+    recipe_specific_options = {
+        'issue_url': {
+            'short': 'The issue URL ',
+            'long': (
+                'For example, https://www.scientificamerican.com/issue/sa/2024/07-01/'
+                '\nYou can also download special-editions, physics, health, mind magazines by pasting the URL here.'
+            )
+        }
+    }
+
     def parse_index(self):
         # Get the cover, date and issue URL
-        fp_soup = self.index_to_soup("https://www.scientificamerican.com")
-        curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
-        if not curr_issue_link:
-            self.abort_recipe_processing("Unable to find issue link")
-        issue_url = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
-        # for past editions https://www.scientificamerican.com/archive/issues/
-        # issue_url = 'https://www.scientificamerican.com/issue/sa/2024/01-01/'
-        soup = self.index_to_soup(issue_url)
+        d = self.recipe_specific_options.get('issue_url')
+        if d and isinstance(d, str):
+            issue = d
+        else:
+            fp_soup = self.index_to_soup("https://www.scientificamerican.com")
+            curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-'))
+            if not curr_issue_link:
+                self.abort_recipe_processing("Unable to find issue link")
+            issue = 'https://www.scientificamerican.com' + curr_issue_link.a["href"]
+        soup = self.index_to_soup(issue)
         script = soup.find("script", id="__DATA__")
         if not script:
             self.abort_recipe_processing("Unable to find script")
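This is the most involved hunk: the old hard-coded front-page scrape becomes the else branch, so a user-supplied URL skips the front-page request entirely (and with it the commented-out past-editions workaround). The control flow, with the network and scraping stubbed out:

    # Sketch of the option-first, scrape-as-fallback flow.
    def resolve_issue_url(option_value, scrape_front_page):
        # option_value: recipe_specific_options.get('issue_url')
        # scrape_front_page: callable standing in for the front-page soup lookup
        if option_value and isinstance(option_value, str):
            return option_value
        return scrape_front_page()

    print(resolve_issue_url(
        'https://www.scientificamerican.com/issue/sa/2024/07-01/',
        lambda: 'https://www.scientificamerican.com/issue/sa/2024/08-01/',
    ))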