Merge branch 'master' of https://github.com/unkn0w7n/calibre

2025-08-30 23:00:21 -04:00 · 2024-07-26 10:45:44 +05:30 · 2024-07-26 10:45:44 +05:30 · 21583ad1d0
commit 21583ad1d0
parent 9d174f5cac e848f9ba30
27 changed files with 375 additions and 3 deletions
--- a/recipes/20_minutos.recipe
+++ b/recipes/20_minutos.recipe
@ -54,6 +54,20 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe):
    preprocess_regexps = [(re.compile(
        r'<a href="http://estaticos.*?[0-999]px;" target="_blank">', re.DOTALL), lambda m: '')]
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    feeds = [
    (u'Portada', u'http://www.20minutos.es/rss/'),
--- a/recipes/abc_au.recipe
+++ b/recipes/abc_au.recipe
@ -24,6 +24,20 @@ class ABCNews(BasicNewsRecipe):
    max_articles_per_feed = 100
    publication_type = 'newspaper'
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
 #    auto_cleanup   = True # enable this as a backup option if recipe stops working
 #    use_embedded_content = False # if set to true will assume that all the article content is within the feed (i.e. won't try to fetch more data)
--- a/recipes/abc_es.recipe
+++ b/recipes/abc_es.recipe
@ -22,7 +22,7 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
    description = 'Noticias de Spain y el mundo'
    category = 'News,Spain,National,International,Economy'
    oldest_article = 2
-    max_articles_per_feed = 10
+    max_articles_per_feed = 25
    no_stylesheets = True
    use_embedded_content = False
@ -31,6 +31,20 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe):
    remove_javascript = True
    language = 'es'
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    extra_css             = """
                               p{text-align: justify; font-size: 100%}
                               body{ text-align: left; font-size:100% }
--- a/recipes/asianreviewofbooks.recipe
+++ b/recipes/asianreviewofbooks.recipe
@ -32,6 +32,20 @@ class AsianReviewOfBooks(BasicNewsRecipe):
                               img {display: block}
                            """
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    conversion_options = {
        'comment': description,
        'tags': category,
--- a/recipes/bbc_fast.recipe
+++ b/recipes/bbc_fast.recipe
@ -151,6 +151,20 @@ class BBC(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    resolve_internal_links = True
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    feeds = [
        ('Top Stories', 'https://feeds.bbci.co.uk/news/rss.xml'),
        ('Science/Environment',
--- a/recipes/china_economic_net.recipe
+++ b/recipes/china_economic_net.recipe
@ -1,3 +1,5 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 from calibre.web.feeds.news import BasicNewsRecipe
@ -10,6 +12,21 @@ class AdvancedUserRecipe1278162597(BasicNewsRecipe):
    publisher = 'www.ce.cn - China Economic net - Beijing'
    description = 'China Economic Net Magazine'
    category = 'Economic News Magazine, Chinese, China'
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    feeds = [
        (u'Stock Market 股市', u'http://finance.ce.cn/stock/index_6304.xml'),
        (u'Money 理财', u'http://finance.ce.cn/money/index_6301.xml'),
--- a/recipes/clarin.recipe
+++ b/recipes/clarin.recipe
@ -70,6 +70,20 @@ class Clarin(BasicNewsRecipe):
      'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    keep_only_tags = [
      dict(name='p'  , attrs={'class'   : 'volanta'}),
      dict(name='h1' , attrs={'id': 'title'}),
--- a/recipes/cnn.recipe
+++ b/recipes/cnn.recipe
@ -1,3 +1,5 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
@ -26,6 +28,20 @@ class CNN(BasicNewsRecipe):
    ]
    remove_tags = [classes('video-inline_carousel')]
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    feeds = [
        ('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'),
        ('World', 'http://rss.cnn.com/rss/cnn_world.rss'),
--- a/recipes/corriere_della_sera_en.recipe
+++ b/recipes/corriere_della_sera_en.recipe
@ -43,6 +43,20 @@ class ilCorriereEn(BasicNewsRecipe):
        basename = '/'.join(segments[:3]) + '/' + \
            'International/english/articoli/'
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    # NOTE(review): the context lines just before recipe_specific_options and
    # just after this method are indented like statements inside a method
    # body. If that is so, these class-level definitions were inserted in the
    # middle of that method, and the statements that follow would now be
    # parsed as part of __init__ — verify against the full file.
    def __init__(self, *args, **kwargs):
        # Forward every argument to the base class before reading options.
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        # Only a non-empty string is converted and stored as oldest_article.
        if d and isinstance(d, str):
            self.oldest_article = float(d)
    # the date has to be redone with the url structure
        mlist1 = ['gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno',
                  'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre']
--- a/recipes/corriere_della_sera_it.recipe
+++ b/recipes/corriere_della_sera_it.recipe
@ -28,6 +28,20 @@ class CorriereDellaSeraRecipe(BasicNewsRecipe):
    remove_tags = [dict(id='gallery')]
    ignore_duplicate_articles = {'title', 'url'}
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    feeds = [
        ('Homepage', 'http://xml2.corriereobjects.it/rss/homepage.xml'),
        ('Editoriali', 'http://xml2.corriereobjects.it/rss/editoriali.xml'),
--- a/recipes/courrierinternational.recipe
+++ b/recipes/courrierinternational.recipe
@ -20,6 +20,20 @@ class CourrierInternational(BasicNewsRecipe):
    oldest_article = 7
    language = 'fr'
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    max_articles_per_feed = 50
    no_stylesheets = True
--- a/recipes/el_correo.recipe
+++ b/recipes/el_correo.recipe
@ -88,6 +88,20 @@ class elcorreo(BasicNewsRecipe):
                p.name = 'div'
        return soup
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    def get_browser(self, *args, **kwargs):
        kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
--- a/recipes/foxnews.recipe
+++ b/recipes/foxnews.recipe
@ -31,6 +31,20 @@ class FoxNews(BasicNewsRecipe):
        .author,.dateline{font-size: small}
    """
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    conversion_options = {
        'comment': description,
        'tags': category,
--- a/recipes/instapaper.recipe
+++ b/recipes/instapaper.recipe
@ -1,3 +1,5 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 # Calibre recipe for Instapaper.com (Stable version)
 #
 # Homepage: http://khromov.wordpress.com/projects/instapaper-calibre-recipe/
@ -29,6 +31,21 @@ class InstapaperRecipe(BasicNewsRecipe):
    encoding = 'utf-8'
    language = 'en'
    remove_javascript = True
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    remove_tags = [
        dict(name='div', attrs={'id': 'reflow'}),
        dict(name='div', attrs={'id': 'modal_backer'}),
--- a/recipes/japan_times.recipe
+++ b/recipes/japan_times.recipe
@ -32,6 +32,20 @@ class JapanTimes(BasicNewsRecipe):
    masthead_url = "https://cdn-japantimes.com/wp-content/themes/jt_theme/library/img/japantimes-logo-tagline.png"
    extra_css = "body{font-family: Geneva,Arial,Helvetica,sans-serif}"
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    conversion_options = {
        "comment": description,
        "tags": category,
--- a/recipes/la_jornada.recipe
+++ b/recipes/la_jornada.recipe
@ -58,6 +58,20 @@ class LaJornada_mx(BasicNewsRecipe):
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    preprocess_regexps = [
        (re.compile(r'<div class="inicial">(.*)</div><p class="s-s">', re.DOTALL | re.IGNORECASE),
         lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
--- a/recipes/national_post.recipe
+++ b/recipes/national_post.recipe
@ -23,6 +23,20 @@ class NationalPost(BasicNewsRecipe):
    oldest_article = 1.5
    use_embedded_content = False
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    keep_only_tags = [
        dict(itemprop='headline'),
        classes('featured-image'),
--- a/recipes/nhk_news.recipe
+++ b/recipes/nhk_news.recipe
@ -29,3 +29,9 @@ class ReutersJa(BasicNewsRecipe):
        ('スポーツ', 'https://www.nhk.or.jp/rss/news/cat7.xml?format=xml'),
        ('文化・エンタメ', 'https://www.nhk.or.jp/rss/news/cat2.xml?format=xml')
    ]
    def preprocess_html(self, soup):
        """Copy data-src into src on every img tag that has a data-src.

        The tags are looked up with soup.findAll; the same soup object is
        returned after the attributes have been updated in place.
        """
        tagged_images = soup.findAll('img', attrs={'data-src': True})
        for tag in tagged_images:
            tag['src'] = tag['data-src']
        return soup
--- a/recipes/nypost.recipe
+++ b/recipes/nypost.recipe
@ -22,9 +22,23 @@ class NewYorkPost(BasicNewsRecipe):
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
-    language = 'en'
+    language = 'en_US'
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
--- a/recipes/nytimes_sub.recipe
+++ b/recipes/nytimes_sub.recipe
@ -86,7 +86,7 @@ class NewYorkTimes(BasicNewsRecipe):
        description = 'Today\'s New York Times'
    encoding = 'utf-8'
    __author__ = 'Kovid Goyal'
-    language = 'en'
+    language = 'en_US'
    ignore_duplicate_articles = {'title', 'url'}
    no_stylesheets = True
    compress_news_images = True
--- a/recipes/scmp.recipe
+++ b/recipes/scmp.recipe
@ -28,6 +28,20 @@ class SCMP(BasicNewsRecipe):
    compress_news_images = True
    ignore_duplicate_articles = {"title", "url"}
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    # used when unable to extract article from <script>, particularly in the Sports section
    remove_tags = [
        dict(
--- a/recipes/substack.recipe
+++ b/recipes/substack.recipe
@ -30,6 +30,20 @@ class Substack(BasicNewsRecipe):
    needs_subscription = 'optional'
    use_embedded_content = False
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
 # Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
 # The same URL provides either all posts, or all free posts + previews of paid posts,
 # depending on whether you're logged in.
--- a/recipes/tagesspiegel.recipe
+++ b/recipes/tagesspiegel.recipe
@ -27,6 +27,20 @@ class TagesspiegelRss(BasicNewsRecipe):
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    def get_browser(self):
        """Return BasicNewsRecipe.get_browser called with verify_ssl_certificates=False."""
        # NOTE(review): presumably this disables TLS certificate checks for
        # this source — confirm that is still required.
        browser = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
        return browser
--- a/recipes/the_verge.recipe
+++ b/recipes/the_verge.recipe
@ -1,3 +1,5 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 from calibre.web.feeds.news import BasicNewsRecipe
@ -13,6 +15,20 @@ class HindustanTimes(BasicNewsRecipe):
    auto_cleanup = True
    auto_cleanup_keep = '//div[@class="story-image shadowbox entry-content-asset"]'
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    feeds = [
        ('News',
         'http://www.theverge.com/rss/index.xml'),
--- a/recipes/wired_daily.recipe
+++ b/recipes/wired_daily.recipe
@ -1,3 +1,5 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 __license__ = 'GPL v3'
 __copyright__ = '2014, Darko Miletic <darko.miletic at gmail.com>'
 '''
@ -44,6 +46,20 @@ class WiredDailyNews(BasicNewsRecipe):
        ul li{display: inline}
    """
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    remove_tags = [
        classes('related-cne-video-component tags-component podcast_42 storyboard inset-left-component social-icons'),
        dict(name=['meta', 'link', 'aside']),
--- a/recipes/wirtscafts_woche.recipe
+++ b/recipes/wirtscafts_woche.recipe
@ -1,3 +1,5 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 __license__ = 'GPL v3'
 __copyright__ = '2013, Armin Geller'
@ -34,6 +36,20 @@ class WirtschaftsWocheOnline(BasicNewsRecipe):
    cover_source = 'https://www.ikiosk.de/shop/epaper/wirtschaftswoche.html'
    masthead_url = 'http://www.wiwo.de/images/wiwo_logo/5748610/1-formatOriginal.png'
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    def get_cover_url(self):
        cover_source_soup = self.index_to_soup(self.cover_source)
        preview_image_div = cover_source_soup.find(attrs={'class': 'gallery'})
--- a/recipes/zeitde.recipe
+++ b/recipes/zeitde.recipe
@ -1,3 +1,5 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@ -55,6 +57,20 @@ class ZeitDe(BasicNewsRecipe):
        dict(name='a', class_='faq-link'),
    ]
    # Recipe-specific option 'days': descriptive 'short'/'long' texts plus a
    # default built from the oldest_article value in scope at this point.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
    }
    def __init__(self, *args, **kwargs):
        """Initialise via BasicNewsRecipe, then honour a 'days' option.

        All arguments are passed through to BasicNewsRecipe.__init__. If
        self.recipe_specific_options then holds a non-empty string under
        'days', it is converted with float() and stored as oldest_article.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)
    feeds = [
        (u'Startseite – Die wichtigsten Themen auf einen Blick',
         u'https://newsfeed.zeit.de/index'),