From e848f9ba3013bc63409518fe74b8fba6a97ecc39 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:38:58 +0530 Subject: [PATCH] Update specific_options --- recipes/20_minutos.recipe | 14 ++++++++++++++ recipes/abc_au.recipe | 14 ++++++++++++++ recipes/abc_es.recipe | 16 +++++++++++++++- recipes/asianreviewofbooks.recipe | 14 ++++++++++++++ recipes/bbc_fast.recipe | 14 ++++++++++++++ recipes/china_economic_net.recipe | 17 +++++++++++++++++ recipes/clarin.recipe | 14 ++++++++++++++ recipes/cnn.recipe | 16 ++++++++++++++++ recipes/corriere_della_sera_en.recipe | 14 ++++++++++++++ recipes/corriere_della_sera_it.recipe | 14 ++++++++++++++ recipes/courrierinternational.recipe | 14 ++++++++++++++ recipes/el_correo.recipe | 14 ++++++++++++++ recipes/foxnews.recipe | 14 ++++++++++++++ recipes/instapaper.recipe | 17 +++++++++++++++++ recipes/japan_times.recipe | 14 ++++++++++++++ recipes/la_jornada.recipe | 14 ++++++++++++++ recipes/national_post.recipe | 14 ++++++++++++++ recipes/nhk_news.recipe | 6 ++++++ recipes/nypost.recipe | 16 +++++++++++++++- recipes/nytimes_sub.recipe | 2 +- recipes/scmp.recipe | 14 ++++++++++++++ recipes/substack.recipe | 14 ++++++++++++++ recipes/tagesspiegel.recipe | 14 ++++++++++++++ recipes/the_verge.recipe | 16 ++++++++++++++++ recipes/wired_daily.recipe | 16 ++++++++++++++++ recipes/wirtscafts_woche.recipe | 16 ++++++++++++++++ recipes/zeitde.recipe | 16 ++++++++++++++++ 27 files changed, 375 insertions(+), 3 deletions(-) diff --git a/recipes/20_minutos.recipe b/recipes/20_minutos.recipe index b1a18efda9..ba5da87954 100644 --- a/recipes/20_minutos.recipe +++ b/recipes/20_minutos.recipe @@ -54,6 +54,20 @@ class AdvancedUserRecipe1294946868(BasicNewsRecipe): preprocess_regexps = [(re.compile( r'', re.DOTALL), lambda m: '')] + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + feeds = [ (u'Portada', u'http://www.20minutos.es/rss/'), diff --git a/recipes/abc_au.recipe b/recipes/abc_au.recipe index 4f5d73bf4a..82591d1357 100644 --- a/recipes/abc_au.recipe +++ b/recipes/abc_au.recipe @@ -24,6 +24,20 @@ class ABCNews(BasicNewsRecipe): max_articles_per_feed = 100 publication_type = 'newspaper' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + # auto_cleanup = True # enable this as a backup option if recipe stops working # use_embedded_content = False # if set to true will assume that all the article content is within the feed (i.e. won't try to fetch more data) diff --git a/recipes/abc_es.recipe b/recipes/abc_es.recipe index ea8e55c78d..3a55ac44e5 100644 --- a/recipes/abc_es.recipe +++ b/recipes/abc_es.recipe @@ -22,7 +22,7 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe): description = 'Noticias de Spain y el mundo' category = 'News,Spain,National,International,Economy' oldest_article = 2 - max_articles_per_feed = 10 + max_articles_per_feed = 25 no_stylesheets = True use_embedded_content = False @@ -31,6 +31,20 @@ class AdvancedUserRecipe1296604369(BasicNewsRecipe): remove_javascript = True language = 'es' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + extra_css = """ p{text-align: justify; font-size: 100%} body{ text-align: left; font-size:100% } diff --git a/recipes/asianreviewofbooks.recipe b/recipes/asianreviewofbooks.recipe index 2b2a8bbc41..f9e463b545 100644 --- a/recipes/asianreviewofbooks.recipe +++ b/recipes/asianreviewofbooks.recipe @@ -32,6 +32,20 @@ class AsianReviewOfBooks(BasicNewsRecipe): img {display: block} """ + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + conversion_options = { 'comment': description, 'tags': category, diff --git a/recipes/bbc_fast.recipe b/recipes/bbc_fast.recipe index 312aa2dd7d..a53d5a6db9 100644 --- a/recipes/bbc_fast.recipe +++ b/recipes/bbc_fast.recipe @@ -151,6 +151,20 @@ class BBC(BasicNewsRecipe): ignore_duplicate_articles = {'title', 'url'} resolve_internal_links = True + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + feeds = [ ('Top Stories', 'https://feeds.bbci.co.uk/news/rss.xml'), ('Science/Environment', diff --git a/recipes/china_economic_net.recipe b/recipes/china_economic_net.recipe index 0a413f58d0..c63ee43b70 100644 --- a/recipes/china_economic_net.recipe +++ b/recipes/china_economic_net.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from calibre.web.feeds.news import BasicNewsRecipe @@ -10,6 +12,21 @@ class AdvancedUserRecipe1278162597(BasicNewsRecipe): publisher = 'www.ce.cn - China Economic net - Beijing' description = 'China Economic Net Magazine' category = 'Economic News Magazine, Chinese, China' + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + feeds = [ (u'Stock Market 股市', u'http://finance.ce.cn/stock/index_6304.xml'), (u'Money 理财', u'http://finance.ce.cn/money/index_6301.xml'), diff --git a/recipes/clarin.recipe b/recipes/clarin.recipe index 5e40f395f6..9cac8328ed 100644 --- a/recipes/clarin.recipe +++ b/recipes/clarin.recipe @@ -70,6 +70,20 @@ class Clarin(BasicNewsRecipe): 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + keep_only_tags = [ dict(name='p' , attrs={'class' : 'volanta'}), dict(name='h1' , attrs={'id': 'title'}), diff --git a/recipes/cnn.recipe b/recipes/cnn.recipe index cb6ef666b6..cdd17e4865 100644 --- a/recipes/cnn.recipe +++ b/recipes/cnn.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' @@ -26,6 +28,20 @@ class CNN(BasicNewsRecipe): ] remove_tags = [classes('video-inline_carousel')] + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + feeds = [ ('Top News', 'http://rss.cnn.com/rss/cnn_topstories.rss'), ('World', 'http://rss.cnn.com/rss/cnn_world.rss'), diff --git a/recipes/corriere_della_sera_en.recipe b/recipes/corriere_della_sera_en.recipe index d1cc5a06fe..83821dc214 100644 --- a/recipes/corriere_della_sera_en.recipe +++ b/recipes/corriere_della_sera_en.recipe @@ -43,6 +43,20 @@ class ilCorriereEn(BasicNewsRecipe): basename = '/'.join(segments[:3]) + '/' + \ 'International/english/articoli/' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + # the date has to be redone with the url structure mlist1 = ['gennaio', 'febbraio', 'marzo', 'aprile', 'maggio', 'giugno', 'luglio', 'agosto', 'settembre', 'ottobre', 'novembre', 'dicembre'] diff --git a/recipes/corriere_della_sera_it.recipe b/recipes/corriere_della_sera_it.recipe index 17a15911fc..925d3be7ae 100644 --- a/recipes/corriere_della_sera_it.recipe +++ b/recipes/corriere_della_sera_it.recipe @@ -28,6 +28,20 @@ class CorriereDellaSeraRecipe(BasicNewsRecipe): remove_tags = [dict(id='gallery')] ignore_duplicate_articles = {'title', 'url'} + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + feeds = [ ('Homepage', 'http://xml2.corriereobjects.it/rss/homepage.xml'), ('Editoriali', 'http://xml2.corriereobjects.it/rss/editoriali.xml'), diff --git a/recipes/courrierinternational.recipe b/recipes/courrierinternational.recipe index 510cbc2663..7f8bb1315f 100644 --- a/recipes/courrierinternational.recipe +++ b/recipes/courrierinternational.recipe @@ -20,6 +20,20 @@ class CourrierInternational(BasicNewsRecipe): oldest_article = 7 language = 'fr' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + max_articles_per_feed = 50 no_stylesheets = True diff --git a/recipes/el_correo.recipe b/recipes/el_correo.recipe index a803a0ce67..158cf4cc75 100644 --- a/recipes/el_correo.recipe +++ b/recipes/el_correo.recipe @@ -88,6 +88,20 @@ class elcorreo(BasicNewsRecipe): p.name = 'div' return soup + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + def get_browser(self, *args, **kwargs): kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) diff --git a/recipes/foxnews.recipe b/recipes/foxnews.recipe index d1833347a9..3e5c25ca6d 100644 --- a/recipes/foxnews.recipe +++ b/recipes/foxnews.recipe @@ -31,6 +31,20 @@ class FoxNews(BasicNewsRecipe): .author,.dateline{font-size: small} """ + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + conversion_options = { 'comment': description, 'tags': category, diff --git a/recipes/instapaper.recipe b/recipes/instapaper.recipe index ee298ebc18..0d91d6dcc5 100644 --- a/recipes/instapaper.recipe +++ b/recipes/instapaper.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 # Calibre recipe for Instapaper.com (Stable version) # # Homepage: http://khromov.wordpress.com/projects/instapaper-calibre-recipe/ @@ -29,6 +31,21 @@ class InstapaperRecipe(BasicNewsRecipe): encoding = 'utf-8' language = 'en' remove_javascript = True + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + remove_tags = [ dict(name='div', attrs={'id': 'reflow'}), dict(name='div', attrs={'id': 'modal_backer'}), diff --git a/recipes/japan_times.recipe b/recipes/japan_times.recipe index b06f32a748..eb1db6493c 100644 --- a/recipes/japan_times.recipe +++ b/recipes/japan_times.recipe @@ -32,6 +32,20 @@ class JapanTimes(BasicNewsRecipe): masthead_url = "https://cdn-japantimes.com/wp-content/themes/jt_theme/library/img/japantimes-logo-tagline.png" extra_css = "body{font-family: Geneva,Arial,Helvetica,sans-serif}" + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + conversion_options = { "comment": description, "tags": category, diff --git a/recipes/la_jornada.recipe b/recipes/la_jornada.recipe index 3cbc368650..e85c815fee 100644 --- a/recipes/la_jornada.recipe +++ b/recipes/la_jornada.recipe @@ -58,6 +58,20 @@ class LaJornada_mx(BasicNewsRecipe): 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + preprocess_regexps = [ (re.compile(r'
(.*)

', re.DOTALL | re.IGNORECASE), lambda match: '

' + match.group(1) + '

') diff --git a/recipes/national_post.recipe b/recipes/national_post.recipe index 1c1a60dd73..ee156fda72 100644 --- a/recipes/national_post.recipe +++ b/recipes/national_post.recipe @@ -23,6 +23,20 @@ class NationalPost(BasicNewsRecipe): oldest_article = 1.5 use_embedded_content = False + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + keep_only_tags = [ dict(itemprop='headline'), classes('featured-image'), diff --git a/recipes/nhk_news.recipe b/recipes/nhk_news.recipe index 61e906aa81..13ab1fad60 100644 --- a/recipes/nhk_news.recipe +++ b/recipes/nhk_news.recipe @@ -29,3 +29,9 @@ class ReutersJa(BasicNewsRecipe): ('スポーツ', 'https://www.nhk.or.jp/rss/news/cat7.xml?format=xml'), ('文化・エンタメ', 'https://www.nhk.or.jp/rss/news/cat2.xml?format=xml') ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src':True}): + img['src'] = img['data-src'] + return soup + diff --git a/recipes/nypost.recipe b/recipes/nypost.recipe index 7811b481b5..95aa130008 100644 --- a/recipes/nypost.recipe +++ b/recipes/nypost.recipe @@ -22,9 +22,23 @@ class NewYorkPost(BasicNewsRecipe): no_stylesheets = True encoding = 'utf8' use_embedded_content = False - language = 'en' + language = 'en_US' extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} ' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + conversion_options = { 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index fa3b7b0a6b..1486abe7a2 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -86,7 +86,7 @@ class NewYorkTimes(BasicNewsRecipe): description = 'Today\'s New York Times' encoding = 'utf-8' __author__ = 'Kovid Goyal' - language = 'en' + language = 'en_US' ignore_duplicate_articles = {'title', 'url'} no_stylesheets = True compress_news_images = True diff --git a/recipes/scmp.recipe b/recipes/scmp.recipe index fe0b42a849..c2d49abcc8 100644 --- a/recipes/scmp.recipe +++ b/recipes/scmp.recipe @@ -28,6 +28,20 @@ class SCMP(BasicNewsRecipe): compress_news_images = True ignore_duplicate_articles = {"title", "url"} + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + # used when unable to extract article from