diff --git a/recipes/ancient_egypt.recipe b/recipes/ancient_egypt.recipe index c2116a00a4..c40b0aa3cc 100644 --- a/recipes/ancient_egypt.recipe +++ b/recipes/ancient_egypt.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 ''' https://ancientegyptmagazine.com ''' diff --git a/recipes/atlantic.recipe b/recipes/atlantic.recipe index 0733c23895..a8a0d7565b 100644 --- a/recipes/atlantic.recipe +++ b/recipes/atlantic.recipe @@ -81,6 +81,19 @@ class TheAtlantic(BasicNewsRecipe): language = 'en' encoding = 'utf-8' + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (YYYY/MM format)', + 'long': 'For example, 2024/05' + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + self.INDEX = 'https://www.theatlantic.com/magazine/toc/' + d + '/' + keep_only_tags = [ dict(itemprop=['headline']), classes( diff --git a/recipes/bbc.recipe b/recipes/bbc.recipe index 93b5eddccf..7302d02d3f 100644 --- a/recipes/bbc.recipe +++ b/recipes/bbc.recipe @@ -234,6 +234,20 @@ class BBCNews(BasicNewsRecipe): # oldest_article = 1.5 + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + # Number of simultaneous downloads. 20 is consistently working fine on the # BBC News feeds with no problems. Speeds things up from the default of 5. # If you have a lot of feeds and/or have increased oldest_article above 2 diff --git a/recipes/bloomberg-business-week.recipe b/recipes/bloomberg-business-week.recipe index d6bfa7f12d..d43bc0cd86 100644 --- a/recipes/bloomberg-business-week.recipe +++ b/recipes/bloomberg-business-week.recipe @@ -1,3 +1,6 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + import json import time from datetime import datetime @@ -58,7 +61,7 @@ class Bloomberg(BasicNewsRecipe): remove_empty_feeds = True recipe_specific_options = { - 'date': { + 'issue': { 'short': 'The ID of the edition to download (YY_XX format)', 'long': 'For example, 24_17\nHint: Edition ID can be found at the end of its URL' } @@ -86,7 +89,7 @@ class Bloomberg(BasicNewsRecipe): inx = 'https://cdn-mobapi.bloomberg.com' sec = self.index_to_soup(inx + '/wssmobile/v1/bw/news/list?limit=1', raw=True) id = json.loads(sec)['magazines'][0]['id'] - past_edition = self.recipe_specific_options.get('date') + past_edition = self.recipe_specific_options.get('issue') if past_edition and isinstance(past_edition, str): id = past_edition edit = self.index_to_soup(inx + '/wssmobile/v1/bw/news/week/' + id, raw=True) diff --git a/recipes/business_standard_print.recipe b/recipes/business_standard_print.recipe index ff7d63ff98..a5b2187fac 100644 --- a/recipes/business_standard_print.recipe +++ b/recipes/business_standard_print.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json from datetime import datetime diff --git a/recipes/caravan_magazine.recipe b/recipes/caravan_magazine.recipe index f0ec508791..3de9e663f7 100644 --- a/recipes/caravan_magazine.recipe +++ b/recipes/caravan_magazine.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json from urllib.parse import quote, urlparse @@ -118,6 +120,13 @@ class CaravanMagazine(BasicNewsRecipe): return br return br + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (MM-YYYY format)', + 'long': 'For example, 07-2024' + } + } + def parse_index(self): self.log( '\n***\nif this recipe fails, report it on: ' @@ -125,9 +134,11 @@ class CaravanMagazine(BasicNewsRecipe): ) api = 'https://api.caravanmagazine.in/api/trpc/magazines.getLatestIssue' - # for past editions - # inp = json.dumps({"0":{"json":{"month":6,"year":2023}}}) - # api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='') + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + x = d.split('-') + inp = json.dumps({"0":{"json":{"month":int(x[0]),"year":int(x[1])}}}) + api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='') raw = json.loads(self.index_to_soup(api, raw=True)) if isinstance(raw, list): diff --git a/recipes/deutsche_welle_bs.recipe b/recipes/deutsche_welle_bs.recipe index e638560cf8..a81ee32935 100644 --- a/recipes/deutsche_welle_bs.recipe +++ b/recipes/deutsche_welle_bs.recipe @@ -23,13 +23,27 @@ class DeutscheWelle_bs(BasicNewsRecipe): keep_only_tags = [ dict(name='article') ] - + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + remove_tags = [ dict(name=['footer', 'source']), dict(attrs={'data-tracking-name':'sharing-icons-inline'}), classes('kicker advertisement vjs-wrapper') ] - + feeds = [ (u'Politika', u'http://rss.dw-world.de/rdf/rss-bos-pol'), (u'Evropa', u'http://rss.dw-world.de/rdf/rss-bos-eu'), diff --git a/recipes/deutsche_welle_de.recipe b/recipes/deutsche_welle_de.recipe index e9a5bf1921..500e0bb9cc 100644 --- a/recipes/deutsche_welle_de.recipe +++ b/recipes/deutsche_welle_de.recipe @@ -21,6 +21,20 @@ class DeutscheWelle(BasicNewsRecipe): dict(name='article') ] + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + remove_tags = [ dict(name=['footer', 'source']), dict(attrs={'data-tracking-name':'sharing-icons-inline'}), diff --git a/recipes/deutsche_welle_en.recipe b/recipes/deutsche_welle_en.recipe index 00679daea4..634eab9b84 100644 --- a/recipes/deutsche_welle_en.recipe +++ b/recipes/deutsche_welle_en.recipe @@ -15,7 +15,21 @@ class DeutscheWelle_en(BasicNewsRecipe): remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} remove_attributes = ['height', 'width', 'style'] - + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + keep_only_tags = [ dict(name='article') ] diff --git a/recipes/deutsche_welle_es.recipe b/recipes/deutsche_welle_es.recipe index b28c085e17..5076f1425d 100644 --- a/recipes/deutsche_welle_es.recipe +++ b/recipes/deutsche_welle_es.recipe @@ -17,10 +17,24 @@ class DeutscheWelle_es(BasicNewsRecipe): remove_attributes = ['height', 'width', 'style'] + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + keep_only_tags = [ dict(name='article') ] - + remove_tags = [ dict(name=['footer', 'source']), dict(attrs={'data-tracking-name':'sharing-icons-inline'}), @@ -40,7 +54,7 @@ class DeutscheWelle_es(BasicNewsRecipe): ('Conozca Alemania', 'http://rss.dw-world.de/rdf/rss-sp-con') ] - + def preprocess_html(self, soup): for img in soup.findAll('img', srcset=True): img['src'] = img['srcset'].split()[6] diff --git a/recipes/deutsche_welle_hr.recipe b/recipes/deutsche_welle_hr.recipe index 8edc24a618..bf56e72386 100644 --- a/recipes/deutsche_welle_hr.recipe +++ b/recipes/deutsche_welle_hr.recipe @@ -16,20 +16,34 @@ class DeutscheWelle_hr(BasicNewsRecipe): remove_empty_feeds = True masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif' remove_javascript = True - + ignore_duplicate_articles = {'title', 'url'} remove_attributes = ['height', 'width', 'style'] + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + keep_only_tags = [ dict(name='article') ] - + remove_tags = [ dict(name=['footer', 'source']), dict(attrs={'data-tracking-name':'sharing-icons-inline'}), classes('kicker advertisement vjs-wrapper') ] - + feeds = [ (u'Svijet', u'http://rss.dw-world.de/rdf/rss-cro-svijet'), (u'Europa', u'http://rss.dw-world.de/rdf/rss-cro-eu'), diff --git a/recipes/deutsche_welle_pt.recipe b/recipes/deutsche_welle_pt.recipe index afafe67a28..d2e5082854 100644 --- a/recipes/deutsche_welle_pt.recipe +++ b/recipes/deutsche_welle_pt.recipe @@ -15,12 +15,25 @@ class DeutscheWelle_pt(BasicNewsRecipe): publication_type = 'newsportal' remove_empty_feeds = True masthead_url = 'http://www.dw-world.de/skins/std/channel1/pics/dw_logo1024.gif' - - + remove_javascript = True ignore_duplicate_articles = {'title', 'url'} remove_attributes = ['height', 'width', 'style'] - + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + def preprocess_html(self, soup): for img in soup.findAll('img', srcset=True): img['src'] = img['srcset'].split()[6] @@ -29,7 +42,7 @@ class DeutscheWelle_pt(BasicNewsRecipe): keep_only_tags = [ dict(name='article') ] - + remove_tags = [ dict(name=['footer', 'source']), dict(attrs={'data-tracking-name':'sharing-icons-inline'}), diff --git a/recipes/deutsche_welle_ru.recipe b/recipes/deutsche_welle_ru.recipe index ec4e838af0..38df7884b1 100644 --- a/recipes/deutsche_welle_ru.recipe +++ b/recipes/deutsche_welle_ru.recipe @@ -16,7 +16,21 @@ class DeutscheWelle(BasicNewsRecipe): remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} remove_attributes = ['height', 'width', 'style'] - + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + def preprocess_html(self, soup): for img in soup.findAll('img', srcset=True): img['src'] = img['srcset'].split()[6] diff --git a/recipes/deutsche_welle_sr.recipe b/recipes/deutsche_welle_sr.recipe index 9cb2aaa482..134ea368fc 100644 --- a/recipes/deutsche_welle_sr.recipe +++ b/recipes/deutsche_welle_sr.recipe @@ -18,7 +18,21 @@ class DeutscheWelle_sr(BasicNewsRecipe): remove_javascript = True ignore_duplicate_articles = {'title', 'url'} remove_attributes = ['height', 'width', 'style'] - + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + def preprocess_html(self, soup): for img in soup.findAll('img', srcset=True): img['src'] = img['srcset'].split()[6] @@ -27,13 +41,13 @@ class DeutscheWelle_sr(BasicNewsRecipe): keep_only_tags = [ dict(name='article') ] - + remove_tags = [ dict(name=['footer', 'source']), dict(attrs={'data-tracking-name':'sharing-icons-inline'}), classes('kicker advertisement vjs-wrapper') ] - + feeds = [ (u'Politika', u'http://rss.dw-world.de/rdf/rss-ser-pol'), (u'Srbija', u'http://rss.dw-world.de/rdf/rss-ser-pol-ser'), diff --git a/recipes/eenadu.recipe b/recipes/eenadu.recipe index ed2e254e19..5072bb3866 100644 --- a/recipes/eenadu.recipe +++ b/recipes/eenadu.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json import re from datetime import date, datetime, timedelta diff --git a/recipes/eenadu_ap.recipe b/recipes/eenadu_ap.recipe index 6cbbb4395e..1a36c5bbef 100644 --- a/recipes/eenadu_ap.recipe +++ b/recipes/eenadu_ap.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import re from datetime import datetime, timedelta from urllib.parse import quote diff --git a/recipes/el_correo.recipe b/recipes/el_correo.recipe index f83ee410df..a803a0ce67 100644 --- a/recipes/el_correo.recipe +++ b/recipes/el_correo.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 ''' http://www.elcorreo.com/ ''' @@ -22,6 +24,20 @@ class elcorreo(BasicNewsRecipe): max_articles_per_feed = 25 # articles compress_news_images = True + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + extra_css = ''' .v-mdl-ath__inf, .v-mdl-ath__p--2, .v-mdl-ath__p {font-size:small; color:#404040;} .v-fc, .v-a-fig { text-align:center; font-size:small; } diff --git a/recipes/el_pais.recipe b/recipes/el_pais.recipe index 0bf6fb5ce3..ffe83b57dc 100644 --- a/recipes/el_pais.recipe +++ b/recipes/el_pais.recipe @@ -27,6 +27,20 @@ class ElPais(BasicNewsRecipe): oldest_article = 2.1 max_articles_per_feed = 25 + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + use_embedded_content = False recursion = 5 diff --git a/recipes/epoch_times.recipe b/recipes/epoch_times.recipe index cf2c0cc8b8..d0da409b37 100644 --- a/recipes/epoch_times.recipe +++ b/recipes/epoch_times.recipe @@ -19,6 +19,20 @@ class EpochTimes(BasicNewsRecipe): masthead_url = 'https://epochtimes-ny.newsmemory.com/eeLayout/epochtimes/1.0.a/images/webapp/banner.png' extra_css = '.post_caption, .text-sm, .uppercase {font-size:small;}' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + keep_only_tags = [ dict(name='article') ] diff --git a/recipes/financial_times.recipe b/recipes/financial_times.recipe index 62384d6aed..581ca3ab79 100644 --- a/recipes/financial_times.recipe +++ b/recipes/financial_times.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json import re from urllib.parse import quote @@ -31,6 +33,20 @@ class ft(BasicNewsRecipe): .o-topper__topic { font-size:small; color:#5c5c5c; } ''' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + keep_only_tags = [ classes( 'body_json o-topper__topic o-topper__headline o-topper__standfirst o-topper__visual article-info__time-byline main-image' diff --git a/recipes/foreign_policy.recipe b/recipes/foreign_policy.recipe index a702fa81e0..89d8fc5b2d 100644 --- a/recipes/foreign_policy.recipe +++ b/recipes/foreign_policy.recipe @@ -47,8 +47,20 @@ class ForeignPolicy(BasicNewsRecipe): ] remove_tags_after = [classes('post-content-main')] + recipe_specific_options = { + 'issue': { + 'short': 'Enter the Issue ID you want to download ', + 'long': 'For example, 411131563' + } + } + def parse_index(self): - soup = self.index_to_soup('https://foreignpolicy.com/the-magazine') + issue_url = 'https://foreignpolicy.com/the-magazine' + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + issue_url = issue_url + '/?issue_id=' + d + + soup = self.index_to_soup(issue_url) img = soup.find('img', attrs={'src': lambda x: x and '-cover' in x}) if img: self.cover_url = img['src'].split('?')[0] + '?w=800?quality=90' diff --git a/recipes/foreignaffairs.recipe b/recipes/foreignaffairs.recipe index dd0888c5b2..405d598fc7 100644 --- a/recipes/foreignaffairs.recipe +++ b/recipes/foreignaffairs.recipe @@ -127,6 +127,13 @@ class ForeignAffairsRecipe(BasicNewsRecipe): INDEX = 'https://www.foreignaffairs.com/magazine' + recipe_specific_options = { + 'issue': { + 'short': 'Enter the Issue Number you want to download ', + 'long': 'For example, 2024/103/1' + } + } + keep_only_tags = [ classes('article-header article-body article-lead-image article-body-text'), ] @@ -140,6 +147,10 @@ class ForeignAffairsRecipe(BasicNewsRecipe): remove_empty_feeds = True def parse_index(self): + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + self.INDEX = 'https://www.foreignaffairs.com/issues/' + d + soup = self.index_to_soup(self.INDEX) # get dates date = re.split(r'\s\|\s', self.tag_to_string( diff --git a/recipes/frontline.recipe b/recipes/frontline.recipe index fb23018f18..0655f5745a 100644 --- a/recipes/frontline.recipe +++ b/recipes/frontline.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from collections import defaultdict from calibre.web.feeds.news import BasicNewsRecipe, classes @@ -53,8 +55,20 @@ class Frontline(BasicNewsRecipe): src.extract() return soup + recipe_specific_options = { + 'issue': { + 'short': 'Enter the Issue Number you want to download\n(Volume-Issue format)', + 'long': 'For example, 41-12' + } + } + def parse_index(self): - soup = self.index_to_soup('https://frontline.thehindu.com/current-issue/') + issue_url = 'https://frontline.thehindu.com/current-issue/' + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + issue_url = 'https://frontline.thehindu.com/magazine/issue/vol' + d + + soup = self.index_to_soup(issue_url) if cover := soup.find('div', attrs={'class':'magazine'}): self.cover_url = cover.find(**classes('sptar-image')).img['data-original'].replace('_320', '_1200') @@ -82,4 +96,4 @@ class Frontline(BasicNewsRecipe): continue self.log(section, '\n\t', title, '\n\t', desc, '\n\t\t', url) feeds_dict[section].append({"title": title, "url": url, "description": desc}) - return [(section, articles) for section, articles in feeds_dict.items()] \ No newline at end of file + return [(section, articles) for section, articles in feeds_dict.items()] diff --git a/recipes/globaltimes.recipe b/recipes/globaltimes.recipe index e40bab6b40..e7d808c1ad 100644 --- a/recipes/globaltimes.recipe +++ b/recipes/globaltimes.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from datetime import datetime, timedelta, timezone from calibre.utils.date import parse_date @@ -29,6 +31,20 @@ class GlobalTimes(BasicNewsRecipe): blockquote, em {color:#202020;} ''' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + keep_only_tags = [ classes( 'article_column article_title author_share_left article_content' diff --git a/recipes/harpers.recipe b/recipes/harpers.recipe index be5a87086e..ee83add22c 100644 --- a/recipes/harpers.recipe +++ b/recipes/harpers.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 ''' harpers.org ''' @@ -70,7 +72,7 @@ class Harpers(BasicNewsRecipe): edition = self.recipe_specific_options.get('date') if edition and isinstance(edition, str): url = 'https://harpers.org/archive/' + edition - self.timefmt = ' [' +edition + ']' + self.timefmt = ' [' + edition + ']' soup = self.index_to_soup(url) cov_div = soup.find('div', attrs={'class':'issue-cover'}) diff --git a/recipes/himal_southasian.recipe b/recipes/himal_southasian.recipe index 5025806326..85188e0af9 100644 --- a/recipes/himal_southasian.recipe +++ b/recipes/himal_southasian.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json from calibre.web.feeds.news import BasicNewsRecipe @@ -37,6 +39,20 @@ class himal(BasicNewsRecipe): resolve_internal_links = True oldest_article = 30 # days + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + extra_css = ''' .cap, .auth {font-size:small;} em, blockquote {color:#404040;} diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe index 8b81d0b405..4dbb83bed2 100644 --- a/recipes/hindu.recipe +++ b/recipes/hindu.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json import re from collections import defaultdict @@ -32,7 +34,12 @@ class TheHindu(BasicNewsRecipe): recipe_specific_options = { 'location': { 'short': 'The name of the local edition', - 'long': 'If The Hindu is available in your local town/city,\nset this to your location, for example, hyderabad', + 'long': ('If The Hindu is available in your local town/city,\n' + 'set this to your location, for example, hyderabad\n' + 'Available Editions: bengaluru, chennai, coimbatore, delhi, ' + 'erode, hyderabad, international, kochi, kolkata,\n' + 'kozhikode, madurai, mangalore, mumbai, thiruvananthapuram, ' + 'tiruchirapalli, vijayawada, visakhapatnam'), 'default': 'international' }, 'date': { diff --git a/recipes/hindufeeds.recipe b/recipes/hindufeeds.recipe index 113ac6350f..143cdcdd6a 100644 --- a/recipes/hindufeeds.recipe +++ b/recipes/hindufeeds.recipe @@ -1,5 +1,5 @@ -from datetime import date - +#!/usr/bin/env python +# vim:fileencoding=utf-8 from calibre.web.feeds.news import BasicNewsRecipe, classes @@ -24,6 +24,20 @@ class TheHindufeeds(BasicNewsRecipe): .italic {font-style:italic; color:#202020;} ''' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + ignore_duplicate_articles = {'url'} keep_only_tags = [ @@ -60,14 +74,9 @@ class TheHindufeeds(BasicNewsRecipe): src.extract() return soup - def __init__(self, *args, **kwargs): - BasicNewsRecipe.__init__(self, *args, **kwargs) - if self.output_profile.short_name.startswith('kindle'): - self.title = 'The Hindu (Feeds) ' + date.today().strftime('%b %d, %Y') - def get_cover_url(self): - soup = self.index_to_soup('https://www.thehindu.com/todays-paper/') + soup = self.index_to_soup('https://www.thehindu.com/todays-paper/') if cover := soup.find(attrs={'class':'hindu-ad'}): return cover.img['src'] diff --git a/recipes/hindustan_times_print.recipe b/recipes/hindustan_times_print.recipe index a8eee1e21a..f6bc688aa1 100644 --- a/recipes/hindustan_times_print.recipe +++ b/recipes/hindustan_times_print.recipe @@ -1,21 +1,11 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json from collections import defaultdict from datetime import date from calibre.web.feeds.news import BasicNewsRecipe -# figure out your local_edition from the fetch news log of this recipe -local_edition = 'Delhi' - -today = date.today().strftime('%d/%m/%Y') - -# for older edition, change today -# today = '22/12/2023' - -day, month, year = (int(x) for x in today.split('/')) -dt = date(year, month, day) -today = today.replace('/', '%2F') - index = 'https://epaper.hindustantimes.com' class ht(BasicNewsRecipe): @@ -23,28 +13,51 @@ class ht(BasicNewsRecipe): language = 'en_IN' __author__ = 'unkn0wn' masthead_url = 'https://www.htmedia.in/wp-content/uploads/2020/08/HT-dot-com-logo-product.png' - timefmt = ' [' + dt.strftime('%b %d, %Y') + ']' description = 'Articles from the Hindustan Times epaper, digital edition' encoding = 'utf-8' delay = 1 ignore_duplicate_articles = {'title'} - def __init__(self, *args, **kwargs): - BasicNewsRecipe.__init__(self, *args, **kwargs) - if self.output_profile.short_name.startswith('kindle'): - self.title = 'HT Print Edition ' + dt.strftime('%b %d, %Y') - extra_css = ''' .cap { text-align:center; font-size:small; } img { display:block; margin:0 auto; } ''' - def parse_index(self): + recipe_specific_options = { + 'location': { + 'short': 'The name of the local edition', + 'long': ('If The Hindustan Times is available in your local town/city,\n' + 'set this to your location, for example, Delhi\nAvailable Editions:' + 'Delhi, Mumbai, Chandigarh, Lucknow, Patna, Bengaluru, Pune, Gurgaon,' + 'Ludhiana, Rajasthan, Amritsar,\nEast UP, Haryana, Jammu, Navi Mumbai,' + 'Noida, Punjab, Ranchi, Thane, Uttarakhand, West UP'), + 'default': 'Delhi' + }, + 'date': { + 'short': 'The date of the edition to download (DD/MM/YYYY format)', + 'long': 'For example, 22/12/2023' + } + } + def parse_index(self): self.log( '\n***\nif this recipe fails, report it on: ' 'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n' ) + local_edition = 'Delhi' + d = self.recipe_specific_options.get('location') + if d and isinstance(d, str): + local_edition = d + + today = date.today().strftime('%d/%m/%Y') + + p = self.recipe_specific_options.get('date') + if p and isinstance(p, str): + today = p + + self.timefmt = ' [%s]' % today + + today = today.replace('/', '%2F') get_edition = index + '/Home/GetEditionSupplementHierarchy?EditionDate=' + today edi_data = json.loads(self.index_to_soup(get_edition, raw=True)) @@ -56,7 +69,7 @@ class ht(BasicNewsRecipe): if edi['EditionName'] == local_edition: edi_name = edi['EditionName'] edi_id = str(edi['EditionId']) - self.log('Downloading', edi_name, 'Edition') + self.log('Downloading', edi_name, 'Edition', self.timefmt) url = index + '/Home/GetAllpages?editionid=' + edi_id + '&editiondate=' + today main_data = json.loads(self.index_to_soup(url, raw=True)) diff --git a/recipes/india_today.recipe b/recipes/india_today.recipe index a2b6ee317d..c8e648e93b 100644 --- a/recipes/india_today.recipe +++ b/recipes/india_today.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from calibre.ebooks.BeautifulSoup import Tag from calibre.web.feeds.news import BasicNewsRecipe @@ -47,17 +49,30 @@ class IndiaToday(BasicNewsRecipe): def preprocess_raw_html(self, raw_html, url): return raw_html.replace('—', '--') + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (DD-MM-YYYY format)', + 'long': 'For example, 22-07-2024' + } + } + def get_cover_url(self): - soup = self.index_to_soup( - 'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154' - ) - for citem in soup.findAll( - 'meta', content=lambda s: s and s.endswith('/magazine/300/new') - ): - return citem['content'].replace('300', '600') + d = self.recipe_specific_options.get('date') + if not (d and isinstance(d, str)): + soup = self.index_to_soup( + 'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154' + ) + for citem in soup.findAll( + 'meta', content=lambda s: s and s.endswith('/magazine/300/new') + ): + return citem['content'].replace('300', '600') def parse_index(self): - soup = self.index_to_soup('https://www.indiatoday.in/magazine') + issue = 'https://www.indiatoday.in/magazine' + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + issue = issue + '/' + d + soup = self.index_to_soup(issue) section = None sections = {} diff --git a/recipes/le_monde.recipe b/recipes/le_monde.recipe index f16c39c6ac..f815b1cc38 100644 --- a/recipes/le_monde.recipe +++ b/recipes/le_monde.recipe @@ -36,6 +36,20 @@ class LeMonde(BasicNewsRecipe): 'publisher': publisher } + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + masthead_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/5/54/Le_monde_logo.svg/800px-Le_monde_logo.svg.png' feeds = [ diff --git a/recipes/le_monde_diplomatique_fr.recipe b/recipes/le_monde_diplomatique_fr.recipe index ec14aeaf1d..556af30b61 100644 --- a/recipes/le_monde_diplomatique_fr.recipe +++ b/recipes/le_monde_diplomatique_fr.recipe @@ -1,3 +1,4 @@ +#!/usr/bin/env python # vim:fileencoding=utf-8 from __future__ import unicode_literals @@ -33,6 +34,20 @@ class LeMondeDiplomatiqueSiteWeb(BasicNewsRecipe): timefmt = ' [%d %b %Y]' no_stylesheets = True + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + feeds = [(u'Blogs', u'http://blog.mondediplo.net/spip.php?page=backend'), (u'Archives', u'http://www.monde-diplomatique.fr/rss/')] diff --git a/recipes/lex_fridman_podcast.recipe b/recipes/lex_fridman_podcast.recipe index 5fc387f036..8dfb25a5c2 100644 --- a/recipes/lex_fridman_podcast.recipe +++ b/recipes/lex_fridman_podcast.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from calibre.web.feeds.news import BasicNewsRecipe diff --git a/recipes/liberation.recipe b/recipes/liberation.recipe index b674d5922e..a8d072ad7a 100644 --- a/recipes/liberation.recipe +++ b/recipes/liberation.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 ''' liberation.fr ''' @@ -81,7 +83,7 @@ class Liberation(BasicNewsRecipe): 'les mutations des sociétés et des cultures.' ) language = 'fr' - oldest_article = 1 + oldest_article = 1.15 remove_empty_feeds = True articles_are_obfuscated = True ignore_duplicate_articles = {'title', 'url'} @@ -94,6 +96,20 @@ class Liberation(BasicNewsRecipe): blockquote { color:#202020; } ''' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + feeds = [ ('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'), ('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'), diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe index 8eb4586b06..60e7cf6fae 100644 --- a/recipes/livemint.recipe +++ b/recipes/livemint.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json import re from datetime import date @@ -19,15 +21,22 @@ class LiveMint(BasicNewsRecipe): remove_attributes = ['style', 'height', 'width'] masthead_url = 'https://images.livemint.com/static/livemint-logo-v1.svg' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } remove_empty_feeds = True resolve_internal_links = True + def __init__(self, *args, **kwargs): BasicNewsRecipe.__init__(self, *args, **kwargs) - if self.output_profile.short_name.startswith('kindle'): - self.title = 'Mint | ' + date.today().strftime('%b %d, %Y') - if is_saturday: - self.title = 'Mint Lounge | ' + date.today().strftime('%b %d, %Y') + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) def get_cover_url(self): today = date.today().strftime('%d/%m/%Y') @@ -40,7 +49,7 @@ class LiveMint(BasicNewsRecipe): return cov['HighResolution'] if is_saturday: - + title = 'Mint Lounge' masthead_url = 'https://lifestyle.livemint.com/mintlounge/static-images/lounge-logo.svg' oldest_article = 6.5 # days diff --git a/recipes/military_history.recipe b/recipes/military_history.recipe index 09d330a09b..9d04d65146 100644 --- a/recipes/military_history.recipe +++ b/recipes/military_history.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 ''' https://www.military-history.org/ ''' diff --git a/recipes/minerva_magazine.recipe b/recipes/minerva_magazine.recipe index 203d9b3520..57037bd662 100644 --- a/recipes/minerva_magazine.recipe +++ b/recipes/minerva_magazine.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 ''' https://minervamagazine.com/ ''' diff --git a/recipes/mit_technology_review.recipe b/recipes/mit_technology_review.recipe index 364fdc02a9..d9f00e42c4 100644 --- a/recipes/mit_technology_review.recipe +++ b/recipes/mit_technology_review.recipe @@ -65,9 +65,20 @@ class MitTechnologyReview(BasicNewsRecipe): ), ] + recipe_specific_options = { + 'issue_url': { + 'short': 'The issue URL ', + 'long': 'For example, https://www.technologyreview.com/magazines/the-education-issue/', + 'default': 'http://www.technologyreview.com/magazine/' + } + } + def parse_index(self): # for past editions, change the issue link below issue = 'http://www.technologyreview.com/magazine/' + d = self.recipe_specific_options.get('issue_url') + if d and isinstance(d, str): + issue = d soup = self.index_to_soup(issue) if script := soup.find('script', id='preload'): raw = script.contents[0] diff --git a/recipes/natgeomag.recipe b/recipes/natgeomag.recipe index c95b239d8f..70724ab4e5 100644 --- a/recipes/natgeomag.recipe +++ b/recipes/natgeomag.recipe @@ -192,7 +192,6 @@ class NatGeo(BasicNewsRecipe): # self.cover_url = png[0] + '?w=1000&h=1000' self.cover_url = soup.find('meta', attrs={'property':'og:image'})['content'].split('?')[0] + '?w=1000' - name = soup.find(attrs={'class':lambda x: x and 'Header__Description' in x.split()}) # self.title = 'National Geographic ' + self.tag_to_string(name) ans = {} if photoart := soup.find(attrs={'class':lambda x: x and 'BgImagePromo__Container__Text__Link' in x.split()}): diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index 93b65b4a08..fa3b7b0a6b 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -121,9 +121,19 @@ class NewYorkTimes(BasicNewsRecipe): tf.write(self.get_nyt_page(url)) return tf.name + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (YYYY/MM/DD format)', + 'long': 'For example, 2024/07/16' + } + } + def read_todays_paper(self): INDEX = 'https://www.nytimes.com/section/todayspaper' # INDEX = 'file:///t/raw.html' + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + INDEX = 'https://www.nytimes.com/issue/todayspaper/' + d + '/todays-new-york-times' return self.index_to_soup(self.get_nyt_page(INDEX, skip_wayback=True)) def read_nyt_metadata(self): diff --git a/recipes/outlook_india.recipe b/recipes/outlook_india.recipe index 9a3e8c0e10..b91b6ce4e8 100644 --- a/recipes/outlook_india.recipe +++ b/recipes/outlook_india.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from calibre.web.feeds.news import BasicNewsRecipe, classes @@ -34,6 +36,13 @@ class outlook(BasicNewsRecipe): classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1') ] + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (DD-Month-YYYY format)', + 'long': 'For example, 10-june-2024' + } + } + def get_browser(self): return BasicNewsRecipe.get_browser(self, user_agent='common_words/based', verify_ssl_certificates=False) @@ -42,14 +51,27 @@ class outlook(BasicNewsRecipe): '\n***\nif this recipe fails, report it on: ' 'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n' ) - soup = self.index_to_soup('https://www.outlookindia.com/magazine') - a = soup.find('a', attrs={'aria-label':'magazine-cover-image'}) - self.cover_url = a.img['src'].split('?')[0] - url = a['href'] - self.description = self.tag_to_string(a) - self.timefmt = ' [' + self.tag_to_string(a.div).strip() + ']' - self.log('Downloading issue:', url, self.timefmt) + + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + url = 'https://www.outlookindia.com/magazine/' + d + else: + soup = self.index_to_soup('https://www.outlookindia.com/magazine') + a = soup.find('a', attrs={'aria-label':'magazine-cover-image'}) + url = a['href'] + + self.log('Downloading issue:', url) + soup = self.index_to_soup(url) + cov = soup.find(attrs={'aria-label':'magazine-cover-image'}) + self.cover_url = cov.img['src'].split('?')[0] + summ = soup.find(attrs={'data-test-id':'magazine-summary'}) + if summ: + self.description = self.tag_to_string(summ) + tme = soup.find(attrs={'class':'arr__timeago'}) + if tme: + self.timefmt = ' [' + self.tag_to_string(tme).strip() + ']' + ans = [] diff --git a/recipes/people_daily.recipe b/recipes/people_daily.recipe index 26881d67cb..4ad18a436c 100644 --- a/recipes/people_daily.recipe +++ b/recipes/people_daily.recipe @@ -24,6 +24,20 @@ class AdvancedUserRecipe1277129332(BasicNewsRecipe): conversion_options = {'linearize_tables': True} masthead_url = 'http://www.people.com.cn/img/2010wb/images/logo.gif' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + feeds = [ (u'时政', u'http://www.people.com.cn/rss/politics.xml'), (u'国际', u'http://www.people.com.cn/rss/world.xml'), diff --git a/recipes/phillosophy_now.recipe b/recipes/phillosophy_now.recipe index 2a46dcf0eb..14d044e489 100644 --- a/recipes/phillosophy_now.recipe +++ b/recipes/phillosophy_now.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from collections import OrderedDict from calibre import browser @@ -31,19 +33,29 @@ class PhilosophyNow(BasicNewsRecipe): .articleImageCaption { font-size:small; text-align:center; } em, blockquote { color:#202020; } ''' + + recipe_specific_options = { + 'issue': { + 'short': 'Enter the Issue Number you want to download ', + 'long': 'For example, 136' + } + } def parse_index(self): soup = self.index_to_soup('https://philosophynow.org/') div = soup.find('div', attrs={'id': 'aside_issue_cover'}) - url = div.find('a', href=True)['href'] - issue = div.find('div', attrs={'id':'aside_issue_text'}) - if issue: - self.log('Downloading issue:', self.tag_to_string(issue).strip()) - self.timefmt = ' [' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_date'})) + ']' - self.title = 'Philosophy Now ' + self.tag_to_string(issue.find(attrs={'id':'aside_issue_number'})) + url = 'https://philosophynow.org' + div.find('a', href=True)['href'] + + d = self.recipe_specific_options.get('issue') + if d and isinstance(d, str): + url = 'https://philosophynow.org/issues/' + d + + soup = self.index_to_soup(url) + + div = soup.find('div', attrs={'id': 'issue_contents_cover_div'}) cov_url = div.find('img', src=True)['src'] self.cover_url = 'https://philosophynow.org' + cov_url - soup = self.index_to_soup('https://philosophynow.org' + url) + self.timefmt = ' [' + self.tag_to_string(soup.find('h1')) + ']' feeds = OrderedDict() diff --git a/recipes/poliitico_eu.recipe b/recipes/poliitico_eu.recipe index a5b602173e..44f4e9f484 100644 --- a/recipes/poliitico_eu.recipe +++ b/recipes/poliitico_eu.recipe @@ -27,6 +27,20 @@ class Politico(BasicNewsRecipe): encoding = 'UTF-8' language = 'en' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + remove_empty_feeds = True ignore_duplicate_articles = ['url'] diff --git a/recipes/reuters.recipe b/recipes/reuters.recipe index e22b1a61f4..7fdbbbb997 100644 --- a/recipes/reuters.recipe +++ b/recipes/reuters.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json import time from datetime import datetime, timedelta diff --git a/recipes/rtnews.recipe b/recipes/rtnews.recipe index a9a7fcef7c..b4e99da434 100644 --- a/recipes/rtnews.recipe +++ b/recipes/rtnews.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 ''' rt.com ''' @@ -26,6 +28,20 @@ class RT_eng(BasicNewsRecipe): remove_attributes = ['height', 'width', 'style'] publication_type = 'newsportal' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + extra_css = ''' img {display:block; margin:0 auto;} em { color:#202020; } diff --git a/recipes/science_x.recipe b/recipes/science_x.recipe index 9faaaa707b..1ae4dc8b07 100644 --- a/recipes/science_x.recipe +++ b/recipes/science_x.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 ''' https://sciencex.com/ ''' @@ -26,6 +28,20 @@ class scix(BasicNewsRecipe): .article__info, .article-byline, .article-main__more, .d-print-block {font-size:small; color:#404040;} ''' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + resolve_internal_links = True remove_empty_feeds = True diff --git a/recipes/scientific_american.recipe b/recipes/scientific_american.recipe index bf13b4b945..a5bbd7d609 100644 --- a/recipes/scientific_american.recipe +++ b/recipes/scientific_american.recipe @@ -59,16 +59,29 @@ class ScientificAmerican(BasicNewsRecipe): br.submit() return br + recipe_specific_options = { + 'issue_url': { + 'short': 'The issue URL ', + 'long': ( + 'For example, https://www.scientificamerican.com/issue/sa/2024/07-01/' + '\nYou can also download special-editions, physics, health, mind magazines by pasting the URL here.' + ) + } + } + def parse_index(self): # Get the cover, date and issue URL - fp_soup = self.index_to_soup("https://www.scientificamerican.com") - curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-')) - if not curr_issue_link: - self.abort_recipe_processing("Unable to find issue link") - issue_url = 'https://www.scientificamerican.com' + curr_issue_link.a["href"] - # for past editions https://www.scientificamerican.com/archive/issues/ - # issue_url = 'https://www.scientificamerican.com/issue/sa/2024/01-01/' - soup = self.index_to_soup(issue_url) + d = self.recipe_specific_options.get('issue_url') + if d and isinstance(d, str): + issue = d + else: + fp_soup = self.index_to_soup("https://www.scientificamerican.com") + curr_issue_link = fp_soup.find(**prefixed_classes('latest_issue_links-')) + if not curr_issue_link: + self.abort_recipe_processing("Unable to find issue link") + issue = 'https://www.scientificamerican.com' + curr_issue_link.a["href"] + + soup = self.index_to_soup(issue) script = soup.find("script", id="__DATA__") if not script: self.abort_recipe_processing("Unable to find script") diff --git a/recipes/spectator_magazine.recipe b/recipes/spectator_magazine.recipe index f9003f7de2..c9bc29c652 100644 --- a/recipes/spectator_magazine.recipe +++ b/recipes/spectator_magazine.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from calibre.web.feeds.news import BasicNewsRecipe, classes @@ -56,8 +58,19 @@ class spectator(BasicNewsRecipe): ] return br + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (DD-MM-YYYY format)', + 'long': 'For example, 20-07-2024' + } + } + def parse_index(self): - soup = self.index_to_soup('https://www.spectator.co.uk/magazine') + index = 'https://www.spectator.co.uk/magazine' + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + index = index + '/' + d + '/' + soup = self.index_to_soup(index) self.cover_url = soup.find(**classes( 'magazine-header__container')).img['src'].split('?')[0] issue = self.tag_to_string(soup.find(**classes( diff --git a/recipes/the_week.recipe b/recipes/the_week.recipe index 040c7174af..0e898bd3a0 100644 --- a/recipes/the_week.recipe +++ b/recipes/the_week.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 from datetime import datetime from calibre.web.feeds.news import BasicNewsRecipe, classes @@ -29,17 +31,32 @@ class TheWeek(BasicNewsRecipe): .article-info { font-size:small; } ''' + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (YYYY.MM.DD format)', + 'long': 'For example, 2024.06.30' + } + } + def get_cover_url(self): - soup = self.index_to_soup( - 'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/' - ) - for citem in soup.findAll( - 'meta', content=lambda s: s and s.endswith('view/3.jpg') - ): - return citem['content'] + d = self.recipe_specific_options.get('date') + if not (d and isinstance(d, str)): + soup = self.index_to_soup( + 'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/' + ) + for citem in soup.findAll( + 'meta', content=lambda s: s and s.endswith('view/3.jpg') + ): + return citem['content'] def parse_index(self): - soup = self.index_to_soup('https://www.theweek.in/theweek.html') + issue = 'https://www.theweek.in/theweek.html' + + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + issue = 'https://www.theweek.in/theweek.' + d + '.html' + + soup = self.index_to_soup(issue) ans = [] d = datetime.today() diff --git a/recipes/tls_mag.recipe b/recipes/tls_mag.recipe index c8f8b239b6..9cd10dcf51 100644 --- a/recipes/tls_mag.recipe +++ b/recipes/tls_mag.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json import re diff --git a/recipes/wash_post.recipe b/recipes/wash_post.recipe index 6460104be8..73d869a905 100644 --- a/recipes/wash_post.recipe +++ b/recipes/wash_post.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 __license__ = 'GPL v3' __copyright__ = '2011, Darko Miletic ' ''' @@ -28,6 +30,20 @@ class TheWashingtonPost(BasicNewsRecipe): publication_type = 'newspaper' remove_attributes = ['style', 'width', 'height'] + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + extra_css = ''' .img { text-align:center; font-size:small; } .auth { font-weight:bold; font-size:small; } diff --git a/recipes/world_archeology.recipe b/recipes/world_archeology.recipe index 31d467a3fd..008f2ff1bf 100644 --- a/recipes/world_archeology.recipe +++ b/recipes/world_archeology.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 ''' https://www.world-archaeology.com ''' diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe index 35587f971d..4cad8727f9 100644 --- a/recipes/wsj.recipe +++ b/recipes/wsj.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json import time from datetime import datetime, timedelta diff --git a/recipes/wsj_mag.recipe b/recipes/wsj_mag.recipe index 29712902f1..b09aeccdb1 100644 --- a/recipes/wsj_mag.recipe +++ b/recipes/wsj_mag.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json import time from datetime import datetime, timedelta diff --git a/recipes/wsj_news.recipe b/recipes/wsj_news.recipe index c5f3ef5d0b..3c8912b7de 100644 --- a/recipes/wsj_news.recipe +++ b/recipes/wsj_news.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json import time from datetime import datetime, timedelta @@ -38,7 +40,21 @@ class WSJ(BasicNewsRecipe): resolve_internal_links = True ignore_duplicate_articles = {'url', 'title'} remove_empty_feeds = True - oldest_article = 1 # days + oldest_article = 1.2 # days + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) extra_css = ''' #subhed, em { font-style:italic; color:#202020; }