diff --git a/recipes/india_today.recipe b/recipes/india_today.recipe index a2b6ee317d..3b616de01c 100644 --- a/recipes/india_today.recipe +++ b/recipes/india_today.recipe @@ -47,17 +47,30 @@ class IndiaToday(BasicNewsRecipe): def preprocess_raw_html(self, raw_html, url): return raw_html.replace('—', '--') + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (DD-MM-YYYY format)', + 'long': 'For example, 22-07-2024' + } + } + def get_cover_url(self): - soup = self.index_to_soup( - 'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154' - ) - for citem in soup.findAll( - 'meta', content=lambda s: s and s.endswith('/magazine/300/new') - ): - return citem['content'].replace('300', '600') + d = self.recipe_specific_options.get('date') + if not (d and isinstance(d, str)): + soup = self.index_to_soup( + 'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154' + ) + for citem in soup.findAll( + 'meta', content=lambda s: s and s.endswith('/magazine/300/new') + ): + return citem['content'].replace('300', '600') def parse_index(self): - soup = self.index_to_soup('https://www.indiatoday.in/magazine') + issue = 'https://www.indiatoday.in/magazine' + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + issue = issue + '/' + d + soup = self.index_to_soup(issue) section = None sections = {} diff --git a/recipes/liberation.recipe b/recipes/liberation.recipe index b674d5922e..a2c85284f3 100644 --- a/recipes/liberation.recipe +++ b/recipes/liberation.recipe @@ -81,7 +81,7 @@ class Liberation(BasicNewsRecipe): 'les mutations des sociétés et des cultures.' 
) language = 'fr' - oldest_article = 1 + oldest_article = 1.15 remove_empty_feeds = True articles_are_obfuscated = True ignore_duplicate_articles = {'title', 'url'} @@ -94,6 +94,20 @@ class Liberation(BasicNewsRecipe): blockquote { color:#202020; } ''' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + feeds = [ ('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'), ('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'), diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe index 8eb4586b06..b977b10e47 100644 --- a/recipes/livemint.recipe +++ b/recipes/livemint.recipe @@ -19,6 +19,20 @@ class LiveMint(BasicNewsRecipe): remove_attributes = ['style', 'height', 'width'] masthead_url = 'https://images.livemint.com/static/livemint-logo-v1.svg' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. 
In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + remove_empty_feeds = True resolve_internal_links = True diff --git a/recipes/outlook_india.recipe b/recipes/outlook_india.recipe index 9a3e8c0e10..6c6558c1c4 100644 --- a/recipes/outlook_india.recipe +++ b/recipes/outlook_india.recipe @@ -34,6 +34,13 @@ class outlook(BasicNewsRecipe): classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1') ] + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (DD-Month-YYYY format)', + 'long': 'For example, 10-june-2024' + } + } + def get_browser(self): return BasicNewsRecipe.get_browser(self, user_agent='common_words/based', verify_ssl_certificates=False) @@ -42,14 +49,27 @@ class outlook(BasicNewsRecipe): '\n***\nif this recipe fails, report it on: ' 'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n' ) - soup = self.index_to_soup('https://www.outlookindia.com/magazine') - a = soup.find('a', attrs={'aria-label':'magazine-cover-image'}) - self.cover_url = a.img['src'].split('?')[0] - url = a['href'] - self.description = self.tag_to_string(a) - self.timefmt = ' [' + self.tag_to_string(a.div).strip() + ']' - self.log('Downloading issue:', url, self.timefmt) + + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + url = 'https://www.outlookindia.com/magazine/' + d + else: + soup = self.index_to_soup('https://www.outlookindia.com/magazine') + a = soup.find('a', attrs={'aria-label':'magazine-cover-image'}) + url = a['href'] + + self.log('Downloading issue:', url) + soup = self.index_to_soup(url) + cov = soup.find(attrs={'aria-label':'magazine-cover-image'}) + self.cover_url = 
cov.img['src'].split('?')[0] + summ = soup.find(attrs={'data-test-id':'magazine-summary'}) + if summ: + self.description = self.tag_to_string(summ) + tme = soup.find(attrs={'class':'arr__timeago'}) + if tme: + self.timefmt = ' [' + self.tag_to_string(tme).strip() + ']' + ans = [] diff --git a/recipes/rtnews.recipe b/recipes/rtnews.recipe index a9a7fcef7c..a76411a8fc 100644 --- a/recipes/rtnews.recipe +++ b/recipes/rtnews.recipe @@ -26,6 +26,20 @@ class RT_eng(BasicNewsRecipe): remove_attributes = ['height', 'width', 'style'] publication_type = 'newsportal' + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) + extra_css = ''' img {display:block; margin:0 auto;} em { color:#202020; } diff --git a/recipes/spectator_magazine.recipe b/recipes/spectator_magazine.recipe index f9003f7de2..fd3f9febee 100644 --- a/recipes/spectator_magazine.recipe +++ b/recipes/spectator_magazine.recipe @@ -56,8 +56,19 @@ class spectator(BasicNewsRecipe): ] return br + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (DD-MM-YYYY format)', + 'long': 'For example, 20-07-2024' + } + } + def parse_index(self): - soup = self.index_to_soup('https://www.spectator.co.uk/magazine') + index = 'https://www.spectator.co.uk/magazine' + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + index = index + '/' + d + '/' + soup = self.index_to_soup(index) self.cover_url = soup.find(**classes( 'magazine-header__container')).img['src'].split('?')[0] issue = self.tag_to_string(soup.find(**classes( diff --git a/recipes/the_week.recipe b/recipes/the_week.recipe index 
040c7174af..a3d0110ebc 100644 --- a/recipes/the_week.recipe +++ b/recipes/the_week.recipe @@ -29,17 +29,32 @@ class TheWeek(BasicNewsRecipe): .article-info { font-size:small; } ''' + recipe_specific_options = { + 'date': { + 'short': 'The date of the edition to download (YYYY.MM.DD format)', + 'long': 'For example, 2024.06.30' + } + } + def get_cover_url(self): - soup = self.index_to_soup( - 'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/' - ) - for citem in soup.findAll( - 'meta', content=lambda s: s and s.endswith('view/3.jpg') - ): - return citem['content'] + d = self.recipe_specific_options.get('date') + if not (d and isinstance(d, str)): + soup = self.index_to_soup( + 'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/' + ) + for citem in soup.findAll( + 'meta', content=lambda s: s and s.endswith('view/3.jpg') + ): + return citem['content'] def parse_index(self): - soup = self.index_to_soup('https://www.theweek.in/theweek.html') + issue = 'https://www.theweek.in/theweek.html' + + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + issue = 'https://www.theweek.in/theweek.' + d + '.html' + + soup = self.index_to_soup(issue) ans = [] d = datetime.today() diff --git a/recipes/wsj_news.recipe b/recipes/wsj_news.recipe index c5f3ef5d0b..a835b2b3b6 100644 --- a/recipes/wsj_news.recipe +++ b/recipes/wsj_news.recipe @@ -38,7 +38,21 @@ class WSJ(BasicNewsRecipe): resolve_internal_links = True ignore_duplicate_articles = {'url', 'title'} remove_empty_feeds = True - oldest_article = 1 # days + oldest_article = 1.2 # days + + recipe_specific_options = { + 'days': { + 'short': 'Oldest article to download from this news source. 
In days ', + 'long': 'For example, 0.5, gives you articles from the past 12 hours', + 'default': str(oldest_article) + } + } + + def __init__(self, *args, **kwargs): + BasicNewsRecipe.__init__(self, *args, **kwargs) + d = self.recipe_specific_options.get('days') + if d and isinstance(d, str): + self.oldest_article = float(d) extra_css = ''' #subhed, em { font-style:italic; color:#202020; }