Updates recipe_specific_options

unkn0w7n 2024-07-22 11:08:00 +05:30
parent bdbfdf0f43
commit 16a1f2890b
8 changed files with 141 additions and 26 deletions

View File

@@ -47,17 +47,30 @@ class IndiaToday(BasicNewsRecipe):
def preprocess_raw_html(self, raw_html, url):
return raw_html.replace('—', '--')
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (DD-MM-YYYY format)',
'long': 'For example, 22-07-2024'
}
}
def get_cover_url(self):
soup = self.index_to_soup(
'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154'
)
for citem in soup.findAll(
'meta', content=lambda s: s and s.endswith('/magazine/300/new')
):
return citem['content'].replace('300', '600')
d = self.recipe_specific_options.get('date')
if not (d and isinstance(d, str)):
soup = self.index_to_soup(
'https://www.readwhere.com/magazine/the-india-today-group/India-Today/1154'
)
for citem in soup.findAll(
'meta', content=lambda s: s and s.endswith('/magazine/300/new')
):
return citem['content'].replace('300', '600')
def parse_index(self):
soup = self.index_to_soup('https://www.indiatoday.in/magazine')
issue = 'https://www.indiatoday.in/magazine'
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
issue = issue + '/' + d
soup = self.index_to_soup(issue)
section = None
sections = {}
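For readers unfamiliar with the mechanism: judging by the checks added here, calibre hands any user-supplied value to the recipe through self.recipe_specific_options as a plain string, and the isinstance(d, str) test is what distinguishes "no value given" from a chosen date. The India Today change (and the Spectator and The Week changes further down) then appends that string to the magazine URL and skips the cover lookup, presumably because the scraped cover only matches the latest issue. A minimal standalone sketch of the URL logic, with build_issue_url as a hypothetical helper name rather than anything in the recipe itself:

def build_issue_url(options):
    # distilled from the parse_index change above
    issue = 'https://www.indiatoday.in/magazine'
    d = options.get('date')
    if d and isinstance(d, str):  # user-supplied DD-MM-YYYY string
        issue = issue + '/' + d
    return issue

print(build_issue_url({}))                      # .../magazine  (latest issue)
print(build_issue_url({'date': '22-07-2024'}))  # .../magazine/22-07-2024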

View File

@@ -81,7 +81,7 @@ class Liberation(BasicNewsRecipe):
'les mutations des sociétés et des cultures.'
)
language = 'fr'
oldest_article = 1
oldest_article = 1.15
remove_empty_feeds = True
articles_are_obfuscated = True
ignore_duplicate_articles = {'title', 'url'}
@@ -94,6 +94,20 @@ class Liberation(BasicNewsRecipe):
blockquote { color:#202020; }
'''
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
feeds = [
('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'),
('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'),
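The 'days' option used here, and repeated verbatim for LiveMint, RT and WSJ below, follows one pattern: expose the current oldest_article as the option's default, then coerce any user-supplied string back to a float in __init__ before downloading starts (str(oldest_article) keeps the advertised default in sync with the class attribute, which this commit also nudges from 1 to 1.15 here and from 1 to 1.2 for WSJ). A standalone sketch of that behaviour, with DemoRecipe standing in for BasicNewsRecipe and the constructor argument used only to mimic calibre injecting the user's values:

class DemoRecipe:
    oldest_article = 1.15  # class-level default, mirrored into the option metadata below
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        }
    }

    def __init__(self, user_options=None):
        # calibre supplies the user's choices through this same mapping;
        # the argument here is just a stand-in for that step
        if user_options is not None:
            self.recipe_specific_options = user_options
        d = self.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            self.oldest_article = float(d)  # values arrive as text

print(DemoRecipe({'days': '0.5'}).oldest_article)  # 0.5
print(DemoRecipe().oldest_article)                 # keeps the 1.15 default

Note that float(d) raises ValueError on a malformed value; none of the recipes in this commit guard against that.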

View File

@@ -19,6 +19,20 @@ class LiveMint(BasicNewsRecipe):
remove_attributes = ['style', 'height', 'width']
masthead_url = 'https://images.livemint.com/static/livemint-logo-v1.svg'
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
remove_empty_feeds = True
resolve_internal_links = True

View File

@@ -34,6 +34,13 @@ class outlook(BasicNewsRecipe):
classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1')
]
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (DD-Month-YYYY format)',
'long': 'For example, 10-june-2024'
}
}
def get_browser(self):
return BasicNewsRecipe.get_browser(self, user_agent='common_words/based', verify_ssl_certificates=False)
@@ -42,14 +49,27 @@
'\n***\nif this recipe fails, report it on: '
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
)
soup = self.index_to_soup('https://www.outlookindia.com/magazine')
a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
self.cover_url = a.img['src'].split('?')[0]
url = a['href']
self.description = self.tag_to_string(a)
self.timefmt = ' [' + self.tag_to_string(a.div).strip() + ']'
self.log('Downloading issue:', url, self.timefmt)
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
url = 'https://www.outlookindia.com/magazine/' + d
else:
soup = self.index_to_soup('https://www.outlookindia.com/magazine')
a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
url = a['href']
self.log('Downloading issue:', url)
soup = self.index_to_soup(url)
cov = soup.find(attrs={'aria-label':'magazine-cover-image'})
self.cover_url = cov.img['src'].split('?')[0]
summ = soup.find(attrs={'data-test-id':'magazine-summary'})
if summ:
self.description = self.tag_to_string(summ)
tme = soup.find(attrs={'class':'arr__timeago'})
if tme:
self.timefmt = ' [' + self.tag_to_string(tme).strip() + ']'
ans = []

View File

@@ -26,6 +26,20 @@ class RT_eng(BasicNewsRecipe):
remove_attributes = ['height', 'width', 'style']
publication_type = 'newsportal'
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
extra_css = '''
img {display:block; margin:0 auto;}
em { color:#202020; }

View File

@@ -56,8 +56,19 @@ class spectator(BasicNewsRecipe):
]
return br
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (DD-MM-YYYY format)',
'long': 'For example, 20-07-2024'
}
}
def parse_index(self):
soup = self.index_to_soup('https://www.spectator.co.uk/magazine')
index = 'https://www.spectator.co.uk/magazine'
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
index = index + '/' + d + '/'
soup = self.index_to_soup(index)
self.cover_url = soup.find(**classes(
'magazine-header__container')).img['src'].split('?')[0]
issue = self.tag_to_string(soup.find(**classes(

View File

@@ -29,17 +29,32 @@ class TheWeek(BasicNewsRecipe):
.article-info { font-size:small; }
'''
recipe_specific_options = {
'date': {
'short': 'The date of the edition to download (YYYY.MM.DD format)',
'long': 'For example, 2024.06.30'
}
}
def get_cover_url(self):
soup = self.index_to_soup(
'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
)
for citem in soup.findAll(
'meta', content=lambda s: s and s.endswith('view/3.jpg')
):
return citem['content']
d = self.recipe_specific_options.get('date')
if not (d and isinstance(d, str)):
soup = self.index_to_soup(
'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
)
for citem in soup.findAll(
'meta', content=lambda s: s and s.endswith('view/3.jpg')
):
return citem['content']
def parse_index(self):
soup = self.index_to_soup('https://www.theweek.in/theweek.html')
issue = 'https://www.theweek.in/theweek.html'
d = self.recipe_specific_options.get('date')
if d and isinstance(d, str):
issue = 'https://www.theweek.in/theweek.' + d + '.html'
soup = self.index_to_soup(issue)
ans = []
d = datetime.today()
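One quirk in The Week's version: the date is spliced into the file name rather than appended as a path segment, so the YYYY.MM.DD format (dots, not dashes) matters. A two-line illustration using the example from the option's help text:

date_option = '2024.06.30'  # YYYY.MM.DD
issue = 'https://www.theweek.in/theweek.' + date_option + '.html'
print(issue)  # https://www.theweek.in/theweek.2024.06.30.html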

View File

@@ -38,7 +38,21 @@ class WSJ(BasicNewsRecipe):
resolve_internal_links = True
ignore_duplicate_articles = {'url', 'title'}
remove_empty_feeds = True
oldest_article = 1 # days
oldest_article = 1.2 # days
recipe_specific_options = {
'days': {
'short': 'Oldest article to download from this news source. In days ',
'long': 'For example, 0.5, gives you articles from the past 12 hours',
'default': str(oldest_article)
}
}
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
d = self.recipe_specific_options.get('days')
if d and isinstance(d, str):
self.oldest_article = float(d)
extra_css = '''
#subhed, em { font-style:italic; color:#202020; }