Update India Today, Outlook Magazine and Live Mint

2025-07-09 03:04:10 -04:00 · 2023-01-08 13:38:33 +05:30 · 2023-01-08 13:38:33 +05:30 · 0ebd840d6a
commit 0ebd840d6a
parent 8114376b2f
3 changed files with 27 additions and 21 deletions
--- a/recipes/india_today.recipe
+++ b/recipes/india_today.recipe
@@ -63,11 +63,10 @@ class IndiaToday(BasicNewsRecipe):
        sections = {}

        date = soup.find(attrs={'class':lambda x: x and x.startswith('MagazineEdition_edition__date')})
-        edition = soup.find(attrs={'class':lambda x: x and x.startswith('MagazineEdition_magazineprime')})
-        self.timefmt =' (' + self.tag_to_string(edition) + ') [' + self.tag_to_string(date).strip() + ']'
-        p = edition.findNext('p')
-        if p:
-            self.description = self.tag_to_string(p).strip()
+        edition = soup.find(attrs={'class':'prime__magazine'})
+        self.timefmt = '(' + self.tag_to_string(edition).strip() +') [' + self.tag_to_string(date).strip() + ']'
+        if p := edition.findNext('p'):
+            self.description = self.tag_to_string(p)
        self.log('Downloading Issue: ', self.timefmt)

        for tag in soup.findAll('div', attrs={'class': lambda x: x and 'NoCard_story__grid__' in x}):
@@ -125,11 +124,5 @@ class IndiaToday(BasicNewsRecipe):
            quo.name = 'blockquote'
        return soup

-    def populate_article_metadata(self, article, soup, first):
-        if first and hasattr(self, 'add_toc_thumbnail'):
-            image = soup.find('img', src=True, attrs={'class':'i-amphtml-fill-content'})
-            if image is not None:
-                self.add_toc_thumbnail(article, image['src'])
-
    def print_version(self, url):
        return url.replace('.in/','.in/amp/')
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@@ -24,9 +24,14 @@ class LiveMint(BasicNewsRecipe):

    remove_empty_feeds =  True

-    if is_saturday:
+    def get_cover_url(self):
+        soup = self.index_to_soup(
+            'https://www.magzter.com/IN/HT-Digital-Streams-Ltd./Mint-Mumbai/Newspaper/'
+        )
+        for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
+            return citem['content']

-        cover_url = 'https://epsfs.hindustantimes.com/MINT/2022/04/16/Delhi/Delhi/5_01/bf867ea1_01_mr.jpg'
+    if is_saturday:

        keep_only_tags = [
            dict(name='h1'),
@@ -54,14 +59,13 @@ class LiveMint(BasicNewsRecipe):
                img['src'] = img['data-img']
            return soup
    else:
-        # some wsj articles wont load
+
        extra_css = '''
            #img-cap {font-size:small; text-align:center;}
            #auth-info {font-size:small; text-align:center;}
            .highlights {font-style:italic;}
            .summary{font-style:italic; color:#404040;}
        '''
-        cover_url = 'https://epsfs.hindustantimes.com/MINT/2022/04/05/Delhi/Delhi/5_01/1ec7ad14_01_mr.jpg'

        keep_only_tags = [
            dict(name='h1'),
--- a/recipes/outlook_india.recipe
+++ b/recipes/outlook_india.recipe
@@ -18,13 +18,17 @@ class outlook(BasicNewsRecipe):
    remove_attributes = ['height', 'width', 'style']
    ignore_duplicate_articles = {'url'}
    resolve_internal_links = True
-    masthead_url = 'https://www.outlookindia.com/images/home_new_v4/logo_outlook.svg'
+    extra_css = '''
+        .story-summary{font-style:italic; color:#202020;}
+        .author_wrapper, .relatedCategory{font-size:small; color:#404040;}
+        #figcap{font-size:small; text-align:center;}
+    '''

    keep_only_tags = [classes('__story_detail')]
    remove_tags = [
        classes(
-            'social_sharing_article left_trending left-sticky __tag_links'
-            ' next_prev_stories	downarrow uparrow more_from_author_links next prev __related_stories_thumbs'
+            'social_sharing_article left_trending left-sticky __tag_links next_prev_stories '
+            'downarrow uparrow more_from_author_links next prev __related_stories_thumbs'
        )
    ]

@@ -33,8 +37,8 @@ class outlook(BasicNewsRecipe):
        div = soup.find('div', attrs={'class':'wrapper'})
        a = div.find('a', href=lambda x: x and x.startswith('/magazine/issue/'))
        url = a['href']
-        self.log('Downloading issue:', url)
-        self.timefmt = ' [' + self.tag_to_string(a) + ']'
+        self.timefmt = ' [' + self.tag_to_string(a.find('p')).strip() + ']'
+        self.log('Downloading issue:', url, self.timefmt)
        soup = self.index_to_soup('https://www.outlookindia.com' + url)
        cover = soup.find(**classes('listingPage_lead_story'))
        self.cover_url = cover.find('img', attrs={'src': True})['src']
@@ -42,7 +46,7 @@ class outlook(BasicNewsRecipe):

        for h3 in soup.findAll(['h3', 'h4'],
                               attrs={'class': 'tk-kepler-std-condensed-subhead'}):
-            a = h3.find('a', href=lambda x: x)
+            a = h3.find('a', href=True)
            url = a['href']
            title = self.tag_to_string(a)
            desc = ''
@@ -55,6 +59,11 @@ class outlook(BasicNewsRecipe):
            ans.append({'title': title, 'url': url, 'description': desc})
        return [('Articles', ans)]

+    def preprocess_html(self,soup):
+        for fig in soup.findAll('figure'):
+            fig['id'] = 'figcap'
+        return soup
+
    def preprocess_raw_html(self, raw, *a):
        return raw
        m = re.search('<!-- NewsArticle Schema -->.*?script.*?>', raw, flags=re.DOTALL)