Update India Today and Indian Express

2025-07-09 03:04:10 -04:00 · 2022-07-26 08:46:01 +05:30 · 2022-07-26 08:46:01 +05:30 · f7b592023f
commit f7b592023f
parent 86dbab7283
2 changed files with 14 additions and 4 deletions
--- a/recipes/india_today.recipe
+++ b/recipes/india_today.recipe
@@ -9,7 +9,6 @@ class IndiaToday(BasicNewsRecipe):
    use_embedded_content = False
    remove_attributes = ['style', 'height', 'width']
    ignore_duplicate_articles = {'url'}
    extra_css = '[itemprop^="description"] {font-size: small; font-style: italic;}'
    description = (
        'India’s Most Reputed, Credible and Popular news magazine.'
        ' Read the most preferred magazine of 9.5 million Indians to access highly researched and unbiased content.'
@@ -19,6 +18,7 @@ class IndiaToday(BasicNewsRecipe):
    extra_css = '''
        .body_caption{font-size:small;}
        .image-alt{font-size:small;}
        [itemprop^="description"] {font-size: small; font-style: italic;}
    '''
    def get_cover_url(self):
@@ -63,11 +63,19 @@ class IndiaToday(BasicNewsRecipe):
                else:
                    url = 'https://www.indiatoday.in' + url
                title = self.tag_to_string(a).strip()
                try:
                    desc = self.tag_to_string(a.findParent(
                        'span', attrs={'class':'field-content'}).findNext(
                            'div', attrs={'class':'views-field'})).strip()
                except Exception:
                    desc = self.tag_to_string(a.findParent(
                        ('h3','p')).findNext('span', attrs={'class':'kicket-text'})).strip()
                if not url or not title:
                    continue
                self.log('\t', title)
                self.log('\t', desc)
                self.log('\t\t', url)
-                sections[section].append({'title': title, 'url': url})
+                sections[section].append({'title': title, 'url': url, 'description': desc})
        def sort_key(x):
            section = x[0]
@@ -99,4 +107,6 @@ class IndiaToday(BasicNewsRecipe):
            style.extract()
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        for h2 in soup.findAll('h2'):
            h2.name = 'h5'
        return str(soup)
--- a/recipes/indian_express.recipe
+++ b/recipes/indian_express.recipe
@@ -62,7 +62,7 @@ class IndianExpress(BasicNewsRecipe):
        classes(
            'share-social appstext ie-int-campign-ad ie-breadcrumb custom_read_button unitimg copyright'
            ' storytags pdsc-related-modify news-guard premium-story append_social_share'
-            ' digital-subscriber-only h-text-widget'
+            ' digital-subscriber-only h-text-widget ie-premium'
        )
    ]
@@ -107,7 +107,7 @@ class IndianExpress(BasicNewsRecipe):
    def preprocess_html(self, soup):
        h2 = soup.findAll('h2')
        for sub in h2:
-            sub.name = 'h4'
+            sub.name = 'h5'
        for span in soup.findAll(
            'span', attrs={'class': ['ie-custom-caption', 'custom-caption']}
        ):