From f7b592023fba0bb03f67c755c13b7351c2639170 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Jul 2022 08:46:01 +0530 Subject: [PATCH] Update India Today and Indian Express --- recipes/india_today.recipe | 14 ++++++++++++-- recipes/indian_express.recipe | 4 ++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/recipes/india_today.recipe b/recipes/india_today.recipe index 00c805daa4..18ca902c3c 100644 --- a/recipes/india_today.recipe +++ b/recipes/india_today.recipe @@ -9,7 +9,6 @@ class IndiaToday(BasicNewsRecipe): use_embedded_content = False remove_attributes = ['style', 'height', 'width'] ignore_duplicate_articles = {'url'} - extra_css = '[itemprop^="description"] {font-size: small; font-style: italic;}' description = ( 'India’s Most Reputed, Credible and Popular news magazine.' ' Read the most preferred magazine of 9.5 million Indians to access highly researched and unbiased content.' @@ -19,6 +18,7 @@ class IndiaToday(BasicNewsRecipe): extra_css = ''' .body_caption{font-size:small;} .image-alt{font-size:small;} + [itemprop^="description"] {font-size: small; font-style: italic;} ''' def get_cover_url(self): @@ -63,11 +63,19 @@ class IndiaToday(BasicNewsRecipe): else: url = 'https://www.indiatoday.in' + url title = self.tag_to_string(a).strip() + try: + desc = self.tag_to_string(a.findParent( + 'span', attrs={'class':'field-content'}).findNext( + 'div', attrs={'class':'views-field'})).strip() + except Exception: + desc = self.tag_to_string(a.findParent( + ('h3','p')).findNext('span', attrs={'class':'kicket-text'})).strip() if not url or not title: continue self.log('\t', title) + self.log('\t', desc) self.log('\t\t', url) - sections[section].append({'title': title, 'url': url}) + sections[section].append({'title': title, 'url': url, 'description': desc}) def sort_key(x): section = x[0] @@ -99,4 +107,6 @@ class IndiaToday(BasicNewsRecipe): style.extract() for img in soup.findAll('img', attrs={'data-src': True}): img['src'] = img['data-src'] + for h2 in soup.findAll('h2'): + h2.name = 'h5' return str(soup) diff --git a/recipes/indian_express.recipe b/recipes/indian_express.recipe index 4c7a2175a7..5dd2614c8c 100644 --- a/recipes/indian_express.recipe +++ b/recipes/indian_express.recipe @@ -62,7 +62,7 @@ class IndianExpress(BasicNewsRecipe): classes( 'share-social appstext ie-int-campign-ad ie-breadcrumb custom_read_button unitimg copyright' ' storytags pdsc-related-modify news-guard premium-story append_social_share' - ' digital-subscriber-only h-text-widget' + ' digital-subscriber-only h-text-widget ie-premium' ) ] @@ -107,7 +107,7 @@ class IndianExpress(BasicNewsRecipe): def preprocess_html(self, soup): h2 = soup.findAll('h2') for sub in h2: - sub.name = 'h4' + sub.name = 'h5' for span in soup.findAll( 'span', attrs={'class': ['ie-custom-caption', 'custom-caption']} ):