From a91a6141422b889bbbc6da419393c60bd719ca80 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 27 Nov 2022 13:59:54 +0530 Subject: [PATCH] ... --- recipes/hindu.recipe | 17 ++++++----------- recipes/indian_express_print_edition.recipe | 2 +- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe index 81ea342c1d..7101a88bf1 100644 --- a/recipes/hindu.recipe +++ b/recipes/hindu.recipe @@ -11,7 +11,7 @@ def absurl(url): return url -local_edition = 'th_hyderabad' +local_edition = None # Chennai is default edition, for other editions use 'th_hyderabad', 'th_bangalore', 'th_delhi', 'th_kolkata' etc @@ -23,8 +23,9 @@ class TheHindu(BasicNewsRecipe): masthead_url = 'https://www.thehindu.com/theme/images/th-online/thehindu-logo.svg' remove_attributes = ['style', 'height', 'width'] extra_css = '.caption{font-size:small; text-align:center;}'\ - '.author{font-size:small;}'\ - '.subhead{font-weight:bold;}' + '.author{font-size:small; font-weight:bold;}'\ + '.subhead, .subhead_lead {font-weight:bold;}'\ + 'img {display:block; margin:0 auto;}' ignore_duplicate_articles = {'url'} @@ -38,17 +39,11 @@ class TheHindu(BasicNewsRecipe): def preprocess_html(self, soup): for cap in soup.findAll('p', attrs={'class':'caption'}): - cap.name = 'span' + cap.name = 'figcaption' for img in soup.findAll('img', attrs={'data-original':True}): img['src'] = img['data-original'] return soup - def populate_article_metadata(self, article, soup, first): - if first and hasattr(self, 'add_toc_thumbnail'): - image = soup.find('img') - if image is not None: - self.add_toc_thumbnail(article, image['src']) - def parse_index(self): if local_edition: yr = str(date.today().year) @@ -85,7 +80,7 @@ class TheHindu(BasicNewsRecipe): section = sec.replace('TH_', '') title = item['articleheadline'] url = absurl(item['href']) - desc = 'from page no.' + item['pageno'] + ' | ' + item['teaser_text'] or '' + desc = 'Page no.' + item['pageno'] + ' | ' + item['teaser_text'] or '' self.log('\t', title, '\n\t\t', url) feeds_dict[section].append({"title": title, "url": url, "description": desc}) return [(section, articles) for section, articles in feeds_dict.items()] diff --git a/recipes/indian_express_print_edition.recipe b/recipes/indian_express_print_edition.recipe index ab81e5ecf7..01e8f862b2 100644 --- a/recipes/indian_express_print_edition.recipe +++ b/recipes/indian_express_print_edition.recipe @@ -66,7 +66,7 @@ class IndianExpressPrint(BasicNewsRecipe): def parse_index(self): soup = self.index_to_soup('https://indianexpress.com/todays-paper/') feeds_dict = defaultdict(list) - for div in soup.findAll('div', attrs={'class':['lead-story', 'section']}): + for div in soup.findAll('div', attrs={'class':['lead-story', 'section', 'today-paper']}): for a in div.findAll('a', attrs={'href':lambda x: x and x.startswith('https://indianexpress.com/article/')}): if not a.find('img'): url = a['href']