From de77bfa3be1bc2061acace9693f6b8357f425b3a Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 12 Dec 2022 16:22:18 +0530
Subject: [PATCH] Update Hindu Business Line Print Edition

---
 .../hindu_business_line_print_edition.recipe | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/recipes/hindu_business_line_print_edition.recipe b/recipes/hindu_business_line_print_edition.recipe
index 9ab0e7e71d..7a83251a9e 100644
--- a/recipes/hindu_business_line_print_edition.recipe
+++ b/recipes/hindu_business_line_print_edition.recipe
@@ -1,6 +1,7 @@
 import json
 import re
 from collections import defaultdict
+from datetime import date
 
 from calibre.web.feeds.news import BasicNewsRecipe, classes
 
@@ -10,6 +11,10 @@ def absurl(url):
     return url
 
 
+local_edition = None
+# Chennai is default edition, for other editions use 'bl_hyderabad', 'bl_bangalore', 'bl_mumbai'
+
+
 class BusinessLine(BasicNewsRecipe):
     title = 'The Hindu BusinessLine | Print Edition'
     __author__ = 'unkn0wn'
@@ -24,7 +29,8 @@ class BusinessLine(BasicNewsRecipe):
     remove_attributes = ['style', 'height', 'width']
     extra_css = '.caption{font-size:small; text-align:center;}'\
         '.author{font-size:small; font-weight:bold;}'\
-        '.subhead{font-weight:bold;}'
+        '.subhead, .subhead_lead {font-weight:bold;}'\
+        'img {display:block; margin:0 auto;}'
 
     ignore_duplicate_articles = {'url'}
 
@@ -43,14 +49,14 @@ class BusinessLine(BasicNewsRecipe):
                 img['src'] = img['data-original']
         return soup
 
-    def populate_article_metadata(self, article, soup, first):
-        if first and hasattr(self, 'add_toc_thumbnail'):
-            image = soup.find('img')
-            if image is not None:
-                self.add_toc_thumbnail(article, image['src'])
-
     def parse_index(self):
-        url = 'https://www.thehindubusinessline.com/todays-paper/'
+        if local_edition:
+            yr = str(date.today().year)
+            mn = date.today().strftime('%m')
+            dy = date.today().strftime('%d')
+            url = absurl('/todays-paper/' + yr + '-' + mn + '-' + dy + '/' + local_edition + '/')
+        else:
+            url = 'https://www.thehindubusinessline.com/todays-paper/'
         raw = self.index_to_soup(url, raw=True)
         soup = self.index_to_soup(raw)
         ans = self.hindu_parse_index(soup)