From 594e44974ca6c6220333e9fc782319848ba2d943 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sun, 21 Jul 2024 10:29:36 +0530 Subject: [PATCH 1/2] Update business_standard_print.recipe --- recipes/business_standard_print.recipe | 54 ++++++++++++++++---------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/recipes/business_standard_print.recipe b/recipes/business_standard_print.recipe index b7c772e0d9..ff7d63ff98 100644 --- a/recipes/business_standard_print.recipe +++ b/recipes/business_standard_print.recipe @@ -4,12 +4,6 @@ from datetime import datetime from calibre.web.feeds.news import BasicNewsRecipe from html5_parser import parse -today = datetime.today().strftime('%d-%m-%Y') - -# today = '20-09-2023' - -day, month, year = (int(x) for x in today.split('-')) -dt = datetime(year, month, day) class BusinessStandardPrint(BasicNewsRecipe): title = 'Business Standard Print Edition' @@ -18,18 +12,12 @@ class BusinessStandardPrint(BasicNewsRecipe): language = 'en_IN' masthead_url = 'https://bsmedia.business-standard.com/include/_mod/site/html5/images/business-standard-logo.png' encoding = 'utf-8' - timefmt = ' [' + dt.strftime('%b %d, %Y') + ']' resolve_internal_links = True remove_empty_feeds = True no_stylesheets = True remove_javascript = True - remove_attributes = ['width', 'height', 'float', 'style'] - - def __init__(self, *args, **kwargs): - BasicNewsRecipe.__init__(self, *args, **kwargs) - if self.output_profile.short_name.startswith('kindle'): - self.title = 'Business Standard ' + dt.strftime('%b %d, %Y') + remove_attributes = ['width', 'height', 'style'] def get_browser(self): return BasicNewsRecipe.get_browser(self, user_agent='common_words/based') @@ -40,16 +28,35 @@ class BusinessStandardPrint(BasicNewsRecipe): extra_css = ''' img {display:block; margin:0 auto;} + .sub { font-style:italic; color:#202020; } .auth, .cat { font-size:small; color:#202020; } .cap { font-size:small; text-align:center; } ''' + recipe_specific_options = { + 'date': { + 'short': 'The date of the print edition to download (DD-MM-YYYY format)', + 'long': 'For example, 20-09-2023' + } + } + def get_cover_url(self): - soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/') - for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): - return citem['content'] + d = self.recipe_specific_options.get('date') + if not (d and isinstance(d, str)): + soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/') + for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): + return citem['content'] def parse_index(self): + today = datetime.today().strftime('%d-%m-%Y') + d = self.recipe_specific_options.get('date') + if d and isinstance(d, str): + today = d + + day, month, year = (int(x) for x in today.split('-')) + dt = datetime(year, month, day) + self.timefmt = ' [' + dt.strftime('%b %d, %Y') + ']' + if dt.weekday() == 6: self.log.warn( 'Business Standard Does Not Have A Print Publication On Sunday. The Reports' @@ -97,10 +104,10 @@ class BusinessStandardPrint(BasicNewsRecipe): if 'defaultArticleCat' in data and data['defaultArticleCat'] is not None: if 'h1_tag' in data['defaultArticleCat'] and data['defaultArticleCat']['h1_tag'] is not None: - cat = '
' + data['defaultArticleCat']['h1_tag'] + '
' + data['metaDescription'] + '
' self.art_desc = data['metaDescription'] date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p') @@ -120,6 +127,13 @@ class BusinessStandardPrint(BasicNewsRecipe): if 'alt_text' in data['featuredImageObj']: caption = '' + data['featuredImageObj']['alt_text'] + '' - body = data['htmlContent'] + body = data['htmlContent'].replace('