From 07db492808c5fba756271fdcfe203750d27d312b Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sat, 10 May 2025 14:21:38 +0530 Subject: [PATCH] Update business_standard_print.recipe --- recipes/business_standard_print.recipe | 51 +++++++++++++++++++++----- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/recipes/business_standard_print.recipe b/recipes/business_standard_print.recipe index 439284b28d..10fe56a0e2 100644 --- a/recipes/business_standard_print.recipe +++ b/recipes/business_standard_print.recipe @@ -38,15 +38,19 @@ class BusinessStandardPrint(BasicNewsRecipe): recipe_specific_options = { 'date': { 'short': 'The date of the print edition to download (DD-MM-YYYY format)', - 'long': 'For example, 20-09-2023' + 'long': 'For example, 20-09-2023', } } def get_cover_url(self): d = self.recipe_specific_options.get('date') if not (d and isinstance(d, str)): - soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/') - for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): + soup = self.index_to_soup( + 'https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/' + ) + for citem in soup.findAll( + 'meta', content=lambda s: s and s.endswith('view/3.jpg') + ): return citem['content'] def parse_index(self): @@ -81,7 +85,7 @@ class BusinessStandardPrint(BasicNewsRecipe): desc = article['sub_heading'] url = 'https://www.business-standard.com' + article['article_url'] self.log('\t', title, '\n\t', desc, '\n\t\t', url) - articles.append({'title': title, 'description':desc, 'url': url}) + articles.append({'title': title, 'description': desc, 'url': url}) if articles: feeds.append((section, articles)) return feeds @@ -105,33 +109,59 @@ class BusinessStandardPrint(BasicNewsRecipe): cat = subhead = lede = auth = caption = '' if 'defaultArticleCat' in data and data['defaultArticleCat'] is not None: - if 'h1_tag' in data['defaultArticleCat'] and data['defaultArticleCat']['h1_tag'] is not None: + if ( + 'h1_tag' in data['defaultArticleCat'] + and data['defaultArticleCat']['h1_tag'] is not None + ): cat = '
' + data['defaultArticleCat']['h1_tag'] + '
' if 'metaDescription' in data and data['metaDescription'] is not None: subhead = '

' + data['metaDescription'] + '

' self.art_desc = data['metaDescription'] - date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p') + date = (datetime.fromtimestamp(int(data['publishDate']))).strftime( + '%b %d, %Y | %I:%M %p' + ) authors = [] if 'articleMappedMultipleAuthors' in data: for aut in data['articleMappedMultipleAuthors']: authors.append(data['articleMappedMultipleAuthors'][str(aut)]) - auth = '

' + ', '.join(authors) + ' | ' + data['placeName'] + ' | ' + date + '

' + auth = ( + '

' + + ', '.join(authors) + + ' | ' + + data['placeName'] + + ' | ' + + date + + '

' + ) if 'featuredImageObj' in data: if 'url' in data['featuredImageObj']: if img_url is not None: lede = '

'.format(img_url) else: - lede = '

'.format(data['featuredImageObj']['url']) + lede = '

'.format( + data['featuredImageObj']['url'] + ) if 'alt_text' in data['featuredImageObj']: caption = '' + data['featuredImageObj']['alt_text'] + '

' body = data['htmlContent'] - return '' + cat + title + subhead + auth + lede + caption + '

' + body + '
' + return ( + '' + + cat + + title + + subhead + + auth + + lede + + caption + + '

' + + body + + '
' + ) def preprocess_html(self, soup): for img in soup.findAll('img'): @@ -141,4 +171,7 @@ class BusinessStandardPrint(BasicNewsRecipe): for attr in self.remove_attributes: for x in soup.findAll(attrs={attr: True}): del x[attr] + for br in soup.findAll('small', attrs={'class': 'brtag'}): + br.name = 'br' + br.clear() return soup