diff --git a/recipes/business_standard.recipe b/recipes/business_standard.recipe index 4e961240f6..48d8c57c51 100644 --- a/recipes/business_standard.recipe +++ b/recipes/business_standard.recipe @@ -17,12 +17,7 @@ class BusinessStandard(BasicNewsRecipe): no_stylesheets = True remove_javascript = True - remove_attributes = ['width', 'height', 'float', 'style'] - - def get_cover_url(self): - soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/') - for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): - return citem['content'] + remove_attributes = ['width', 'height', 'style'] def get_browser(self): return BasicNewsRecipe.get_browser(self, user_agent='common_words/based') @@ -32,6 +27,14 @@ class BusinessStandard(BasicNewsRecipe): resolve_internal_links = True max_articles_per_feed = 50 oldest_article = 1.15 + browser_type = 'webengine' + + extra_css = ''' + img {display:block; margin:0 auto;} + .sub { font-style:italic; color:#202020; } + .auth, .cat { font-size:small; color:#202020; } + .cap { font-size:small; text-align:center; } + ''' recipe_specific_options = { 'days': { @@ -41,18 +44,23 @@ class BusinessStandard(BasicNewsRecipe): } } + def get_cover_url(self): + d = self.recipe_specific_options.get('date') + if not (d and isinstance(d, str)): + soup = self.index_to_soup( + 'https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/' + ) + for citem in soup.findAll( + 'meta', content=lambda s: s and s.endswith('view/3.jpg') + ): + return citem['content'] + def __init__(self, *args, **kwargs): BasicNewsRecipe.__init__(self, *args, **kwargs) d = self.recipe_specific_options.get('days') if d and isinstance(d, str): self.oldest_article = float(d) - extra_css = ''' - img {display:block; margin:0 auto;} - .auth, .cat { font-size:small; color:#202020; } - .cap { font-size:small; text-align:center; } - ''' - # https://www.business-standard.com/rss-feeds/listing feeds = [ ('Top Stories', 'https://www.business-standard.com/rss/home_page_top_stories.rss'), @@ -88,30 +96,69 @@ class BusinessStandard(BasicNewsRecipe): cat = subhead = lede = auth = caption = '' if 'defaultArticleCat' in data and data['defaultArticleCat'] is not None: - if 'h1_tag' in data['defaultArticleCat'] and data['defaultArticleCat']['h1_tag'] is not None: - cat = '
' + data['defaultArticleCat']['h1_tag'] + '
' + data['metaDescription'] + '
' self.art_desc = data['metaDescription'] - date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p') + date = (datetime.fromtimestamp(int(data['publishDate']))).strftime( + '%b %d, %Y | %I:%M %p' + ) authors = [] if 'articleMappedMultipleAuthors' in data: for aut in data['articleMappedMultipleAuthors']: authors.append(data['articleMappedMultipleAuthors'][str(aut)]) - auth = '' + ', '.join(authors) + ' | ' + data['placeName'] + ' | ' + date + '
' + + ', '.join(authors) + + ' | ' + + data['placeName'] + + ' | ' + + date + + '
'.format(img_url)
else:
- lede = '
'.format(data['featuredImageObj']['url'])
+ lede = '
'.format(
+ data['featuredImageObj']['url']
+ )
if 'alt_text' in data['featuredImageObj']:
caption = '' + data['featuredImageObj']['alt_text'] + '
' + data['metaDescription'] + '
' self.art_desc = data['metaDescription'] - date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p') + date = (datetime.fromtimestamp(int(data['publishDate']))).strftime( + '%b %d, %Y | %I:%M %p' + ) authors = [] if 'articleMappedMultipleAuthors' in data: for aut in data['articleMappedMultipleAuthors']: authors.append(data['articleMappedMultipleAuthors'][str(aut)]) - auth = '' + ', '.join(authors) + ' | ' + data['placeName'] + ' | ' + date + '
' + + ', '.join(authors) + + ' | ' + + data['placeName'] + + ' | ' + + date + + '
'.format(img_url)
else:
- lede = '
'.format(data['featuredImageObj']['url'])
+ lede = '
'.format(
+ data['featuredImageObj']['url']
+ )
if 'alt_text' in data['featuredImageObj']:
caption = '' + data['featuredImageObj']['alt_text'] + '