diff --git a/recipes/business_standard.recipe b/recipes/business_standard.recipe index c6b171d1ba..25af5a8b32 100644 --- a/recipes/business_standard.recipe +++ b/recipes/business_standard.recipe @@ -13,7 +13,7 @@ class BusinessStandard(BasicNewsRecipe): no_stylesheets = True remove_javascript = True - remove_attributes = ['width', 'height', 'style'] + remove_attributes = ['width', 'height', 'float', 'style'] def get_cover_url(self): soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/') @@ -67,7 +67,7 @@ class BusinessStandard(BasicNewsRecipe): def preprocess_raw_html(self, raw, *a): root = parse(raw) m = root.xpath('//script[@id="__NEXT_DATA__"]') - + data = json.loads(m[0].text) img_url = None @@ -75,7 +75,7 @@ class BusinessStandard(BasicNewsRecipe): img_url = data['props']['pageProps']['articleSchema']['articleImageUrl'] art_url = 'https://www.business-standard.com' + data['props']['pageProps']['url'] - + data = data['props']['pageProps']['data'] title = '

'.format(art_url) + data['pageTitle'] + '

' @@ -92,8 +92,11 @@ class BusinessStandard(BasicNewsRecipe): date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p') - if 'multiple_authors_name' in data: - auth = '

' + data['multiple_authors_name'] + ' | ' + data['placeName'] + ' | ' + date + '

' + authors = [] + if 'articleMappedMultipleAuthors' in data: + for aut in data['articleMappedMultipleAuthors']: + authors.append(data['articleMappedMultipleAuthors'][str(aut)]) + auth = '

' + ', '.join(authors) + ' | ' + data['placeName'] + ' | ' + date + '

' if 'featuredImageObj' in data: if 'url' in data['featuredImageObj']: diff --git a/recipes/business_standard_print.recipe b/recipes/business_standard_print.recipe index e54870fbcf..d648ce042f 100644 --- a/recipes/business_standard_print.recipe +++ b/recipes/business_standard_print.recipe @@ -23,7 +23,7 @@ class BusinessStandardPrint(BasicNewsRecipe): no_stylesheets = True remove_javascript = True - remove_attributes = ['width', 'height', 'style'] + remove_attributes = ['width', 'height', 'float', 'style'] def __init__(self, *args, **kwargs): BasicNewsRecipe.__init__(self, *args, **kwargs) @@ -43,6 +43,11 @@ class BusinessStandardPrint(BasicNewsRecipe): .cap { font-size:small; text-align:center; } ''' + def get_cover_url(self): + soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/') + for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): + return citem['content'] + def parse_index(self): if dt.weekday() == 6: self.log.warn( @@ -58,7 +63,6 @@ class BusinessStandardPrint(BasicNewsRecipe): for section in data: if section == 'EpaperImage': - self.cover_url = data[section]['url'] continue self.log(section) articles = [] @@ -100,8 +104,11 @@ class BusinessStandardPrint(BasicNewsRecipe): date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p') - if 'multiple_authors_name' in data: - auth = '

' + data['multiple_authors_name'] + ' | ' + data['placeName'] + ' | ' + date + '

' + authors = [] + if 'articleMappedMultipleAuthors' in data: + for aut in data['articleMappedMultipleAuthors']: + authors.append(data['articleMappedMultipleAuthors'][str(aut)]) + auth = '

' + ', '.join(authors) + ' | ' + data['placeName'] + ' | ' + date + '

' if 'featuredImageObj' in data: if 'url' in data['featuredImageObj']: