diff --git a/recipes/business_standard.recipe b/recipes/business_standard.recipe
index c6b171d1ba..25af5a8b32 100644
--- a/recipes/business_standard.recipe
+++ b/recipes/business_standard.recipe
@@ -13,7 +13,7 @@ class BusinessStandard(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
- remove_attributes = ['width', 'height', 'style']
+ remove_attributes = ['width', 'height', 'float', 'style']
def get_cover_url(self):
soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/')
@@ -67,7 +67,7 @@ class BusinessStandard(BasicNewsRecipe):
def preprocess_raw_html(self, raw, *a):
root = parse(raw)
m = root.xpath('//script[@id="__NEXT_DATA__"]')
-
+
data = json.loads(m[0].text)
img_url = None
@@ -75,7 +75,7 @@ class BusinessStandard(BasicNewsRecipe):
img_url = data['props']['pageProps']['articleSchema']['articleImageUrl']
art_url = 'https://www.business-standard.com' + data['props']['pageProps']['url']
-
+
data = data['props']['pageProps']['data']
title = '
'.format(art_url) + data['pageTitle'] + '
'
@@ -92,8 +92,11 @@ class BusinessStandard(BasicNewsRecipe):
date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p')
- if 'multiple_authors_name' in data:
- auth = '' + data['multiple_authors_name'] + ' | ' + data['placeName'] + ' | ' + date + '
'
+ authors = []
+ if 'articleMappedMultipleAuthors' in data:
+ for aut in data['articleMappedMultipleAuthors']:
+ authors.append(data['articleMappedMultipleAuthors'][str(aut)])
+ auth = '' + ', '.join(authors) + ' | ' + data['placeName'] + ' | ' + date + '
'
if 'featuredImageObj' in data:
if 'url' in data['featuredImageObj']:
diff --git a/recipes/business_standard_print.recipe b/recipes/business_standard_print.recipe
index e54870fbcf..d648ce042f 100644
--- a/recipes/business_standard_print.recipe
+++ b/recipes/business_standard_print.recipe
@@ -23,7 +23,7 @@ class BusinessStandardPrint(BasicNewsRecipe):
no_stylesheets = True
remove_javascript = True
- remove_attributes = ['width', 'height', 'style']
+ remove_attributes = ['width', 'height', 'float', 'style']
def __init__(self, *args, **kwargs):
BasicNewsRecipe.__init__(self, *args, **kwargs)
@@ -43,6 +43,11 @@ class BusinessStandardPrint(BasicNewsRecipe):
.cap { font-size:small; text-align:center; }
'''
+ def get_cover_url(self):
+ soup = self.index_to_soup('https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/')
+ for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
+ return citem['content']
+
def parse_index(self):
if dt.weekday() == 6:
self.log.warn(
@@ -58,7 +63,6 @@ class BusinessStandardPrint(BasicNewsRecipe):
for section in data:
if section == 'EpaperImage':
- self.cover_url = data[section]['url']
continue
self.log(section)
articles = []
@@ -100,8 +104,11 @@ class BusinessStandardPrint(BasicNewsRecipe):
date = (datetime.fromtimestamp(int(data['publishDate']))).strftime('%b %d, %Y | %I:%M %p')
- if 'multiple_authors_name' in data:
- auth = '' + data['multiple_authors_name'] + ' | ' + data['placeName'] + ' | ' + date + '
'
+ authors = []
+ if 'articleMappedMultipleAuthors' in data:
+ for aut in data['articleMappedMultipleAuthors']:
+ authors.append(data['articleMappedMultipleAuthors'][str(aut)])
+ auth = '' + ', '.join(authors) + ' | ' + data['placeName'] + ' | ' + date + '
'
if 'featuredImageObj' in data:
if 'url' in data['featuredImageObj']: