diff --git a/recipes/bloomberg.recipe b/recipes/bloomberg.recipe
index e7d743d2e7..b9c69846aa 100644
--- a/recipes/bloomberg.recipe
+++ b/recipes/bloomberg.recipe
@@ -121,7 +121,11 @@ class Bloomberg(BasicNewsRecipe):
data = json.loads(m2[0].text)
data = data['props']['pageProps']['story']
- title = '
' + data['headline'] + '
'
+ art_url = data['url']
+ if not art_url.startswith('http'):
+ art_url = 'https://www.bloomberg.com' + art_url
+
+ title = ''.format(art_url) + data['headline'] + '
'
cat = subhead = lede = auth = caption = ''
@@ -129,7 +133,7 @@ class Bloomberg(BasicNewsRecipe):
cat = '' + data['primaryCategory'] + '
'
if len(data['abstract']) != 0 and len(data['abstract']) == 2:
- subhead = '' + data['abstract'][0] + '
' + data['abstract'][1] + '
'
+ subhead = '' + data['abstract'][0] + '
' + data['abstract'][1] + '
'
else:
if 'summary' in data:
subhead = ''
@@ -175,3 +179,9 @@ class Bloomberg(BasicNewsRecipe):
for img in soup.findAll('img', attrs={'src':lambda x: x and x.endswith(('-1x-1.jpg', '-1x-1.png'))}):
img['src'] = img['src'].replace('-1x-1', '750x-1')
return soup
+
+ def populate_article_metadata(self, article, soup, first):
+ article.url = soup.find('h1')['title']
+ article.summary = self.tag_to_string(soup.find('div', attrs={'class':'subhead'}))
+ article.text_summary = self.tag_to_string(soup.find('div', attrs={'class':'subhead'}))
+ article.title = article.title.replace(' - Bloomberg', '')
diff --git a/recipes/business_standard.recipe b/recipes/business_standard.recipe
index 7f2f0a0dc7..09f9420cee 100644
--- a/recipes/business_standard.recipe
+++ b/recipes/business_standard.recipe
@@ -26,15 +26,13 @@ class BusinessStandard(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'}
remove_empty_feeds = True
resolve_internal_links = True
- simultaneous_downloads = 1
+ max_articles_per_feed = 20
extra_css = '''
+ img {display:block; margin:0 auto;}
.auth, .cat { font-size:small; color:#202020; }
.cap { font-size:small; text-align:center; }
'''
-
- art_url = ''
- art_desc = ''
articles_are_obfuscated = True
@@ -47,7 +45,6 @@ class BusinessStandard(BasicNewsRecipe):
]
if any(x in link['href'] for x in skip_sections):
self.abort_article('skipping video links ', link['href'])
- self.art_url = link['href']
self.log('Found ', link['href'])
html = br.open(link['href']).read()
pt = PersistentTemporaryFile('.html')
@@ -72,9 +69,16 @@ class BusinessStandard(BasicNewsRecipe):
m = root.xpath('//script[@id="__NEXT_DATA__"]')
data = json.loads(m[0].text)
+
+ img_url = None
+ if 'articleImageUrl' in data['props']['pageProps']['articleSchema']:
+ img_url = data['props']['pageProps']['articleSchema']['articleImageUrl']
+
+ art_url = 'https://www.business-standard.com' + data['props']['pageProps']['url']
+
data = data['props']['pageProps']['data']
- title = '' + data['pageTitle'] + '
'
+ title = '' + data['pageTitle'] + '
'
cat = subhead = lede = auth = caption = ''
@@ -92,7 +96,10 @@ class BusinessStandard(BasicNewsRecipe):
auth = '' + data['multiple_authors_name'] + ' | ' + data['placeName'] + ' | ' + date + '
'
if data['featuredImageObj'] and 'url' in data['featuredImageObj']:
- lede = '
'.format(data['featuredImageObj']['url'])
+ if img_url is not None:
+ lede = '
'.format(img_url)
+ else:
+ lede = '
'.format(data['featuredImageObj']['url'])
if 'alt_text' in data['featuredImageObj']:
caption = '' + data['featuredImageObj']['alt_text'] + '
'
@@ -101,7 +108,7 @@ class BusinessStandard(BasicNewsRecipe):
return '' + cat + title + subhead + auth + lede + caption + ''
def populate_article_metadata(self, article, soup, first):
- article.url = self.art_url
- article.summary = self.art_desc
- article.text_summary = self.art_desc
+ article.url = soup.find('h1')['title']
+ article.summary = self.tag_to_string(soup.find('h3'))
+ article.text_summary = self.tag_to_string(soup.find('h3'))
article.title = article.title.replace(' - Business Standard', '')