diff --git a/recipes/bloomberg.recipe b/recipes/bloomberg.recipe index e7d743d2e7..b9c69846aa 100644 --- a/recipes/bloomberg.recipe +++ b/recipes/bloomberg.recipe @@ -121,7 +121,11 @@ class Bloomberg(BasicNewsRecipe): data = json.loads(m2[0].text) data = data['props']['pageProps']['story'] - title = '

' + data['headline'] + '

' + art_url = data['url'] + if not art_url.startswith('http'): + art_url = 'https://www.bloomberg.com' + art_url + + title = '

'.format(art_url) + data['headline'] + '

' cat = subhead = lede = auth = caption = '' @@ -129,7 +133,7 @@ class Bloomberg(BasicNewsRecipe): cat = '

' + data['primaryCategory'] + '

' if len(data['abstract']) != 0 and len(data['abstract']) == 2: - subhead = '

' + data['abstract'][0] + '

' + data['abstract'][1] + '

' + subhead = '

' + data['abstract'][0] + '

' + data['abstract'][1] + '

' else: if 'summary' in data: subhead = '

' + data['summary'] + '

' @@ -175,3 +179,9 @@ class Bloomberg(BasicNewsRecipe): for img in soup.findAll('img', attrs={'src':lambda x: x and x.endswith(('-1x-1.jpg', '-1x-1.png'))}): img['src'] = img['src'].replace('-1x-1', '750x-1') return soup + + def populate_article_metadata(self, article, soup, first): + article.url = soup.find('h1')['title'] + article.summary = self.tag_to_string(soup.find('div', attrs={'class':'subhead'})) + article.text_summary = self.tag_to_string(soup.find('div', attrs={'class':'subhead'})) + article.title = article.title.replace(' - Bloomberg', '') diff --git a/recipes/business_standard.recipe b/recipes/business_standard.recipe index 7f2f0a0dc7..09f9420cee 100644 --- a/recipes/business_standard.recipe +++ b/recipes/business_standard.recipe @@ -26,15 +26,13 @@ class BusinessStandard(BasicNewsRecipe): ignore_duplicate_articles = {'title', 'url'} remove_empty_feeds = True resolve_internal_links = True - simultaneous_downloads = 1 + max_articles_per_feed = 20 extra_css = ''' + img {display:block; margin:0 auto;} .auth, .cat { font-size:small; color:#202020; } .cap { font-size:small; text-align:center; } ''' - - art_url = '' - art_desc = '' articles_are_obfuscated = True @@ -47,7 +45,6 @@ class BusinessStandard(BasicNewsRecipe): ] if any(x in link['href'] for x in skip_sections): self.abort_article('skipping video links ', link['href']) - self.art_url = link['href'] self.log('Found ', link['href']) html = br.open(link['href']).read() pt = PersistentTemporaryFile('.html') @@ -72,9 +69,16 @@ class BusinessStandard(BasicNewsRecipe): m = root.xpath('//script[@id="__NEXT_DATA__"]') data = json.loads(m[0].text) + + img_url = None + if 'articleImageUrl' in data['props']['pageProps']['articleSchema']: + img_url = data['props']['pageProps']['articleSchema']['articleImageUrl'] + + art_url = 'https://www.business-standard.com' + data['props']['pageProps']['url'] + data = data['props']['pageProps']['data'] - title = '

' + data['pageTitle'] + '

' + title = '

' + data['pageTitle'] + '

' cat = subhead = lede = auth = caption = '' @@ -92,7 +96,10 @@ class BusinessStandard(BasicNewsRecipe): auth = '

' + data['multiple_authors_name'] + ' | ' + data['placeName'] + ' | ' + date + '

' if data['featuredImageObj'] and 'url' in data['featuredImageObj']: - lede = '

'.format(data['featuredImageObj']['url']) + if img_url is not None: + lede = '

'.format(img_url) + else: + lede = '

'.format(data['featuredImageObj']['url']) if 'alt_text' in data['featuredImageObj']: caption = '' + data['featuredImageObj']['alt_text'] + '

' @@ -101,7 +108,7 @@ class BusinessStandard(BasicNewsRecipe): return '' + cat + title + subhead + auth + lede + caption + '

' + body + '
' def populate_article_metadata(self, article, soup, first): - article.url = self.art_url - article.summary = self.art_desc - article.text_summary = self.art_desc + article.url = soup.find('h1')['title'] + article.summary = self.tag_to_string(soup.find('h3')) + article.text_summary = self.tag_to_string(soup.find('h3')) article.title = article.title.replace(' - Business Standard', '')