mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
c419c58c97
commit
ee62e1b5ba
@ -121,7 +121,11 @@ class Bloomberg(BasicNewsRecipe):
|
||||
data = json.loads(m2[0].text)
|
||||
data = data['props']['pageProps']['story']
|
||||
|
||||
title = '<h1>' + data['headline'] + '</h1>'
|
||||
art_url = data['url']
|
||||
if not art_url.startswith('http'):
|
||||
art_url = 'https://www.bloomberg.com' + art_url
|
||||
|
||||
title = '<h1 title="{}">'.format(art_url) + data['headline'] + '</h1>'
|
||||
|
||||
cat = subhead = lede = auth = caption = ''
|
||||
|
||||
@ -129,7 +133,7 @@ class Bloomberg(BasicNewsRecipe):
|
||||
cat = '<p class="cat">' + data['primaryCategory'] + '</p>'
|
||||
|
||||
if len(data['abstract']) != 0 and len(data['abstract']) == 2:
|
||||
subhead = '<div class="subhead"><p>' + data['abstract'][0] + '</p><p>' + data['abstract'][1] + '</p></div>'
|
||||
subhead = '<div class="subhead"><p>' + data['abstract'][0] + ' </p><p>' + data['abstract'][1] + '</p></div>'
|
||||
else:
|
||||
if 'summary' in data:
|
||||
subhead = '<div class="subhead"><p>' + data['summary'] + '</p></div>'
|
||||
@ -175,3 +179,9 @@ class Bloomberg(BasicNewsRecipe):
|
||||
for img in soup.findAll('img', attrs={'src':lambda x: x and x.endswith(('-1x-1.jpg', '-1x-1.png'))}):
|
||||
img['src'] = img['src'].replace('-1x-1', '750x-1')
|
||||
return soup
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Copy URL, summary and a cleaned title from the parsed page onto *article*.

    :param article: Article object whose ``url``, ``summary``,
        ``text_summary`` and ``title`` attributes are updated in place.
    :param soup: Parsed article HTML; it is searched for an ``<h1>`` carrying
        the article URL in its ``title`` attribute and for a
        ``<div class="subhead">`` holding the summary text.
    :param first: Not used by this implementation.
    """
    heading = soup.find('h1')
    # Only overwrite the URL when the heading actually carries one; indexing
    # a missing tag or attribute would otherwise raise and abort processing.
    if heading is not None and heading.get('title'):
        article.url = heading['title']

    # Look the subhead up once and reuse the text for both summary fields.
    summary = self.tag_to_string(soup.find('div', attrs={'class': 'subhead'}))
    article.summary = summary
    article.text_summary = summary

    article.title = article.title.replace(' - Bloomberg', '')
|
||||
|
@ -26,15 +26,13 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
remove_empty_feeds = True
|
||||
resolve_internal_links = True
|
||||
simultaneous_downloads = 1
|
||||
max_articles_per_feed = 20
|
||||
|
||||
extra_css = '''
|
||||
img {display:block; margin:0 auto;}
|
||||
.auth, .cat { font-size:small; color:#202020; }
|
||||
.cap { font-size:small; text-align:center; }
|
||||
'''
|
||||
|
||||
art_url = ''
|
||||
art_desc = ''
|
||||
|
||||
articles_are_obfuscated = True
|
||||
|
||||
@ -47,7 +45,6 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
]
|
||||
if any(x in link['href'] for x in skip_sections):
|
||||
self.abort_article('skipping video links ', link['href'])
|
||||
self.art_url = link['href']
|
||||
self.log('Found ', link['href'])
|
||||
html = br.open(link['href']).read()
|
||||
pt = PersistentTemporaryFile('.html')
|
||||
@ -72,9 +69,16 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
m = root.xpath('//script[@id="__NEXT_DATA__"]')
|
||||
|
||||
data = json.loads(m[0].text)
|
||||
|
||||
img_url = None
|
||||
if 'articleImageUrl' in data['props']['pageProps']['articleSchema']:
|
||||
img_url = data['props']['pageProps']['articleSchema']['articleImageUrl']
|
||||
|
||||
art_url = 'https://www.business-standard.com' + data['props']['pageProps']['url']
|
||||
|
||||
data = data['props']['pageProps']['data']
|
||||
|
||||
title = '<h1>' + data['pageTitle'] + '</h1>'
|
||||
# Interpolate the computed URL; a bare `title=art_url` inside the literal
# would emit the text "art_url" as the attribute value.
title = '<h1 title="{}">'.format(art_url) + data['pageTitle'] + '</h1>'
|
||||
|
||||
cat = subhead = lede = auth = caption = ''
|
||||
|
||||
@ -92,7 +96,10 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
auth = '<div><p class="auth">' + data['multiple_authors_name'] + ' | ' + data['placeName'] + ' | ' + date + '</p></div>'
|
||||
|
||||
if data['featuredImageObj'] and 'url' in data['featuredImageObj']:
|
||||
lede = '<p class="cap"><img src="{}">'.format(data['featuredImageObj']['url'])
|
||||
if img_url is not None:
|
||||
lede = '<p class="cap"><img src="{}">'.format(img_url)
|
||||
else:
|
||||
lede = '<p class="cap"><img src="{}">'.format(data['featuredImageObj']['url'])
|
||||
if 'alt_text' in data['featuredImageObj']:
|
||||
caption = '<span>' + data['featuredImageObj']['alt_text'] + '</span></p>'
|
||||
|
||||
@ -101,7 +108,7 @@ class BusinessStandard(BasicNewsRecipe):
|
||||
return '<html><body>' + cat + title + subhead + auth + lede + caption + '<div><p></p>' + body + '</div></body></html>'
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
    """Copy URL, summary and a cleaned title from the parsed page onto *article*.

    :param article: Article object whose ``url``, ``summary``,
        ``text_summary`` and ``title`` attributes are updated in place.
    :param soup: Parsed article HTML; it is searched for an ``<h1>`` carrying
        the article URL in its ``title`` attribute and for an ``<h3>`` whose
        text is used as the summary.
    :param first: Not used by this implementation.
    """
    heading = soup.find('h1')
    # Only overwrite the URL when the heading actually carries one; indexing
    # a missing tag or attribute would otherwise raise and abort processing.
    if heading is not None and heading.get('title'):
        article.url = heading['title']

    # The values come from the page itself; the earlier assignments from
    # self.art_url / self.art_desc were immediately overwritten, so they
    # are dropped. Look the <h3> up once and reuse its text.
    summary = self.tag_to_string(soup.find('h3'))
    article.summary = summary
    article.text_summary = summary

    article.title = article.title.replace(' - Business Standard', '')
|
||||
|
Loading…
x
Reference in New Issue
Block a user