Update Bloomberg

This commit is contained in:
Kovid Goyal 2022-11-23 14:57:57 +05:30
parent fd41f2719e
commit 65c2ba4c31
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 15 additions and 7 deletions

View File

@@ -75,6 +75,7 @@ class Bloomberg(BasicNewsRecipe):
m = re.search('data-component-props="FeatureBody">', raw) m = re.search('data-component-props="FeatureBody">', raw)
if not m: if not m:
return raw return raw
raw = raw[m.start():] raw = raw[m.start():]
raw = raw.split('>', 1)[1] raw = raw.split('>', 1)[1]
data = json.JSONDecoder().raw_decode(raw)[0] data = json.JSONDecoder().raw_decode(raw)[0]
@@ -107,12 +108,17 @@ class Bloomberg(BasicNewsRecipe):
if data['ledeDescription'] is not None: if data['ledeDescription'] is not None:
caption = '<span id="cap">' + data['ledeDescription'] + '</span>' caption = '<span id="cap">' + data['ledeDescription'] + '</span>'
body = data['body'].replace('\\n', '').replace('\\','') body = data['body'].replace('\\', '')
html = '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body html = '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body
return html return html
def preprocess_html(self, soup): def preprocess_html(self, soup):
for icon in soup.findAll('img', attrs={'class':'video-player__play-icon'}):
icon.decompose()
for div in soup.findAll('div', attrs={'class':'chart'}):
nos = div.find('noscript')
if nos:
nos.name = 'span'
for img in soup.findAll('img', attrs={'data-native-src':True}): for img in soup.findAll('img', attrs={'data-native-src':True}):
if img['data-native-src'].__contains__('videos') is False: if img['data-native-src'].__contains__('videos') is False:
img['src'] = img['data-native-src'] img['src'] = img['data-native-src']

View File

@@ -1,4 +1,3 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser from calibre import browser
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
@@ -16,10 +15,8 @@ class Bloomberg(BasicNewsRecipe):
ignore_duplicate_articles = {'url'} ignore_duplicate_articles = {'url'}
resolve_internal_links = True resolve_internal_links = True
oldest_article = 2 # days oldest_article = 2 # days
delay = 1.5 delay = 1.5
masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
extra_css = ''' extra_css = '''
#auth {font-size:small; font-weight:bold;} #auth {font-size:small; font-weight:bold;}
#time {font-size:small;} #time {font-size:small;}
@@ -89,12 +86,17 @@ class Bloomberg(BasicNewsRecipe):
if data['ledeDescription'] is not None: if data['ledeDescription'] is not None:
caption = '<span id="cap">' + data['ledeDescription'] + '</span>' caption = '<span id="cap">' + data['ledeDescription'] + '</span>'
body = data['body'].replace('\\n', '').replace('\\','') body = data['body'].replace('\\', '')
html = '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body html = '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body
return html return html
def preprocess_html(self, soup): def preprocess_html(self, soup):
for icon in soup.findAll('img', attrs={'class':'video-player__play-icon'}):
icon.decompose()
for div in soup.findAll('div', attrs={'class':'chart'}):
nos = div.find('noscript')
if nos:
nos.name = 'span'
for img in soup.findAll('img', attrs={'data-native-src':True}): for img in soup.findAll('img', attrs={'data-native-src':True}):
if img['data-native-src'].__contains__('videos') is False: if img['data-native-src'].__contains__('videos') is False:
img['src'] = img['data-native-src'] img['src'] = img['data-native-src']