mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Update Bloomberg
This commit is contained in:
parent
fd41f2719e
commit
65c2ba4c31
@@ -75,6 +75,7 @@ class Bloomberg(BasicNewsRecipe):
|
|||||||
m = re.search('data-component-props="FeatureBody">', raw)
|
m = re.search('data-component-props="FeatureBody">', raw)
|
||||||
if not m:
|
if not m:
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
raw = raw[m.start():]
|
raw = raw[m.start():]
|
||||||
raw = raw.split('>', 1)[1]
|
raw = raw.split('>', 1)[1]
|
||||||
data = json.JSONDecoder().raw_decode(raw)[0]
|
data = json.JSONDecoder().raw_decode(raw)[0]
|
||||||
@@ -107,12 +108,17 @@ class Bloomberg(BasicNewsRecipe):
|
|||||||
if data['ledeDescription'] is not None:
|
if data['ledeDescription'] is not None:
|
||||||
caption = '<span id="cap">' + data['ledeDescription'] + '</span>'
|
caption = '<span id="cap">' + data['ledeDescription'] + '</span>'
|
||||||
|
|
||||||
body = data['body'].replace('\\n', '').replace('\\','')
|
body = data['body'].replace('\\', '')
|
||||||
|
|
||||||
html = '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body
|
html = '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
for icon in soup.findAll('img', attrs={'class':'video-player__play-icon'}):
|
||||||
|
icon.decompose()
|
||||||
|
for div in soup.findAll('div', attrs={'class':'chart'}):
|
||||||
|
nos = div.find('noscript')
|
||||||
|
if nos:
|
||||||
|
nos.name = 'span'
|
||||||
for img in soup.findAll('img', attrs={'data-native-src':True}):
|
for img in soup.findAll('img', attrs={'data-native-src':True}):
|
||||||
if img['data-native-src'].__contains__('videos') is False:
|
if img['data-native-src'].__contains__('videos') is False:
|
||||||
img['src'] = img['data-native-src']
|
img['src'] = img['data-native-src']
|
||||||
|
@@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre import browser
|
from calibre import browser
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
@@ -16,10 +15,8 @@ class Bloomberg(BasicNewsRecipe):
|
|||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
resolve_internal_links = True
|
resolve_internal_links = True
|
||||||
oldest_article = 2 # days
|
oldest_article = 2 # days
|
||||||
|
|
||||||
delay = 1.5
|
delay = 1.5
|
||||||
|
|
||||||
masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg'
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
#auth {font-size:small; font-weight:bold;}
|
#auth {font-size:small; font-weight:bold;}
|
||||||
#time {font-size:small;}
|
#time {font-size:small;}
|
||||||
@@ -89,12 +86,17 @@ class Bloomberg(BasicNewsRecipe):
|
|||||||
if data['ledeDescription'] is not None:
|
if data['ledeDescription'] is not None:
|
||||||
caption = '<span id="cap">' + data['ledeDescription'] + '</span>'
|
caption = '<span id="cap">' + data['ledeDescription'] + '</span>'
|
||||||
|
|
||||||
body = data['body'].replace('\\n', '').replace('\\','')
|
body = data['body'].replace('\\', '')
|
||||||
|
|
||||||
html = '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body
|
html = '<html><body>' + cat + title + subhead + auth + lede + caption + '<div>' + body
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
for icon in soup.findAll('img', attrs={'class':'video-player__play-icon'}):
|
||||||
|
icon.decompose()
|
||||||
|
for div in soup.findAll('div', attrs={'class':'chart'}):
|
||||||
|
nos = div.find('noscript')
|
||||||
|
if nos:
|
||||||
|
nos.name = 'span'
|
||||||
for img in soup.findAll('img', attrs={'data-native-src':True}):
|
for img in soup.findAll('img', attrs={'data-native-src':True}):
|
||||||
if img['data-native-src'].__contains__('videos') is False:
|
if img['data-native-src'].__contains__('videos') is False:
|
||||||
img['src'] = img['data-native-src']
|
img['src'] = img['data-native-src']
|
||||||
|
Loading…
x
Reference in New Issue
Block a user