From 65c2ba4c313c7a8516bca8d20bd4adc29a071080 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 23 Nov 2022 14:57:57 +0530 Subject: [PATCH] Update Bloomberg --- recipes/bloomberg-business-week.recipe | 10 ++++++++-- recipes/bloomberg.recipe | 12 +++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/recipes/bloomberg-business-week.recipe b/recipes/bloomberg-business-week.recipe index 8b65ad2da2..4374c75969 100644 --- a/recipes/bloomberg-business-week.recipe +++ b/recipes/bloomberg-business-week.recipe @@ -75,6 +75,7 @@ class Bloomberg(BasicNewsRecipe): m = re.search('data-component-props="FeatureBody">', raw) if not m: return raw + raw = raw[m.start():] raw = raw.split('>', 1)[1] data = json.JSONDecoder().raw_decode(raw)[0] @@ -107,12 +108,17 @@ class Bloomberg(BasicNewsRecipe): if data['ledeDescription'] is not None: caption = '' + data['ledeDescription'] + '' - body = data['body'].replace('\\n', '').replace('\\','') - + body = data['body'].replace('\\', '') html = '' + cat + title + subhead + auth + lede + caption + '
' + body return html def preprocess_html(self, soup): + for icon in soup.findAll('img', attrs={'class':'video-player__play-icon'}): + icon.decompose() + for div in soup.findAll('div', attrs={'class':'chart'}): + nos = div.find('noscript') + if nos: + nos.name = 'span' for img in soup.findAll('img', attrs={'data-native-src':True}): if img['data-native-src'].__contains__('videos') is False: img['src'] = img['data-native-src'] diff --git a/recipes/bloomberg.recipe b/recipes/bloomberg.recipe index 3440f179ef..28210d9baf 100644 --- a/recipes/bloomberg.recipe +++ b/recipes/bloomberg.recipe @@ -1,4 +1,3 @@ - from calibre.web.feeds.news import BasicNewsRecipe from calibre import browser from calibre.ptempfile import PersistentTemporaryFile @@ -16,10 +15,8 @@ class Bloomberg(BasicNewsRecipe): ignore_duplicate_articles = {'url'} resolve_internal_links = True oldest_article = 2 # days - delay = 1.5 - masthead_url = 'https://assets.bwbx.io/s3/javelin/public/hub/images/BW-Logo-Black-cc9035fbb3.svg' extra_css = ''' #auth {font-size:small; font-weight:bold;} #time {font-size:small;} @@ -89,12 +86,17 @@ class Bloomberg(BasicNewsRecipe): if data['ledeDescription'] is not None: caption = '' + data['ledeDescription'] + '' - body = data['body'].replace('\\n', '').replace('\\','') - + body = data['body'].replace('\\', '') html = '' + cat + title + subhead + auth + lede + caption + '
' + body return html def preprocess_html(self, soup): + for icon in soup.findAll('img', attrs={'class':'video-player__play-icon'}): + icon.decompose() + for div in soup.findAll('div', attrs={'class':'chart'}): + nos = div.find('noscript') + if nos: + nos.name = 'span' for img in soup.findAll('img', attrs={'data-native-src':True}): if img['data-native-src'].__contains__('videos') is False: img['src'] = img['data-native-src']