# NOTE(review): the HTML tag names used in this block are reconstructed -- the
# markup inside the original string literals was not recoverable -- so confirm
# them against the upstream recipe before merging.
_CONTAINER_TAGS = {
    'paragraph': 'p',
    'heading': 'h3',
    'list': 'ul',
    'listItem': 'li',
    'quote': 'blockquote',
}


def _render_children(node):
    """Return the node's own ``value`` followed by its rendered child nodes."""
    # Children are stored under 'content' (the link/entity branches and the
    # caller read that key). 'contents' is still honoured as a fallback because
    # the previous implementation looked there. A missing or null entry simply
    # yields no children instead of raising TypeError inside map().
    children = node.get('content') or node.get('contents') or ()
    return (node.get('value') or '') + ''.join(map(get_contents, children))


def _first_value(node):
    """Return the ``value`` of the node's first child, or '' if there is none."""
    content = node.get('content')
    if content and content[0]:
        return content[0].get('value') or ''
    return ''


def get_contents(x):
    """Render one node of the article body tree as an HTML fragment.

    ``x`` is a dict with a ``type`` key and, depending on the type, ``value``,
    ``subType``, ``data`` and ``content`` (a list of child nodes). Container
    types (paragraph, heading, list, listItem, quote) are rendered recursively;
    media, link and entity nodes are rendered from their ``data``.

    Returns the HTML string for the node, or '' for unknown node types and for
    nodes that lack the data needed to render them.
    """
    otype = x.get('type', '')

    if otype == 'text':
        return x.get('value') or ''

    tag = _CONTAINER_TAGS.get(otype)
    if tag is not None:
        return '<{0}>{1}</{0}>'.format(tag, _render_children(x))

    data = x.get('data') or {}
    subtype = x.get('subType')

    if otype == 'media':
        if subtype == 'photo':
            photo = data.get('photo') or {}
            if photo.get('src'):
                # Two placeholders: image source and caption.
                return (
                    '<div><div class="img"><img src="{}"></div>'
                    '<div class="cap">{}</div></div>'
                ).format(photo['src'], photo.get('caption') or '')
        elif subtype == 'chart':
            chart = data.get('chart') or {}
            if chart.get('fallback'):
                return '<div class="img"><img src="{}"></div>'.format(
                    chart['fallback'])
        return ''

    if otype == 'link':
        label = _first_value(x)
        if not (data and label):
            return ''
        if data.get('href'):
            return '<a href="{}">{}</a>'.format(data['href'], label)
        return '<a>' + label + '</a>'

    if otype == 'entity':
        label = _first_value(x)
        if not label:
            return ''
        if subtype == 'story':
            destination = (data.get('link') or {}).get('destination')
            if destination:
                if destination.get('web'):
                    return '<a href="{}">{}</a>'.format(
                        destination['web'], label)
                return '<a>' + label + '</a>'
        elif subtype in ('person', 'security'):
            # People and securities are shown as plain text, not links.
            return label
        return ''

    return ''

    ' + data['primaryCategory'] + '

    ' + cat = '

    ' + data['primaryCategory'] + '

    ' if len(data['abstract']) != 0 and len(data['abstract']) == 2: - subhead = '

    ' + data['abstract'][0] + '

    ' + data['abstract'][1] + '

    ' + subhead = '

    ' + data['abstract'][0] + '

    ' + data['abstract'][1] + '

    ' else: if 'summary' in data: - subhead = '

    ' + data['summary'] + '

    ' + subhead = '

    ' + data['summary'] + '

    ' if 'byline' in data and data['byline'] is not None: - auth = '
    ' + data['byline']\ - + ' | ' + data['publishedAt'][:-14] + '
    ' + auth = '
    ' + data['byline']\ + + ' | ' + data['publishedAt'][:-14] + '
    ' if 'ledeImageUrl' in data and data['ledeImageUrl'] is not None: - lede = '

    '.format(data['ledeImageUrl']) + lede = '

    '.format(data['ledeImageUrl']) if 'ledeDescription' in data and data['ledeDescription'] is not None: - caption = '' + data['ledeDescription'] + '' + caption = '' + data['ledeDescription'] + '' else: if 'lede' in data and data['lede'] is not None: if 'alt' in data['lede'] and data['lede']['alt'] is not None: - caption = '' + data['lede']['alt'] + '' + caption = '' + data['lede']['alt'] + '' if m: time.sleep(3) @@ -117,142 +155,11 @@ class Bloomberg(BasicNewsRecipe): elif m2: body = '' body_data = data['body']['content'] - - for objects in body_data: - + for x in body_data: pause = random.choice((0.5, 1, 1.25)) time.sleep(pause) - - if objects['type'] == 'media' and objects['subType'] == 'photo': - body += '

    '.format(objects['data']['photo']['src']) - body += '' + objects['data']['photo']['caption'] + '

    ' - if objects['type'] == 'media' and objects['subType'] == 'chart': - if objects['data'] and objects['data']['chart']: - body += '

    '.format(objects['data']['chart']['fallback']) - - if objects['type'] == 'paragraph': - body += '

    ' - if 'value' in objects: - body += objects['value'] - - if 'content' not in objects: - continue - - for item in objects['content']: - - if item['type'] == 'text' and item['value']: - body += item['value'] - - if item['type'] == 'link' and item['data']: - if item['content'] and item['content'][0] and item['content'][0]['value']: - if 'href' in item['data']: - body += '' + item['content'][0]['value'] + '' - else: - body += '' + item['content'][0]['value'] + '' - - if item['type'] == 'entity': - if item['content'] and item['content'][0] and item['content'][0]['value']: - if item['subType'] == 'story': - if item['data'] and item['data']['link'] and item['data']['link']['destination']: - if 'web' in item['data']['link']['destination']: - body += '' + item['content'][0]['value'] + '' - else: - body += '' + item['content'][0]['value'] + '' - - elif item['subType'] == 'person' or 'security': - body += item['content'][0]['value'] - - if objects['type'] == 'heading': - body += '

    ' - if 'value' in objects: - body += objects['value'] - - if 'content' not in objects: - continue - - for item in objects['content']: - - if item['type'] == 'text' and item['value']: - body += item['value'] - - if item['type'] == 'link' and item['data']: - if item['content'] and item['content'][0] and item['content'][0]['value']: - if 'href' in item['data']: - body += '' + item['content'][0]['value'] + '' - else: - body += '' + item['content'][0]['value'] + '' - - if item['type'] == 'entity': - if item['content'] and item['content'][0] and item['content'][0]['value']: - if item['subType'] == 'story': - if item['data'] and item['data']['link'] and item['data']['link']['destination']: - if 'web' in item['data']['link']['destination']: - body += '' + item['content'][0]['value'] + '' - else: - body += '' + item['content'][0]['value'] + '' - - elif item['subType'] == 'person' or 'security': - body += item['content'][0]['value'] - - if objects['type'] == 'quote': - if 'value' in objects: - body +='

    ' + objects['value'] + '
    ' - if 'content' not in objects: - continue - for item in objects['content']: - if item['type'] == 'paragraph' and item['content'] and item['content'][0]: - if 'value' not in item['content'][0]: - continue - body += '
    ' + item['content'][0]['value'] + '
    ' - - if objects['type'] == 'list': - if 'content' not in objects: - continue - body += '' - - skip = ['ad', 'inline-newsletter', 'inline-recirc', 'tabularData', 'list', 'quote', 'heading', 'paragraph', 'media'] - if not any(x in objects['type'] for x in skip): - body += '

    ' - if 'value' in objects: - body += objects['value'] - if not 'content' in objects: - continue - for content in objects['content']: - if 'value' in content: - body += content['value'] - elif 'content' in content: - for cont1 in content['content']: - if 'value' in cont1: - body += cont1['value'] - elif 'content' in val_cont: - for cont2 in val_cont['content']: - if 'value' in cont2: - body += cont2['value'] - elif 'content' in cont2: - for cont3 in cont2['content']: - if 'value' in cont3: - body += cont3['value'] - elif 'content' in cont3: - for cont4 in cont3['content']: - if 'value' in cont4: - body += cont4['value'] - - html = '' + cat + title + subhead + auth + lede + caption + '

    ' + body - return html + body += get_contents(x) + return '' + cat + title + subhead + auth + lede + caption + '
    ' + body + '
    ' def preprocess_html(self, soup): for icon in soup.findAll('img', attrs={'class':'video-player__play-icon'}):