diff --git a/recipes/handelsblatt.recipe b/recipes/handelsblatt.recipe index 842fb5798c..90094cb276 100644 --- a/recipes/handelsblatt.recipe +++ b/recipes/handelsblatt.recipe @@ -2,8 +2,6 @@ # vim:fileencoding=utf-8 # License: GPLv3 Copyright: 2016, Aimylios -from __future__ import unicode_literals, division, absolute_import, print_function - ''' handelsblatt.com ''' @@ -63,18 +61,23 @@ class Handelsblatt(BasicNewsRecipe): 'vhb-teaser vhb-type-video']}), dict(name='small', attrs={'class': ['vhb-credit']}), dict(name='ul', attrs={'class': ['hcf-redaktion']}), - dict(name='div', attrs={'class': ['white_content', 'fb-post', - 'opinary-widget-wrapper', 'dg_health', + dict(name='div', attrs={'class': ['dg_health', 'fb-post', 'header-bar', + 'lb_consent--placeholder', + 'lb-item embed', 'lb-post-actions', + 'mod--displaynone', 'white_content', + 'opinary-widget-wrapper', 'vhb-article__content-element--shorttextgallery', 'vhb-hollow-area vhb-hollow-area--col-1']}), + dict(name='div', attrs={'class': re.compile('stepstone')}), dict(name='div', attrs={'class': re.compile('vhb-imagegallery')}), dict(name='div', attrs={'id': ['highcharts_infografik']}), dict(name='div', attrs={'id': re.compile('dax-sentiment')}), dict(name=['div', 'section'], attrs={'class': re.compile('slider')}), dict(name='a', attrs={'class': ['twitter-follow-button']}), + dict(name='img', attrs={'class': ['highlight-icon', 'lb-author__avatar']}), dict(name='img', attrs={'alt': re.compile('Handelsblatt Morning Briefing')}), dict(name='img', attrs={'alt': re.compile('Kolumnenkabinet')}), - dict(name=['link', 'blockquote']) + dict(name=['blockquote', 'button', 'link']) ] preprocess_regexps = [ @@ -97,6 +100,7 @@ class Handelsblatt(BasicNewsRecipe): .vhb-teaser-head {margin-top: 1em; margin-bottom: 1em} \ .vhb-hollow-area--innercontent {font-size: 0.6em} \ .hcf-location-mark {font-weight: bold} \ + .lb-post-header {margin-top: 1em} \ .panel-body p {margin-top: 0em}' def get_browser(self): @@ -135,9 +139,9 @@ class Handelsblatt(BasicNewsRecipe): for ul in row.findAll('ul'): entry = '' for li in ul.findAll(lambda tag: tag.name == 'li' and not tag.attrs): - entry = entry + self.tag_to_string(li).strip() + ', ' + entry += self.tag_to_string(li).strip() + ', ' for li in ul.findAll(lambda tag: tag.name == 'li' and tag.attrs): - entry = entry + self.tag_to_string(li) + entry += self.tag_to_string(li) ul.parent.replaceWith(entry) # remove all local hyperlinks for a in soup.findAll('a', {'href': True}):