mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Handelsblatt
This commit is contained in:
parent
ab6d8bbd9f
commit
882c6d5c51
@ -2,8 +2,6 @@
|
|||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
# License: GPLv3 Copyright: 2016, Aimylios <aimylios at gmx.de>
|
# License: GPLv3 Copyright: 2016, Aimylios <aimylios at gmx.de>
|
||||||
|
|
||||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
handelsblatt.com
|
handelsblatt.com
|
||||||
'''
|
'''
|
||||||
@ -63,18 +61,23 @@ class Handelsblatt(BasicNewsRecipe):
|
|||||||
'vhb-teaser vhb-type-video']}),
|
'vhb-teaser vhb-type-video']}),
|
||||||
dict(name='small', attrs={'class': ['vhb-credit']}),
|
dict(name='small', attrs={'class': ['vhb-credit']}),
|
||||||
dict(name='ul', attrs={'class': ['hcf-redaktion']}),
|
dict(name='ul', attrs={'class': ['hcf-redaktion']}),
|
||||||
dict(name='div', attrs={'class': ['white_content', 'fb-post',
|
dict(name='div', attrs={'class': ['dg_health', 'fb-post', 'header-bar',
|
||||||
'opinary-widget-wrapper', 'dg_health',
|
'lb_consent--placeholder',
|
||||||
|
'lb-item embed', 'lb-post-actions',
|
||||||
|
'mod--displaynone', 'white_content',
|
||||||
|
'opinary-widget-wrapper',
|
||||||
'vhb-article__content-element--shorttextgallery',
|
'vhb-article__content-element--shorttextgallery',
|
||||||
'vhb-hollow-area vhb-hollow-area--col-1']}),
|
'vhb-hollow-area vhb-hollow-area--col-1']}),
|
||||||
|
dict(name='div', attrs={'class': re.compile('stepstone')}),
|
||||||
dict(name='div', attrs={'class': re.compile('vhb-imagegallery')}),
|
dict(name='div', attrs={'class': re.compile('vhb-imagegallery')}),
|
||||||
dict(name='div', attrs={'id': ['highcharts_infografik']}),
|
dict(name='div', attrs={'id': ['highcharts_infografik']}),
|
||||||
dict(name='div', attrs={'id': re.compile('dax-sentiment')}),
|
dict(name='div', attrs={'id': re.compile('dax-sentiment')}),
|
||||||
dict(name=['div', 'section'], attrs={'class': re.compile('slider')}),
|
dict(name=['div', 'section'], attrs={'class': re.compile('slider')}),
|
||||||
dict(name='a', attrs={'class': ['twitter-follow-button']}),
|
dict(name='a', attrs={'class': ['twitter-follow-button']}),
|
||||||
|
dict(name='img', attrs={'class': ['highlight-icon', 'lb-author__avatar']}),
|
||||||
dict(name='img', attrs={'alt': re.compile('Handelsblatt Morning Briefing')}),
|
dict(name='img', attrs={'alt': re.compile('Handelsblatt Morning Briefing')}),
|
||||||
dict(name='img', attrs={'alt': re.compile('Kolumnenkabinet')}),
|
dict(name='img', attrs={'alt': re.compile('Kolumnenkabinet')}),
|
||||||
dict(name=['link', 'blockquote'])
|
dict(name=['blockquote', 'button', 'link'])
|
||||||
]
|
]
|
||||||
|
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
@ -97,6 +100,7 @@ class Handelsblatt(BasicNewsRecipe):
|
|||||||
.vhb-teaser-head {margin-top: 1em; margin-bottom: 1em} \
|
.vhb-teaser-head {margin-top: 1em; margin-bottom: 1em} \
|
||||||
.vhb-hollow-area--innercontent {font-size: 0.6em} \
|
.vhb-hollow-area--innercontent {font-size: 0.6em} \
|
||||||
.hcf-location-mark {font-weight: bold} \
|
.hcf-location-mark {font-weight: bold} \
|
||||||
|
.lb-post-header {margin-top: 1em} \
|
||||||
.panel-body p {margin-top: 0em}'
|
.panel-body p {margin-top: 0em}'
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
@ -135,9 +139,9 @@ class Handelsblatt(BasicNewsRecipe):
|
|||||||
for ul in row.findAll('ul'):
|
for ul in row.findAll('ul'):
|
||||||
entry = ''
|
entry = ''
|
||||||
for li in ul.findAll(lambda tag: tag.name == 'li' and not tag.attrs):
|
for li in ul.findAll(lambda tag: tag.name == 'li' and not tag.attrs):
|
||||||
entry = entry + self.tag_to_string(li).strip() + ', '
|
entry += self.tag_to_string(li).strip() + ', '
|
||||||
for li in ul.findAll(lambda tag: tag.name == 'li' and tag.attrs):
|
for li in ul.findAll(lambda tag: tag.name == 'li' and tag.attrs):
|
||||||
entry = entry + self.tag_to_string(li)
|
entry += self.tag_to_string(li)
|
||||||
ul.parent.replaceWith(entry)
|
ul.parent.replaceWith(entry)
|
||||||
# remove all local hyperlinks
|
# remove all local hyperlinks
|
||||||
for a in soup.findAll('a', {'href': True}):
|
for a in soup.findAll('a', {'href': True}):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user