__license__ = 'GPL v3'
__copyright__ = '2013, Armin Geller'

##
# Written:      May 2013 (new coding)
# Version:      4.4
# Last update:  2020-12-29
##

'''
Fetch WirtschaftsWoche Online
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class WirtschaftsWocheOnline(BasicNewsRecipe):
    """Recipe configuration for the WirtschaftsWoche Online RSS feeds.

    The class only sets recipe attributes and overrides two hooks:
    ``get_cover_url`` (cover image lookup) and ``print_version``
    (rewrite of article URLs to their single-page variant).
    """

    title = u'WirtschaftsWoche Online'
    __author__ = 'Armin Geller'  # Update AGE 2013-01-05; 2018-03-01
    description = u'German Online Portal of WirtschaftsWoche'
    publisher = 'Verlagsgruppe Handelsblatt GmbH Redaktion WirtschaftsWoche Online'
    category = 'business, economy, news, Germany'
    publication_type = 'weekly magazine'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    timefmt = ' [%a, %d %b %Y]'

    language = 'de_DE'
    encoding = 'UTF-8'

    # Page that get_cover_url() scrapes for the current cover image.
    cover_source = 'https://www.ikiosk.de/shop/epaper/wirtschaftswoche.html'
    masthead_url = 'http://www.wiwo.de/images/wiwo_logo/5748610/1-formatOriginal.png'

    def get_cover_url(self):
        """Return the cover image URL scraped from ``cover_source``.

        Fetches ``self.cover_source`` with ``index_to_soup``, takes the
        first element whose class is ``gallery`` and returns the ``src``
        of the ``<img>`` inside its first ``<a>``.

        Raises:
            AttributeError: if no ``gallery`` element is found (``find``
                returned ``None``).
        """
        cover_source_soup = self.index_to_soup(self.cover_source)
        preview_image_div = cover_source_soup.find(attrs={'class': 'gallery'})
        return preview_image_div.a.img['src']

    extra_css = '''
        h1, h2 {font-size: 1.6em; text-align: left}
        .c-leadtext {font-size: 1em; font-style: italic; font-weight: normal}
        h4 {font-size: 1.3em;text-align: left}
        h5, h6, a {font-size: 1em;text-align: left}
        .c-metadata {font-size: .75em;text-align: left; font-style: italic}
        '''

    # Only <div> elements carrying one of these class values are kept.
    keep_only_tags = [
        dict(
            name='div',
            attrs={
                'class': [
                    'o-article__element',
                    'o-article__content c-richText',
                    'o-article__content-element o-article__content-element--richtext'
                ]
            }
        )
    ]

    # <div> elements carrying one of these class values are removed.
    # NOTE(review): 'c-socialshare u-margin-xxl ' ends with a space in the
    # original recipe; kept as is - confirm whether that is intentional.
    remove_tags = [
        dict(
            name='div',
            attrs={
                'class': [
                    'c-pagination u-flex',
                    'c-standard-article-teaser',
                    'c-pagination u-flex ajaxify',
                    'c-socialshare u-margin-xxl ',
                    'c-list',  # AGe 2020-12-29
                ]
            }
        )
    ]

    feeds = [
        (u'Schlagzeilen', u'http://www.wiwo.de/contentexport/feed/rss/schlagzeilen'),
        (u'Exklusiv', u'http://www.wiwo.de/contentexport/feed/rss/exklusiv'),
        # (u'Themen', u'http://www.wiwo.de/contentexport/feed/rss/themen'), # AGE no print version
        (u'Unternehmen', u'http://www.wiwo.de/contentexport/feed/rss/unternehmen'),
        (u'Finanzen', u'http://www.wiwo.de/contentexport/feed/rss/finanzen'),
        (u'Politik', u'http://www.wiwo.de/contentexport/feed/rss/politik'),
        (u'Erfolg', u'http://www.wiwo.de/contentexport/feed/rss/erfolg'),
        (u'Technologie', u'http://www.wiwo.de/contentexport/feed/rss/technologie'),
        # (u'Green-WiWo', u'http://green.wiwo.de/feed/rss/') # AGE offline
    ]

    # For hegi
    # AGE new 2018-03-21
    # Add ': ' between headline part one and two
    # Wandel kostet Milliarden + ': ' + SUV und China sollen Audi wieder nach vorne bringen
    # https://www.wiwo.de/unternehmen/auto/wandel-kostet-milliarden-suv-und-china-sollen-audi-wieder-nach-vorne-bringen/21069566.html
    #
    # The match stops at the next '<' so the separator is appended to the
    # overline text itself. The previous '[^>]*' ran on through the name of
    # the following tag, which put ': ' inside that tag instead of after
    # the text.
    preprocess_regexps = [
        (
            re.compile(
                r'(c-overline--article">[^<]*)',
                re.DOTALL | re.IGNORECASE
            ),
            lambda match: match.group(1) + ': '
        ),
    ]
    # /For hegi

    # one page n times url:        https://www.wiwo.de/finanzen/geldanlage/bla-bla/21020646.html
    # all in one page article url: https://www.wiwo.de/finanzen/geldanlage/bla-bla/21020646-all.html
    def print_version(self, url):
        """Return the all-in-one-page variant of an article URL.

        ``-all`` is inserted before the last dot of the final path
        segment, e.g. ``.../21020646.html`` -> ``.../21020646-all.html``.

        The URL is returned unchanged when its final path segment has no
        dot (rewriting would otherwise hit a dot in the host name or
        prepend ``-all`` to the whole URL) or when it already carries the
        ``-all`` suffix.
        """
        head, slash, last_segment = url.rpartition('/')
        stem, dot, extension = last_segment.rpartition('.')
        if not dot or stem.endswith('-all'):
            return url
        return head + slash + stem + '-all' + dot + extension