#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
__copyright__ = '2013, Armin Geller'

##
# Written:      May 2013 (new coding)
# Version:      5.1
# Last update:  2022-08-07
##

##
'''
Fetch WirtschaftsWoche Online
'''

from urllib.parse import urljoin, urlsplit

from calibre.web.feeds.news import BasicNewsRecipe

# Root of the wiwo.de site; used to resolve site-relative image URLs.
_SITE_ROOT = 'https://www.wiwo.de'


class WirtschaftsWocheOnline(BasicNewsRecipe):
    """Recipe that downloads the WirtschaftsWoche Online RSS feeds.

    Articles are fetched through their single-page ("-all") print version,
    reduced to the article containers listed in ``keep_only_tags`` and
    stripped of the teaser/advertisement containers in ``remove_tags``.
    """

    title = u'WirtschaftsWoche Online'
    __author__ = 'Armin Geller'  # Update AGE 2013-01-05, 2018-03-01, 2022-08-05
    description = u'German Online Portal of WirtschaftsWoche'
    publisher = 'Verlagsgruppe Handelsblatt GmbH Redaktion WirtschaftsWoche Online'
    category = 'business, economy, news, Germany'
    publication_type = 'weekly magazine'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
    timefmt = ' [%a, %d %b %Y]'

    language = 'de'
    encoding = 'UTF-8'
    # Page that is scraped for the current issue's cover image.
    cover_source = 'https://www.ikiosk.de/shop/epaper/wirtschaftswoche.html'
    masthead_url = 'http://www.wiwo.de/images/wiwo_logo/5748610/1-formatOriginal.png'

    # User-configurable option; its value overrides ``oldest_article``
    # in ``__init__`` when supplied as a string.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        """Initialise the recipe and apply the user-supplied 'days' option."""
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        # Only a string value is treated as user input; the class-level
        # default above is a dict and is therefore ignored here.
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    def get_cover_url(self):
        """Return the cover image URL scraped from ``cover_source``.

        Returns ``None`` (after logging a warning) when the expected
        ``gallery`` element, its link, its image or the image's ``src``
        attribute is missing, instead of failing with an opaque
        ``AttributeError``/``TypeError``.
        """
        cover_source_soup = self.index_to_soup(self.cover_source)
        preview_image_div = cover_source_soup.find(attrs={'class': 'gallery'})
        try:
            return preview_image_div.a.img['src']
        except (AttributeError, TypeError, KeyError):
            # Any link in the gallery -> a -> img -> src chain was absent.
            self.log.warning(
                'No cover image found on', self.cover_source
            )
            return None

    extra_css = '''
        h1, h2 {font-size: 1.6em; text-align: left}
        .c-leadtext {font-size: 1em; font-style: italic; font-weight: normal}
        h4 {font-size: 1.3em;text-align: left}
        h5, h6, a {font-size: 1em;text-align: left}
        .c-metadata {font-size: .75em;text-align: left; font-style: italic}
    '''

    keep_only_tags = [
        dict(name='div', attrs={'data-macro': [
            'lead-media',
        ]}),
        dict(
            name='div',
            attrs={
                'class': [
                    'o-article__element',
                    'o-article__content c-richText',
                ]
            }
        ),
        dict(name='div', attrs={'id': [
            'biga_wrapper',
        ]})
    ]

    remove_tags = [
        dict(name='source'),
        dict(
            name='div',
            attrs={
                'class': [
                    'c-list',
                    'o-article__element u-margin-xxl u-font-s-md',
                    'c-advertisment__fullWidth c-advertisment--P4_desktop',
                    'c-advertisment__fullWidth c-advertisment--P3_mobile',
                    'c-advertisment__fullWidth c-advertisment--P2_mobile',
                    'c-advertisment__fullWidth c-advertisment--CT_contentteaser_mobile_1',
                    'c-teaser c-teaser-inline',
                    'c-teaser c-teaser-inline isPremiumTeaser',
                    'c-teaser c-teaser-inline c-teaser-inline--column',
                    'c-teaser c-teaser-inline c-teaser-inline--xl isPremiumTeaser',
                    'o-teaser-list js-tt o-artbox o-artbox--articleboxplus',
                    'o-article__content-element u-margin-xxl',
                    'o-article__content-element u-margin-xxl ajaxify',
                    'o-article__element o-article__element-block',
                ]
            }
        )
    ]

    feeds = [
        (u'Schlagzeilen', u'http://www.wiwo.de/contentexport/feed/rss/schlagzeilen'),
        (u'Exklusiv', u'http://www.wiwo.de/contentexport/feed/rss/exklusiv'),
        # (u'Themen', u'http://www.wiwo.de/contentexport/feed/rss/themen'), # AGE no print version
        (u'Unternehmen', u'http://www.wiwo.de/contentexport/feed/rss/unternehmen'),
        (u'Finanzen', u'http://www.wiwo.de/contentexport/feed/rss/finanzen'),
        (u'Politik', u'http://www.wiwo.de/contentexport/feed/rss/politik'),
        (u'Erfolg', u'http://www.wiwo.de/contentexport/feed/rss/erfolg'),
        (u'Technologie', u'http://www.wiwo.de/contentexport/feed/rss/technologie')
    ]

    def image_url_processor(self, baseurl, url):
        """Turn site-relative ``/images/...`` paths into absolute wiwo.de URLs.

        Any other URL is returned unchanged. ``baseurl`` is not used.
        """
        if url.startswith('/images/'):
            return _SITE_ROOT + url
        return url

    def preprocess_raw_html(self, raw, url):
        """Give lazily loaded images a usable ``src`` and return the HTML.

        Slideshow images carry their real location in ``data-lazy``; it is
        copied into ``src``, resolved against the site root. ``urljoin``
        leaves absolute and protocol-relative values intact instead of
        blindly prefixing the host, which would produce broken URLs.
        """
        soup = self.index_to_soup(raw)
        for img in soup.findAll('img', attrs={'data-lazy': True}):
            img['src'] = urljoin(_SITE_ROOT, img['data-lazy'])
        return str(soup)

    def print_version(self, url):
        """Return the single-page variant of an article URL.

        ``-all`` is inserted before the file extension of the last path
        segment (``.../123.html`` -> ``.../123-all.html``). Only the path
        is edited, so dots in the host name, query string or fragment are
        never mistaken for the extension separator. URLs whose last path
        segment has no extension are returned unchanged.
        """
        parts = urlsplit(url)
        main, sep, rest = parts.path.rpartition('.')
        if not sep or '/' in rest:
            # No extension in the final path segment: nothing to rewrite.
            return url
        return parts._replace(path=main + '-all' + sep + rest).geturl()