From b02496e7d5de4376c62c0ee3b65f2fa2fea7a1e1 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Thu, 15 Aug 2024 09:58:33 +0530
Subject: [PATCH] =?UTF-8?q?Create=20S=C3=BCddeutsche=20Zeitung=20Magazin?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 recipes/icons/sz_magazin.png | Bin 0 -> 601 bytes
 recipes/sueddeutsche.recipe  |  2 +-
 recipes/sz_magazin.recipe    | 97 +++++++++++++++++++++++++++++++++++
 3 files changed, 98 insertions(+), 1 deletion(-)
 create mode 100644 recipes/icons/sz_magazin.png
 create mode 100644 recipes/sz_magazin.recipe

diff --git a/recipes/icons/sz_magazin.png b/recipes/icons/sz_magazin.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca965e1a9157d73b73de283952eab2c4b2821b86
GIT binary patch
literal 601
zcmV-f0;c_mP)M;D9I+k%t3fHCUKN@DCVJ1e<_G7RAcQN(xIskTfA6
z7B*3PZz_w)s$euJoG7ls2GNj!ScvZKd)FfK;GrlU7-ry{^PMx_IbS$(Y}MRqU&8zy
za~k{)8@I#}LdC)xJc}4}T8CjE=2T-&&*46th&la;okr}-E%{W;>5~zD!acPw7iwRQ
z;`T;jh&gRwTKlpTbK0qWIfyma~r{v$zv4#hkurdoqNso7{|tP-0HA+LuwgAH}DbwC|k3*_hMQ^B8Vy
z;&Hr=_1c%!5W=O_-c@{tZM@y?6V10=`?41yblv3sHi{X

literal 0
HcmV?d00001

diff --git a/recipes/sueddeutsche.recipe b/recipes/sueddeutsche.recipe
index 6c862c0cd9..9e854fd073 100644
--- a/recipes/sueddeutsche.recipe
+++ b/recipes/sueddeutsche.recipe
@@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 
 class Sueddeutsche(BasicNewsRecipe):
-    title = 'Sueddeutsche Zeitung'
+    title = 'Süddeutsche Zeitung'
     __author__ = 'unkn0wn'
     oldest_article = 1.2
     masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/4/42/S%C3%BCddeutsche_Zeitung_Logo.svg'
diff --git a/recipes/sz_magazin.recipe b/recipes/sz_magazin.recipe
new file mode 100644
index 0000000000..8b5ddbe5d0
--- /dev/null
+++ b/recipes/sz_magazin.recipe
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from calibre.web.feeds.news import BasicNewsRecipe, classes
+
+
+def absurl(url):
+    # Site-relative paths get the magazine origin prepended; any other URL is
+    # returned unchanged rather than falling through to an implicit None.
+    if url.startswith('/'):
+        return 'https://sz-magazin.sueddeutsche.de' + url
+    return url
+
+
+class szmagazin(BasicNewsRecipe):
+    title = 'Süddeutsche Zeitung Magazin'
+    __author__ = 'unkn0wn'
+    description = (
+        'Das SZ-Magazin ist ein Wochenendmagazin, das der Süddeutschen Zeitung (SZ) beiliegt. Es erscheint jeden Freitag und umfasst verschiedene '
+        'Themenbereiche wie Politik, Kultur, Gesellschaft, Wissenschaft, Technologie, Reisen, Mode und Lifestyle.'
+    )
+    masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/8/89/SZ-Magazin_logo.svg'
+    encoding = 'utf-8'
+    language = 'de'
+    remove_attributes = ['style', 'height', 'width']
+    ignore_duplicate_articles = {'url'}
+    resolve_internal_links = True
+    remove_empty_feeds = True
+    no_javascript = True
+    no_stylesheets = True
+
+    extra_css = '''
+        .figure__caption, .articleheader__author { font-size:small; }
+        .articleheader__subline { font-style:italic; color:#202020; }
+        blockquote, em { color: #202020; }
+    '''
+
+    recipe_specific_options = {
+        'issue': {
+            'short': 'The edition to download (YYYY/Number format)',
+            'long': 'For example, 2024/31'
+        }
+    }
+
+    keep_only_tags = [
+        classes(
+            'articleheader__headline articleheader__subline articleheader__author articleheader__mediasection articlemain__content'
+        )
+    ]
+    remove_tags = [
+        dict(name=['svg', 'button', 'meta']),
+        classes('articlemain__ad inline-teaser')
+    ]
+
+    def parse_index(self):
+        # Build the single 'Articles' feed from one issue page: the issue given
+        # via the 'issue' option, else the first teaser found on the /hefte page.
+        d = self.recipe_specific_options.get('issue')
+        if d and isinstance(d, str):
+            issue = 'https://sz-magazin.sueddeutsche.de/heft/' + d
+        else:
+            url = 'https://sz-magazin.sueddeutsche.de/hefte'
+            soup = self.index_to_soup(url)
+            issue = absurl(soup.find(**classes('teaser--issue')).a['href'])
+
+        soup = self.index_to_soup(issue)
+        self.cover_url = soup.find(**classes('issue-page-header__img'))['src']
+        self.title = 'SZ Magazin ' + self.tag_to_string(soup.find(**classes('page-header__headline')))
+        self.description = self.tag_to_string(soup.find(**classes('page-header__text')))
+        self.timefmt = ' [' + self.tag_to_string(soup.find(**classes('issue-page-header-date'))) + ']'
+
+        ans = []
+
+        for a in soup.findAll('a', attrs={'class':'link teaser__text-link'}):
+            title = self.tag_to_string(a.h5)
+            url = absurl(a['href'])
+            desc = self.tag_to_string(a.p)
+            self.log(' ', title, '\n\t', desc, '\n\t', url)
+            ans.append({'title': title, 'url': url, 'description': desc})
+        return [('Articles', ans)]
+
+    def print_version(self, url):
+        # Drop the query string and prefix the och.to/unlock address.
+        return 'https://och.to/unlock/' + url.split('?')[0]
+
+    def preprocess_html(self, soup):
+        # Turn the subline into a paragraph, demote h2 headings inside
+        # blockquotes to h4, and copy each image's data-src into src.
+        subline = soup.find(**classes('articleheader__subline'))
+        if subline:
+            subline.name = 'p'
+        for bq in soup.findAll('blockquote'):
+            quote_heading = bq.find('h2')
+            if quote_heading:
+                quote_heading.name = 'h4'
+        for img in soup.findAll('img', attrs={'data-src':True}):
+            img['src'] = img['data-src']
+        return soup