mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update spektrum.de
This commit is contained in:
parent
d9845f1f45
commit
9920446eb8
@ -1,9 +1,9 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
##
|
##
|
||||||
# Written: October 2012 (new coding)
|
## Written: October 2012 (new coding)
|
||||||
# Version: 9.0
|
## Version: 10.0
|
||||||
# Last update: 2018-02-22
|
## Last update: 2025-01-15
|
||||||
##
|
##
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
@ -31,7 +31,7 @@ class Spektrum(BasicNewsRecipe):
|
|||||||
description = u'German online portal of Spektrum der Wissenschaft'
|
description = u'German online portal of Spektrum der Wissenschaft'
|
||||||
publisher = 'Spektrum der Wissenschaft Verlagsgesellschaft mbH'
|
publisher = 'Spektrum der Wissenschaft Verlagsgesellschaft mbH'
|
||||||
category = 'science news, Germany'
|
category = 'science news, Germany'
|
||||||
oldest_article = 7
|
oldest_article = 3
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
@ -39,26 +39,18 @@ class Spektrum(BasicNewsRecipe):
|
|||||||
language = 'de'
|
language = 'de'
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
ignore_duplicate_articles = {'title'}
|
ignore_duplicate_articles = {'title'}
|
||||||
|
scale_news_images_to_device = True
|
||||||
|
compress_news_images = True
|
||||||
|
|
||||||
cover_url = 'https://www.spektrum.de/js_css/sde/assets/img/svg/sdw_dark.svg'
|
cover_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/1d/Spektrum_der_Wissenschaft_Logo_seit_2016.svg/640px-Spektrum_der_Wissenschaft_Logo_seit_2016.svg.png'
|
||||||
masthead_url = 'http://www.spektrum.de/fm/861/spektrum.de.png'
|
masthead_url = 'http://www.spektrum.de/fm/861/spektrum.de.png'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(
|
(u'Spektrum.de', u'http://www.spektrum.de/alias/rss/spektrum-de-rss-feed/996406'),
|
||||||
u'Spektrum.de',
|
|
||||||
u'http://www.spektrum.de/alias/rss/spektrum-de-rss-feed/996406'
|
|
||||||
),
|
|
||||||
# (u'Spektrum der Wissenschaft', u'http://www.spektrum.de/alias/rss/spektrum-der-wissenschaft-rss-feed/982623'),
|
|
||||||
# (u'Gehirn & Geist', u'http://www.spektrum.de/alias/rss/gehirn-geist-rss-feed/982626'),
|
|
||||||
(
|
|
||||||
u'Sterne und Weltraum',
|
|
||||||
u'http://www.spektrum.de/alias/rss/sterne-und-weltraum-rss-feed/865248'
|
|
||||||
),
|
|
||||||
# (u'Meistgelesene Artikel',u'http://www.spektrum.de/alias/rss/spektrum-de-meistgelesene-artikel/1224665'), # AGe 2014-08-21 new
|
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='article', attrs={'class': 'content'}),
|
dict(name='article', attrs={'class':'content'}),classes('callout-box')
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -71,6 +63,15 @@ class Spektrum(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def parse_feeds(self):
|
def parse_feeds(self):
|
||||||
|
unwanted_article_types = [
|
||||||
|
'podcast',
|
||||||
|
'video',
|
||||||
|
'raetsel',
|
||||||
|
'leseprobe',
|
||||||
|
# 'kolumne',
|
||||||
|
# 'rezension',
|
||||||
|
# 'news',
|
||||||
|
]
|
||||||
# Call parent's method.
|
# Call parent's method.
|
||||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||||
# Loop through all feeds.
|
# Loop through all feeds.
|
||||||
@ -79,16 +80,23 @@ class Spektrum(BasicNewsRecipe):
|
|||||||
for article in feed.articles[:]:
|
for article in feed.articles[:]:
|
||||||
if 'VIDEO' in article.title:
|
if 'VIDEO' in article.title:
|
||||||
feed.articles.remove(article)
|
feed.articles.remove(article)
|
||||||
# Remove articles with 'video','podcast' or 'rezension' in the url.
|
continue
|
||||||
elif 'podcast' in article.url:
|
# Remove articles with '..' in the url.
|
||||||
feed.articles.remove(article)
|
for keyword in unwanted_article_types:
|
||||||
elif 'video' in article.url:
|
if keyword in article.url:
|
||||||
feed.articles.remove(article)
|
|
||||||
elif 'rezension' in article.url:
|
|
||||||
feed.articles.remove(article)
|
feed.articles.remove(article)
|
||||||
|
continue
|
||||||
|
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def preprocess_html(self, soup, *a):
|
def preprocess_html(self, soup):
|
||||||
for img in soup.findAll('img', attrs={'data-src': True}):
|
for noscript in soup.findAll('noscript'):
|
||||||
img['src'] = img['data-src']
|
noscript.name = 'div'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def preprocess_raw_html(self, raw, url):
|
||||||
|
# remove articles requiring login and advertisements
|
||||||
|
unwantedtag = 'content pw-premium'
|
||||||
|
if unwantedtag in raw:
|
||||||
|
self.abort_article('Skipping unwanted article with tag:' + unwantedtag)
|
||||||
|
return raw
|
||||||
|
Loading…
x
Reference in New Issue
Block a user