mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
RedaktionsNetzwerk Deutschland by epubli
This commit is contained in:
parent
2c148fe3f5
commit
ed9f133e86
122
recipes/rnd.recipe
Normal file
122
recipes/rnd.recipe
Normal file
@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env python
|
||||
##
|
||||
## Written: March 2020
|
||||
## Version: 1.2
|
||||
## Last update: 2024-09-26
|
||||
##
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Fetch RSS-Feeds from saechsische.de
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def prefixed_classes(classes):
|
||||
q = frozenset(classes.split(' '))
|
||||
|
||||
def matcher(x):
|
||||
if x:
|
||||
for candidate in frozenset(x.split()):
|
||||
for x in q:
|
||||
if candidate.startswith(x):
|
||||
return True
|
||||
return False
|
||||
return {'attrs': {'class': matcher}}
|
||||
|
||||
|
||||
class RND(BasicNewsRecipe):
|
||||
title = 'RedaktionsNetzwerk Deutschland'
|
||||
__author__ = 'epubli'
|
||||
description = 'RSS-Feeds von RedaktionsNetzwerk Deutschland (rnd.de)'
|
||||
publisher = 'RND'
|
||||
publication_type = 'newspaper'
|
||||
language = 'de'
|
||||
encoding = 'UTF-8'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 40
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
compress_news_images = True
|
||||
compress_news_images_auto_size = 8
|
||||
scale_news_images_to_device = True
|
||||
delay = 1
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
|
||||
cover_url = 'https://www.designtagebuch.de/wp-content/uploads/mediathek//2022/03/rnd-logo-bildmarke-1500x1125.jpg'
|
||||
|
||||
keep_only_tags = [
|
||||
prefixed_classes('ArticleHeadstyled__ArticleHeader- Articlestyled__ArticleBodyWrapper- ArticleImagestyled__ArticleImage-')
|
||||
]
|
||||
remove_tags = [
|
||||
prefixed_classes('CallToActionBasestyled__Container- ArticleDetailsstyled__ArticleDetailsToggleCheckbox-'
|
||||
' ArticleDetailsstyled__ArticleDetailsToggleButton- ArticleImagestyled__ArticleImageCheckbox-'
|
||||
' Adstyled__AdWrapper- MoreItemsBlockstyled__TitleWrapper- MoreItemsBlockstyled__MoreItemsBlock-'
|
||||
' ContentTeaserstyled__Article- Buttonstyled__Button-'),
|
||||
dict(name='source')
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('Politik', 'https://www.rnd.de/arc/outboundfeeds/rss/category/politik/'),
|
||||
('Wirtschaft', 'https://www.rnd.de/arc/outboundfeeds/rss/category/wirtschaft/'),
|
||||
('Sport', 'https://www.rnd.de/arc/outboundfeeds/rss/category/sport/'),
|
||||
('Panorama', 'https://www.rnd.de/arc/outboundfeeds/rss/category/panorama/'),
|
||||
#('Promis', 'https://www.rnd.de/arc/outboundfeeds/rss/category/promis/'),
|
||||
#('Reise', 'https://www.rnd.de/arc/outboundfeeds/rss/category/reise/'),
|
||||
#('Medien', 'https://www.rnd.de/arc/outboundfeeds/rss/category/medien/'),
|
||||
#('Digital', 'https://www.rnd.de/arc/outboundfeeds/rss/category/digital/'),
|
||||
#('Kultur', 'https://www.rnd.de/arc/outboundfeeds/rss/category/kultur/'),
|
||||
#('Wissen', 'https://www.rnd.de/arc/outboundfeeds/rss/category/wissen/'),
|
||||
#('Familie', 'https://www.rnd.de/arc/outboundfeeds/rss/category/familie/'),
|
||||
#('Gesundheit', 'https://www.rnd.de/arc/outboundfeeds/rss/category/gesundheit/'),
|
||||
#('Lifestyle', 'https://www.rnd.de/arc/outboundfeeds/rss/category/lifestyle/'),
|
||||
#('Bauen & Wohnen', 'https://www.rnd.de/arc/outboundfeeds/rss/category/bauen-und-wohnen/'),
|
||||
#('Geld & Finanzen', 'https://www.rnd.de/arc/outboundfeeds/rss/category/geld-und-finanzen/'),
|
||||
#('Liebe & Partnerschaft', 'https://www.rnd.de/arc/outboundfeeds/rss/category/liebe-und-partnerschaft/'),
|
||||
#('Beruf & Bildung', 'https://www.rnd.de/arc/outboundfeeds/rss/category/beruf-und-bildung/'),
|
||||
#('E-Mobility', 'https://www.rnd.de/arc/outboundfeeds/rss/category/e-mobility/')
|
||||
]
|
||||
|
||||
|
||||
def parse_feeds(self):
|
||||
# Call parent's method.
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
# Loop through all feeds.
|
||||
for feed in feeds:
|
||||
# Loop through all articles in feed.
|
||||
for article in feed.articles[:]:
|
||||
# Remove articles with '...' in the url.
|
||||
if '/anzeige/' in article.url:
|
||||
print('Removing:',article.title)
|
||||
feed.articles.remove(article)
|
||||
# Remove articles with '...' in the title.
|
||||
elif 'Liveticker' in article.title:
|
||||
print('Removing:',article.title)
|
||||
feed.articles.remove(article)
|
||||
elif 'Liveblog' in article.title:
|
||||
print('Removing:',article.title)
|
||||
feed.articles.remove(article)
|
||||
elif 'Newsblog' in article.title:
|
||||
print('Removing:',article.title)
|
||||
feed.articles.remove(article)
|
||||
elif 'Podcast' in article.title:
|
||||
print('Removing:',article.title)
|
||||
feed.articles.remove(article)
|
||||
|
||||
return feeds
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
# remove articles requiring login and advertisements
|
||||
unwantedtag='ArticleHeadstyled__ArticleHeadPaidIconContainer'
|
||||
if unwantedtag in raw:
|
||||
print('Skipping unwanted article with tag:',unwantedtag)
|
||||
self.abort_article('Skipping unwanted article')
|
||||
|
||||
unwanted_article_keywords = ['Zum Login']
|
||||
for keyword in unwanted_article_keywords:
|
||||
if keyword in raw:
|
||||
print('Skipping unwanted article with keyword(s):',keyword)
|
||||
#self.abort_article('Skipping unwanted article')
|
||||
return raw
|
Loading…
x
Reference in New Issue
Block a user