mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Update Saechsische Zeitung
This commit is contained in:
parent
c466c04db4
commit
eacbbc1f88
@ -1,22 +1,21 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
##
|
##
|
||||||
# Written: March 2020
|
## Written: March 2020
|
||||||
# Version: 1.0
|
## Version: 1.1
|
||||||
# Last update: 2020-03-27
|
## Last update: 2023-03-31
|
||||||
##
|
##
|
||||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Fetch RSS-Feeds from saechsische.de
|
Fetch RSS-Feeds from saechsische.de
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
    """Build a tag-matching spec keyed on the HTML ``class`` attribute.

    ``classes`` is a space-separated string of class names. The returned
    dict has the shape ``{'attrs': {'class': <callable>}}``; the callable
    receives a tag's class attribute value and yields a truthy result when
    the value shares at least one class name with ``classes``.
    """
    wanted = frozenset(classes.split(' '))

    def matches(value):
        # A missing or empty attribute value is handed back unchanged
        # (falsy), exactly as the short-circuiting ``and`` form would.
        if not value:
            return value
        return frozenset(value.split()) & wanted

    return {'attrs': {'class': matches}}
|
|
||||||
|
|
||||||
|
|
||||||
class Saechsische(BasicNewsRecipe):
|
class Saechsische(BasicNewsRecipe):
|
||||||
@ -32,45 +31,95 @@ class Saechsische(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
compress_news_images = True
|
||||||
|
compress_news_images_auto_size = 8
|
||||||
scale_news_images_to_device = True
|
scale_news_images_to_device = True
|
||||||
|
delay = 1
|
||||||
ignore_duplicate_articles = {'title', 'url'}
|
ignore_duplicate_articles = {'title', 'url'}
|
||||||
cover_url = 'https://www.saechsische.de/img/logo.svg'
|
|
||||||
|
cover_url = 'https://www.saechsische.de/img/logo.svg'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Dresden', 'feed://www.saechsische.de/rss/dresden'),
|
('Dresden', 'feed://www.saechsische.de/rss/dresden'),
|
||||||
('Sachsen', 'feed://saechsische.de/rss/sachsen'),
|
('Sachsen', 'feed://saechsische.de/rss/sachsen'),
|
||||||
('Dynamo', 'feed://www.saechsische.de/rss/dynamo'),
|
('Deutschland und Welt', 'feed://www.saechsische.de/rss/deutschland-welt'),
|
||||||
('Politik', 'feed://www.saechsische.de/rss/politik'),
|
('Politik', 'feed://www.saechsische.de/rss/politik'),
|
||||||
('Wirtschaft', 'feed://www.saechsische.de/rss/wirtschaft'),
|
('Wirtschaft', 'feed://www.saechsische.de/rss/wirtschaft'),
|
||||||
('Feuilleton', 'feed://www.saechsische.de/rss/feuilleton'),
|
('Feuilleton', 'feed://www.saechsische.de/rss/feuilleton'),
|
||||||
('Sport', 'feed://www.saechsische.de/rss/sport'),
|
('Sport', 'feed://www.saechsische.de/rss/sport'),
|
||||||
('Deutschland und Welt', 'feed://www.saechsische.de/rss/deutschland-welt'),
|
#('Dynamo', 'feed://www.saechsische.de/rss/dynamo'),
|
||||||
# ('Bautzen', 'feed://www.saechsische.de/rss/bautzen'),
|
#('Bautzen', 'feed://www.saechsische.de/rss/bautzen'),
|
||||||
# ('Bischofswerda', 'feed://www.saechsische.de/rss/bischofswerda'),
|
#('Bischofswerda', 'feed://www.saechsische.de/rss/bischofswerda'),
|
||||||
# ('Dippoldiswalde', 'feed://www.saechsische.de/rss/dippoldiswalde'),
|
#('Dippoldiswalde', 'feed://www.saechsische.de/rss/dippoldiswalde'),
|
||||||
# ('Döbeln', 'feed://www.saechsische.de/rss/doebeln'),
|
#('Döbeln', 'feed://www.saechsische.de/rss/doebeln'),
|
||||||
# ('Freital', 'feed://www.saechsische.de/rss/freital'),
|
#('Freital', 'feed://www.saechsische.de/rss/freital'),
|
||||||
# ('Großenhain', 'feed://www.saechsische.de/rss/grossenhain'),
|
#('Großenhain', 'feed://www.saechsische.de/rss/grossenhain'),
|
||||||
# ('Görlitz', 'feed://www.saechsische.de/rss/goerlitz'),
|
#('Görlitz', 'feed://www.saechsische.de/rss/goerlitz'),
|
||||||
# ('Kamenz', 'feed://www.saechsische.de/rss/kamenz'),
|
#('Kamenz', 'feed://www.saechsische.de/rss/kamenz'),
|
||||||
# ('Löbau', 'feed://www.saechsische.de/rss/loebau'),
|
#('Löbau', 'feed://www.saechsische.de/rss/loebau'),
|
||||||
# ('Meißen', 'feed://www.saechsische.de/rss/meissen'),
|
#('Meißen', 'feed://www.saechsische.de/rss/meissen'),
|
||||||
# ('Niesky', 'feed://www.saechsische.de/rss/niesky'),
|
#('Niesky', 'feed://www.saechsische.de/rss/niesky'),
|
||||||
# ('Pirna', 'feed://www.saechsische.de/rss/pirna'),
|
#('Pirna', 'feed://www.saechsische.de/rss/pirna'),
|
||||||
# ('Radeberg', 'feed://www.saechsische.de/rss/radeberg'),
|
#('Radeberg', 'feed://www.saechsische.de/rss/radeberg'),
|
||||||
# ('Radebeul', 'feed://www.saechsische.de/rss/radebeul'),
|
#('Radebeul', 'feed://www.saechsische.de/rss/radebeul'),
|
||||||
# ('Riesa', 'feed://www.saechsische.de/rss/riesa'),
|
#('Riesa', 'feed://www.saechsische.de/rss/riesa'),
|
||||||
# ('Sebnitz', 'feed://www.saechsische.de/rss/sebnitz'),
|
#('Sebnitz', 'feed://www.saechsische.de/rss/sebnitz'),
|
||||||
# ('Zittau', 'feed://www.saechsische.de/rss/zittau'),
|
#('Zittau', 'feed://www.saechsische.de/rss/zittau'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
template_css = '''
|
||||||
|
.article_date { color: gray; font-family: monospace;}
|
||||||
|
.article_description { text-indent: 0pt; }
|
||||||
|
a.article { font-weight: bold; text-align:left; }
|
||||||
|
a.feed { font-weight: bold; }
|
||||||
|
.calibre_navbar { font-size: 200% !important; }
|
||||||
|
'''
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
h2 {margin-top: 0em;}
|
||||||
|
'''
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='article', attrs={'class': 'article-detail'}),
|
dict(name='article', attrs={'class':'article-detail'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [ classes('article-fill'),
|
||||||
classes('article-fill'),
|
dict(name='div', attrs={'class':'related-articles'}),
|
||||||
dict(name='div', attrs={'class': 'article-related-container'}),
|
dict(name='a', attrs={'class':'article-remember-link'}),
|
||||||
dict(name='div', attrs={'id': 'article-header'}),
|
dict(name='a', attrs={'href':'https://www.saechsische.de/dresden'}),
|
||||||
dict(name='span', attrs={'class': 'article-plus'}),
|
dict(name='a', attrs={'href':'https://www.saechsische.de/content/newsletter-lp?utm_content=dresden_kompakt'}),
|
||||||
]
|
dict(name='div', attrs={'class':'article-detail-socials'}),
|
||||||
|
dict(name='div', attrs={'class':'d-desktop-none'}),
|
||||||
|
dict(name='div', attrs={'class':'floating-share-icon'}),
|
||||||
|
]
|
||||||
|
|
||||||
|
def parse_feeds(self):
    """Parse the feeds and drop articles that should not be downloaded.

    Delegates to ``BasicNewsRecipe.parse_feeds`` and then filters every
    feed's ``articles`` list in place. An article is dropped when its URL
    contains ``/anzeige/`` ("Anzeige" is German for advertisement) or
    ``newsletter-dresden``, or when its title contains ``Newsblog`` or
    ``Podcast``. Each dropped article's title is printed.

    Returns:
        The feeds object returned by the parent implementation, with the
        unwanted articles removed.
    """
    unwanted_url_parts = ('/anzeige/', 'newsletter-dresden')
    unwanted_title_parts = ('Newsblog', 'Podcast')

    feeds = BasicNewsRecipe.parse_feeds(self)
    for feed in feeds:
        kept = []
        for article in feed.articles:
            # NOTE(review): an article's url/title may be None (e.g. feed
            # entries without a link) -- confirm against calibre's Article.
            # Falling back to '' keeps the substring tests from raising
            # TypeError in that case.
            url = article.url or ''
            title = article.title or ''
            if (any(part in url for part in unwanted_url_parts)
                    or any(part in title for part in unwanted_title_parts)):
                print('Removing:', article.title)
                continue
            kept.append(article)
        # Slice assignment keeps the same list object, so any other
        # reference to feed.articles observes the filtered result.
        feed.articles[:] = kept
    return feeds
|
||||||
|
|
||||||
|
def preprocess_raw_html(self, raw, url):
    """Abort articles whose raw HTML contains an unwanted marker string.

    The markers target newsblogs, articles requiring a login and
    advertisements. For every marker found in ``raw`` a message is printed
    and ``self.abort_article`` is called; ``abort_article`` is not defined
    in this block -- presumably it stops processing of the article, verify
    against the base class. ``url`` is accepted but not used here.

    Returns ``raw`` unchanged when control reaches the end of the method.
    """
    for marker in ('unser Newsblog', 'Zum Login', '00:00 Uhr'):
        if marker not in raw:
            continue
        print('Skipping unwanted article with keyword(s):', marker)
        self.abort_article('Skipping unwanted article')
    return raw
|
||||||
|
Loading…
x
Reference in New Issue
Block a user