mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
132 lines
5.9 KiB
Python
132 lines
5.9 KiB
Python
#!/usr/bin/env python
|
|
# vim:fileencoding=utf-8
|
|
# License: GPLv3 Copyright: 2023, Joel Davies <joeld.dev at gmail.com>
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class DRNyheder(BasicNewsRecipe):
    '''
    Recipe that builds a news periodical from the DR Nyheder (dr.dk) RSS feeds.

    The class is purely declarative apart from two hooks:

    * ``preprocess_raw_html`` aborts articles whose URL marks them as an
      unsupported type (interactive "stories" and DRTV links).
    * ``get_cover_url`` picks the first image of the dr.dk front page grid
      as the cover, or returns ``None`` when no such image can be found.
    '''

    # Feeds are found here: https://www.dr.dk/nyheder/dr-nyheder-som-rss-feed
    # Commented-out entries are available feeds that are disabled by default.
    feeds = [
        ('Seneste nyt', 'https://www.dr.dk/nyheder/service/feeds/senestenyt'),
        ('Indland', 'https://www.dr.dk/nyheder/service/feeds/indland'),
        ('Udland', 'https://www.dr.dk/nyheder/service/feeds/udland'),
        ('Penge', 'https://www.dr.dk/nyheder/service/feeds/penge'),
        ('Politik', 'https://www.dr.dk/nyheder/service/feeds/politik'),
        # ('Sporten', 'https://www.dr.dk/nyheder/service/feeds/sporten'),
        # ('Seneste sport', 'https://www.dr.dk/nyheder/service/feeds/senestesport'),
        ('Viden', 'https://www.dr.dk/nyheder/service/feeds/viden'),
        ('Kultur', 'https://www.dr.dk/nyheder/service/feeds/kultur'),
        ('Musik', 'https://www.dr.dk/nyheder/service/feeds/musik'),
        ('Mit Liv', 'https://www.dr.dk/nyheder/service/feeds/mitliv'),
        ('Mad', 'https://www.dr.dk/nyheder/service/feeds/mad'),
        ('Vejret', 'https://www.dr.dk/nyheder/service/feeds/vejret'),
        ('Regionale', 'https://www.dr.dk/nyheder/service/feeds/regionale'),
        ('DR Hovedstadsområdet', 'https://www.dr.dk/nyheder/service/feeds/regionale/kbh'),
        ('DR Bornholm', 'https://www.dr.dk/nyheder/service/feeds/regionale/bornholm'),
        ('DR Syd og Sønderjylland', 'https://www.dr.dk/nyheder/service/feeds/regionale/syd'),
        ('DR Fyn', 'https://www.dr.dk/nyheder/service/feeds/regionale/fyn'),
        ('DR Midt- og Vestjylland', 'https://www.dr.dk/nyheder/service/feeds/regionale/vest'),
        ('DR Nordjylland', 'https://www.dr.dk/nyheder/service/feeds/regionale/nord'),
        ('DR Trekantområdet', 'https://www.dr.dk/nyheder/service/feeds/regionale/trekanten'),
        ('DR Sjælland', 'https://www.dr.dk/nyheder/service/feeds/regionale/sjaelland'),
        ('DR Østjylland', 'https://www.dr.dk/nyheder/service/feeds/regionale/oestjylland')
    ]

    # Periodical metadata
    title = 'DR Nyheder'
    __author__ = 'Joel Davies'
    publisher = 'DR Nyheder'
    description = 'Her finder du nyheder fra DR.'
    category = 'news, politics, money, culture, sport, science, Denmark'
    publication_type = 'newspaper'
    encoding = 'utf8'
    language = 'da'

    # Download and processing options
    oldest_article = 4  # 2 might be best
    max_articles_per_feed = 50  # 100 better, this is just for testing
    no_stylesheets = True
    use_embedded_content = False
    auto_cleanup = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}
    simultaneous_downloads = 20
    compress_news_images = True
    masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/18/DR_logo.svg/1024px-DR_logo.svg.png'

    # Styling for the byline, article figures and figure captions that are
    # retained by keep_only_tags below.
    extra_css = '''
        .dre-byline__contributions {
            margin-bottom: 10px;
        }

        .dre-byline__contributions div {
            display: inline;
        }

        .dre-byline__contribution + .dre-byline__contribution:before {
            display: inline;
            content: ", ";
        }

        .dre-standard-article__figure {
            margin-bottom: 30px;
            text-align: center;
        }

        .dre-picture {
            margin-bottom: 10px;
        }

        .dre-picture__image {
            max-width: 100%;
            height: auto;
        }

        .dre-standard-article__figure-caption {
            font-size: .85em;
            color: #575757;
        }
    '''

    # Skip articles with /stories/ URL as these are Instagram story-style interactive pieces that play videos
    # Also DRTV as these are just links to the live TV channel
    def preprocess_raw_html(self, raw_html, url):
        '''
        Reject unsupported article types before any parsing takes place.

        :param raw_html: The downloaded page source.
        :param url: The URL the page was downloaded from.
        :return: ``raw_html`` unchanged when the article is kept; for
            ``/stories/`` and ``/drtv/`` URLs ``self.abort_article`` is
            called instead.
        '''
        if '/stories/' in url or '/drtv/' in url:
            self.abort_article('Skipping unsupported article type')
        return raw_html

    # Generate cover from the first image on the dr.dk homepage
    def get_cover_url(self):
        '''
        Return the ``src`` of the first image in the dr.dk front page grid.

        :return: The image URL, or ``None`` when the grid list, an image
            inside it, or the image's ``src`` attribute cannot be found.
        '''
        soup = self.index_to_soup('https://www.dr.dk/')
        main_content = soup.find('ul', attrs={'class': 'dre-grid-layout'})
        if main_content is None:
            # The front page markup no longer contains the expected grid;
            # report "no cover" instead of failing on None.find(...).
            return None
        cover_item = main_content.find('img')
        if cover_item is None:
            return None
        # .get() so that an <img> without a src attribute also means "no cover"
        return cover_item.get('src')

    keep_only_tags = [

        dict(name='h1', attrs={'class': 'dre-article-title__heading'}),  # Title
        dict(name='div', attrs={'class': 'dre-article-byline'}),  # Author
        dict(name='figure', attrs={'class': 'dre-standard-article__figure'}),  # Comment out to remove images
        dict(name='p', attrs={'class': 'dre-article-body-paragraph'}),  # All body text of the article
        dict(name='article', attrs={'itemtype': 'http://schema.org/NewsArticle'}),
        # dict(name="h1", attrs={'class': 'hydra-latest-news-page-short-news__title'}),
        # dict(name="p", attrs={'class': 'hydra-latest-news-page-short-news__paragraph'}),
        # dict(name="div", attrs={'class': 'dre-speech'}),
        # dict(name="div", attrs={'itemprop': 'author'})
    ]

    remove_tags = [
        dict(name='ol', attrs={'class': 'hydra-latest-news-page__list'}),
        dict(name='div', attrs={'class': [
            'hydra-latest-news-page-short-news__share', 'hydra-latest-news-page-short-news__a11y-container',
            'hydra-latest-news-page-short-news__meta', 'hydra-latest-news-page-short-news__image-slider', 'dre-byline__dates']}),
        dict(name='source'),
        # dict(name='menu', attrs={'class': 'share'}),
        # dict(name='menu', attrs={'class': 'dr-site-share-horizontal'}),
    ]

    # Fixes images having the wrong aspect ratio
    remove_attributes = ['width', 'height']