calibre/recipes/unian_net_en.recipe
2025-01-24 11:14:14 +01:00

38 lines
1.4 KiB
Python

#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Unian(BasicNewsRecipe):
    """calibre recipe that builds an e-book from UNIAN's English RSS feed."""

    # --- Publication metadata -------------------------------------------
    title = 'UNIAN'
    __author__ = 'bugmen00t'
    description = (
        "UNIAN (Ukrainian Independent News Agency of News) is the largest "
        "independent news agency, first in Ukraine, founded in 1993, "
        "remaining the leader among the country's news media, being the "
        "most cited source of news from across Ukraine."
    )
    publication_type = 'newspaper'
    # NOTE(review): 'en_UK' is not a standard locale tag (Ukraine is UA,
    # Great Britain is GB) -- confirm calibre resolves it as intended.
    language = 'en_UK'
    cover_url = 'https://www.unian.info/images/unian-512x512.png'

    # --- Sources and download limits -------------------------------------
    feeds = [
        ('News Agency UNIAN', 'https://rss.unian.net/site/news_eng.rss'),
    ]
    oldest_article = 30
    max_articles_per_feed = 100

    # --- Article clean-up -------------------------------------------------
    # Automatic clean-up is switched off; the article body is isolated by
    # the explicit tag rules below instead.
    auto_cleanup = False
    no_stylesheets = True
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article-text'}}
    remove_tags = [
        {'name': 'span', 'attrs': {'class': 'article__info-item comments'}},
        {'name': 'span', 'attrs': {'class': 'article__info-item views'}},
        {'name': 'div', 'attrs': {'class': 'read-also-slider'}},
        {'name': 'div', 'attrs': {'class': 'nts-video-wrapper'}},
    ]

    def preprocess_html(self, soup):
        """Copy every image's ``data-src`` attribute into ``src``.

        Only ``<img>`` tags that carry a ``data-src`` attribute are touched.
        Presumably the site lazy-loads its images, leaving the real URL in
        ``data-src`` -- verify against a live article page.

        :param soup: parsed article document; modified in place.
        :return: the same ``soup`` object.
        """
        deferred_images = soup.findAll('img', attrs={'data-src': True})
        for tag in deferred_images:
            tag['src'] = tag['data-src']
        return soup