#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe


class Unian(BasicNewsRecipe):
    """Recipe that builds the English-language UNIAN news feed.

    Articles come from the single RSS feed listed in ``feeds``. Each page is
    trimmed to the span between its first ``<h1>`` and the ``article-text``
    div, and the widgets listed in ``remove_tags`` are dropped.
    """

    # --- Publication metadata ---
    title = 'UNIAN'
    __author__ = 'bugmen00t'
    description = (
        'UNIAN (Ukrainian Independent News Agency of News) is the largest independent news agency,'
        " first in Ukraine, founded in 1993, remaining the leader among the country's news media,"
        ' being the most cited source of news from across Ukraine.'
    )
    publication_type = 'newspaper'
    # NOTE(review): 'UK' is not the ISO 3166 country code for Ukraine ('UA');
    # confirm this value against the language codes calibre recognises.
    language = 'en_UK'
    cover_url = 'https://www.unian.info/images/unian-512x512.png'

    # --- Download limits ---
    oldest_article = 30
    max_articles_per_feed = 100

    # --- Page clean-up: manual rules below are used instead of auto clean-up ---
    auto_cleanup = False
    no_stylesheets = True

    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article-text'}}

    # Comment/view counters, "read also" slider and embedded video wrapper.
    remove_tags = [
        {'name': 'span', 'attrs': {'class': 'article__info-item comments'}},
        {'name': 'span', 'attrs': {'class': 'article__info-item views'}},
        {'name': 'div', 'attrs': {'class': 'read-also-slider'}},
        {'name': 'div', 'attrs': {'class': 'nts-video-wrapper'}},
    ]

    feeds = [
        (u'News Agency UNIAN', u'https://rss.unian.net/site/news_eng.rss'),
    ]

    def preprocess_html(self, soup):
        """Copy each image's ``data-src`` value into its ``src`` attribute.

        Only ``<img>`` tags that carry a ``data-src`` attribute are touched.
        The soup is modified in place and returned.
        """
        lazy_images = soup.findAll('img', attrs={'data-src': True})
        for tag in lazy_images:
            tag['src'] = tag['data-src']
        return soup