#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe


class TJournal(BasicNewsRecipe):
    """Calibre news recipe that downloads articles from the tjournal.ru RSS feeds."""

    # --- Publication metadata -------------------------------------------
    title = u'TJournal'
    __author__ = 'bug_me_not (with fixes by bugmen00t)'
    description = 'TJournal: издание о медиа, технологиях и трендах'
    publisher = 'tjournal.ru'
    category = 'news'
    language = 'ru'

    # --- Download behaviour ---------------------------------------------
    no_stylesheets = False
    remove_javascript = True
    oldest_article = 30
    max_articles_per_feed = 100
    cover_url = 'https://tjournal.ru/static/build/tjournal.ru/images/search_logo.png'

    # --- Page clean-up --------------------------------------------------
    # Everything before the article title block is dropped.  The class name
    # previously carried a stray trailing double quote ('content-title"'),
    # which could never match an element, so this rule was silently inert.
    remove_tags_before = dict(name='div', attrs={'class': 'content-title'})

    # Everything after the article footer block is dropped.
    remove_tags_after = dict(
        name='div',
        attrs={'class': 'content-footer content-footer--full l-island-a'}
    )

    # Individual elements stripped from the article body.  The footer is
    # listed here as well as in remove_tags_after.
    remove_tags = [
        dict(
            name='div',
            attrs={'class': 'content-footer content-footer--full l-island-a'}
        ),
        dict(name='div', attrs={'air-module': 'module.distributionFloating'}),
        dict(name='span', attrs={'class': 'content-editorial-tick'}),
        dict(name='vue'),
        dict(name='div', attrs={'class': 'comments'}),
        dict(name='div', attrs={'class': 'propaganda'}),
        dict(name='div', attrs={'class': 'propaganda propaganda--with-footer'}),
        dict(name='div', attrs={'air-module': 'module.gallery'}),
        dict(name='div', attrs={'class': 'content-container'}),
        dict(
            name='div',
            attrs={'class': 'content-header__item content-header-number'}
        ),
        dict(name='span', attrs={'class': 'views__value'}),
        dict(name='span', attrs={'class': 'views__label'}),
    ]

    # --- Feeds: (section title, RSS URL) --------------------------------
    # Section titles are Russian, written as unicode escapes; the English
    # meaning is noted next to each entry.
    feeds = [
        (
            # "Popular"
            '\u041F\u043E\u043F\u0443\u043B\u044F\u0440\u043D\u043E\u0435',
            'https://tjournal.ru/rss'
        ),
        (
            # "News"
            '\u041D\u043E\u0432\u043E\u0441\u0442\u0438',
            'https://tjournal.ru/rss/news'
        ),
        (
            # "Fresh"
            '\u0421\u0432\u0435\u0436\u0435\u0435',
            'https://tjournal.ru/rss/new'
        ),
        (
            # "Technology"
            '\u0422\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438',
            'https://tjournal.ru/rss/tech'
        ),
        (
            # "Analysis"
            '\u0420\u0430\u0437\u0431\u043E\u0440\u044B',
            'https://tjournal.ru/rss/analysis'
        ),
        (
            # "Internet"
            '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442',
            'https://tjournal.ru/rss/internet'
        ),
    ]

    def preprocess_html(self, soup):
        """Copy each image's ``data-image-src`` attribute into ``src``.

        Only ``<img>`` tags that carry a ``data-image-src`` attribute are
        touched.  Presumably the site stores the real image URL there
        (lazy loading) -- verify against the live markup.

        :param soup: parsed article document, modified in place.
        :return: the same ``soup`` object.
        """
        for img in soup.findAll('img', attrs={'data-image-src': True}):
            img['src'] = img['data-image-src']
        return soup