mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
71 lines
2.6 KiB
Python
71 lines
2.6 KiB
Python
#!/usr/bin/env python
|
||
# vim:fileencoding=utf-8
|
||
|
||
from calibre.web.feeds.news import BasicNewsRecipe
|
||
|
||
|
||
class TJournal(BasicNewsRecipe):
    """Calibre news recipe for TJournal (tjournal.ru).

    The recipe pulls articles from the site's RSS feeds, trims each page
    down to the article body using the tag filters below, and rewrites
    lazily loaded images so they carry a regular ``src`` attribute.
    """

    # --- Publication metadata -------------------------------------------
    title = u'TJournal'
    __author__ = 'bug_me_not (with fixes by bugmen00t)'
    # Russian: "TJournal: a publication about media, technology and trends".
    description = 'TJournal: издание о медиа, технологиях и трендах'
    publisher = 'tjournal.ru'
    category = 'news'
    language = 'ru'

    # --- Download behaviour ---------------------------------------------
    no_stylesheets = False
    remove_javascript = True
    oldest_article = 30
    max_articles_per_feed = 100
    cover_url = 'https://tjournal.ru/static/build/tjournal.ru/images/search_logo.png'

    # --- Page clean-up --------------------------------------------------
    # Marker for the start of the article: the title block.
    # The class value previously read 'content-title"' (with a stray double
    # quote inside the string), which looks like a typo and would not be
    # expected to match a real class name; the quote has been dropped.
    remove_tags_before = dict(name='div', attrs={'class': 'content-title'})

    # Marker for the end of the article: the footer block.
    remove_tags_after = dict(
        name='div',
        attrs={'class': 'content-footer content-footer--full l-island-a'}
    )

    # Elements stripped from the article: the footer itself, floating
    # share widgets, comments, promo blocks, galleries and view counters.
    remove_tags = [
        dict(
            name='div',
            attrs={'class': 'content-footer content-footer--full l-island-a'}
        ),
        dict(name='div', attrs={'air-module': 'module.distributionFloating'}),
        dict(name='span', attrs={'class': 'content-editorial-tick'}),
        dict(name='vue'),
        dict(name='div', attrs={'class': 'comments'}),
        dict(name='div', attrs={'class': 'propaganda'}),
        dict(name='div', attrs={'class': 'propaganda propaganda--with-footer'}),
        dict(name='div', attrs={'air-module': 'module.gallery'}),
        dict(name='div', attrs={'class': 'content-container'}),
        dict(
            name='div',
            attrs={'class': 'content-header__item content-header-number'}
        ),
        dict(name='span', attrs={'class': 'views__value'}),
        dict(name='span', attrs={'class': 'views__label'}),
    ]

    # --- Feeds: (section title, RSS URL) --------------------------------
    # Titles are Russian, stored as \u escapes; translations in comments.
    feeds = [
        # "Popular"
        (
            '\u041F\u043E\u043F\u0443\u043B\u044F\u0440\u043D\u043E\u0435',
            'https://tjournal.ru/rss'
        ),
        # "News"
        (
            '\u041D\u043E\u0432\u043E\u0441\u0442\u0438',
            'https://tjournal.ru/rss/news'
        ),
        # "Fresh"
        (
            '\u0421\u0432\u0435\u0436\u0435\u0435',
            'https://tjournal.ru/rss/new'
        ),
        # "Technology"
        (
            '\u0422\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438',
            'https://tjournal.ru/rss/tech'
        ),
        # "Analysis"
        (
            '\u0420\u0430\u0437\u0431\u043E\u0440\u044B',
            'https://tjournal.ru/rss/analysis'
        ),
        # "Internet"
        (
            '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442',
            'https://tjournal.ru/rss/internet'
        ),
    ]

    def preprocess_html(self, soup):
        """Copy each image's ``data-image-src`` value into its ``src``.

        Only ``<img>`` tags that actually carry a ``data-image-src``
        attribute are touched; the (mutated) soup is returned.
        """
        for img in soup.findAll('img', attrs={'data-image-src': True}):
            img['src'] = img['data-image-src']
        return soup