various russian news sources by bugmen00t

This commit is contained in:
Kovid Goyal 2022-07-22 12:26:01 +05:30
parent 2ae8b45852
commit 543ac61c1f
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
15 changed files with 282 additions and 0 deletions

32
recipes/bbc_ru.recipe Normal file
View File

@ -0,0 +1,32 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class BBC(BasicNewsRecipe):
    """Calibre news recipe for the BBC Russian service.

    Articles are taken from a single RSS feed, and each page is trimmed
    with explicit tag rules (``auto_cleanup`` is switched off).
    """

    # -- Publication metadata ------------------------------------------
    __author__ = 'bugmen00t'
    title = u'BBC Russian'
    description = u'\u0420\u0443\u0441\u0441\u043A\u0430\u044F \u0441\u043B\u0443\u0436\u0431\u0430 BBC'
    language = 'ru'
    publication_type = 'newspaper'
    cover_url = 'https://news.files.bbci.co.uk/ws/img/logos/og/russian.png'

    # -- Download limits -----------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 50

    # -- Page clean-up rules -------------------------------------------
    auto_cleanup = False
    no_stylesheets = True
    # Content boundaries: the first <h1> and the <main aria-hidden="true">
    # element.
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'main', 'attrs': {'aria-hidden': 'true'}}
    # Elements dropped from the article body.
    remove_tags = [
        {'name': 'section', 'attrs': {'role': 'region'}},
        {'name': 'footer'},
        {'name': 'aside'},
    ]

    # -- Feeds: (section title, RSS URL) pairs -------------------------
    feeds = [
        (u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438 BBC', 'https://feeds.bbci.co.uk/russian/rss.xml'),
    ]

42
recipes/cedar.recipe Normal file
View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Cedar(BasicNewsRecipe):
    """Calibre news recipe for kedr.media.

    Pulls five category feeds from the site. ``auto_cleanup`` is enabled
    and explicit before/after tag boundaries are declared as well.
    """

    # -- Publication metadata ------------------------------------------
    __author__ = 'bugmen00t'
    title = u'\u041A\u0435\u0434\u0440'
    description = u'\u041D\u0435\u0437\u0430\u0432\u0438\u0441\u0438\u043C\u043E\u0435 \u043C\u0435\u0434\u0438\u0430 \u043E\u0431 \u043E\u043A\u0440\u0443\u0436\u0430\u044E\u0449\u0435\u0439 \u0441\u0440\u0435\u0434\u0435'  # noqa
    language = 'ru'
    publication_type = 'blog'
    cover_url = 'https://kedr.media/wp-content/themes/kedrmedia_gutenberg/assets/img/logo-bg.svg'

    # -- Download limits -----------------------------------------------
    oldest_article = 30
    max_articles_per_feed = 20

    # -- Page clean-up rules -------------------------------------------
    auto_cleanup = True
    no_stylesheets = False
    # Content boundaries: the post header and post content containers.
    remove_tags_before = {'name': 'div', 'attrs': {'class': 'post-header'}}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'post-content'}}

    # -- Feeds: (section title, RSS URL) pairs -------------------------
    feeds = [
        (u'\u0418\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F', 'https://kedr.media/category/research/feed'),
        (u'\u0418\u0441\u0442\u043E\u0440\u0438\u0438', 'https://kedr.media/category/stories/feed'),
        (u'\u041C\u043D\u0435\u043D\u0438\u044F', 'https://kedr.media/category/opinions/feed'),
        (u'\u0418\u043D\u0442\u0435\u0440\u0432\u044C\u044E', 'https://kedr.media/category/interview/feed'),
        (u'\u041E\u0431\u044A\u044F\u0441\u043D\u044F\u0435\u043C', 'https://kedr.media/category/explain/feed'),
    ]

View File

@ -0,0 +1,71 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class DeutscheWelle(BasicNewsRecipe):
    """Calibre news recipe for the Russian edition of Deutsche Welle.

    Declares one "whole site" feed plus thirteen thematic feeds. Pages
    are trimmed with explicit tag boundaries (``auto_cleanup`` is off).
    """

    # -- Publication metadata ------------------------------------------
    title = u'Deutsche Welle \u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u043E\u043C'
    description = u'\u0420\u0443\u0441\u0441\u043A\u0430\u044F \u0440\u0435\u0434\u0430\u043A\u0446\u0438\u044F Deutsche Welle: \u043D\u043E\u0432\u043E\u0441\u0442\u0438, \u0430\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430, \u043A\u043E\u043C\u043C\u0435\u043D\u0442\u0430\u0440\u0438\u0438 \u0438 \u0440\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0438 \u0438\u0437 \u0413\u0435\u0440\u043C\u0430\u043D\u0438\u0438 \u0438 \u0415\u0432\u0440\u043E\u043F\u044B, \u043D\u0435\u043C\u0435\u0446\u043A\u0438\u0439 \u0438 \u0435\u0432\u0440\u043E\u043F\u0435\u0439\u0441\u043A\u0438\u0439 \u0432\u0437\u0433\u043B\u044F\u0434 \u043D\u0430 \u0441\u043E\u0431\u044B\u0442\u0438\u044F \u0432 \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043C\u0438\u0440\u0435, \u043F\u0440\u0430\u043A\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u0441\u043E\u0432\u0435\u0442\u044B \u0434\u043B\u044F \u0442\u0443\u0440\u0438\u0441\u0442\u043E\u0432 \u0438 \u0442\u0435\u0445, \u043A\u0442\u043E \u0436\u0435\u043B\u0430\u0435\u0442 \u0443\u0447\u0438\u0442\u044C\u0441\u044F \u0438\u043B\u0438 \u0440\u0430\u0431\u043E\u0442\u0430\u0442\u044C \u0432 \u0413\u0435\u0440\u043C\u0430\u043D\u0438\u0438 \u0438 \u0434\u0440\u0443\u0433\u0438\u0445 \u0441\u0442\u0440\u0430\u043D\u0430\u0445 \u0415\u0432\u0440\u043E\u0441\u043E\u044E\u0437\u0430.'  # noqa
    __author__ = 'bugmen00t'
    publication_type = 'newspaper'
    language = 'ru'
    cover_url = 'https://www.dw.com/cssi/dwlogo-print.gif'

    # -- Download limits -----------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 100

    # -- Page clean-up rules -------------------------------------------
    auto_cleanup = False
    no_stylesheets = False
    # Content boundaries: the first <h1> and the "longText" container.
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class': 'longText'})

    # -- Feeds: (section title, RSS URL) pairs -------------------------
    # Every feed is addressed over HTTPS on rss.dw.com so that all
    # downloads use the same host and none travel over plaintext HTTP.
    # NOTE(review): the feed paths are carried over unchanged from the
    # former http://rss.dw.de URLs - confirm each one resolves on
    # rss.dw.com.
    feeds = [
        (u'\u0412\u0435\u0441\u044C \u0441\u0430\u0439\u0442', 'https://rss.dw.com/xml/rss-ru-all'),
        (u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://rss.dw.com/xml/rss-ru-news'),
        (u'\u041F\u043E\u043B\u0438\u0442\u0438\u043A\u0430 \u0438 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u043E', 'https://rss.dw.com/xml/rss-ru-pol'),
        (u'\u042D\u043A\u043E\u043D\u043E\u043C\u0438\u043A\u0430', 'https://rss.dw.com/xml/rss-ru-eco'),
        (u'\u0410\u0432\u0442\u043E\u043C\u043E\u0431\u0438\u043B\u044C', 'https://rss.dw.com/xml/rss-ru-auto'),
        (u'\u041A\u0443\u043B\u044C\u0442\u0443\u0440\u0430 \u0438 \u0441\u0442\u0438\u043B\u044C \u0436\u0438\u0437\u043D\u0438', 'https://rss.dw.com/xml/rss-ru-cul'),
        (u'\u0420\u043E\u0441\u0441\u0438\u044F', 'https://rss.dw.com/xml/rss-ru-rus'),
        (u'\u0413\u0435\u0440\u043C\u0430\u043D\u0438\u044F', 'https://rss.dw.com/xml/rss-ru-ger'),
        (u'\u0415\u0432\u0440\u043E\u043F\u0430', 'https://rss.dw.com/xml/rss-ru-eu'),
        (u'\u0411\u0435\u043B\u0430\u0440\u0443\u0441\u044C', 'https://rss.dw.com/xml/rss-ru-bel'),
        (u'\u0423\u0447\u0435\u0431\u0430 \u0438 \u043A\u0430\u0440\u044C\u0435\u0440\u0430', 'https://rss.dw.com/xml/rss-ru-campus-karriere'),
        (u'\u0423\u0447\u0435\u0431\u0430 ', 'https://rss.dw.com/xml/rss-ru-campus'),
        (u'\u041A\u0430\u0440\u044C\u0435\u0440\u0430 ', 'https://rss.dw.com/xml/rss-ru-karriere'),
        (u'\u0422\u0443\u0440\u0438\u0441\u0442\u0443 \u043D\u0430 \u0437\u0430\u043C\u0435\u0442\u043A\u0443', 'https://rss.dw.com/xml/rss-ru-discover-ger'),
    ]

BIN
recipes/icons/bbc_ru.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 B

BIN
recipes/icons/cedar.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

BIN
recipes/icons/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

BIN
recipes/icons/old_games.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 452 B

BIN
recipes/icons/unian_net.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 448 B

BIN
recipes/icons/verstka.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

View File

@ -0,0 +1,38 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class NovayaGazetaEurope(BasicNewsRecipe):
    """Calibre news recipe for Novaya Gazeta Europe (novayagazeta.eu).

    Uses the site's single Russian-language RSS feed, trims each page
    with explicit tag rules, and flattens text-only hyperlinks in
    ``preprocess_html``.
    """

    # -- Publication metadata ------------------------------------------
    __author__ = 'bugmen00t'
    title = u'\u041D\u043E\u0432\u0430\u044F \u0413\u0430\u0437\u0435\u0442\u0430. \u0415\u0432\u0440\u043E\u043F\u0430'
    description = u'\u0413\u043E\u0432\u043E\u0440\u0438\u043C \u043A\u0430\u043A \u0435\u0441\u0442\u044C. \u041F\u0438\u0448\u0435\u043C \u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u044F\u0449\u0435\u043C \u0432 \u0420\u043E\u0441\u0441\u0438\u0438, \u0423\u043A\u0440\u0430\u0438\u043D\u0435 \u0438 \u0415\u0432\u0440\u043E\u043F\u0435. \u041D\u043E\u0432\u043E\u0441\u0442\u0438, \u0430\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430, \u043C\u043D\u0435\u043D\u0438\u044F \u044D\u043A\u0441\u043F\u0435\u0440\u0442\u043E\u0432, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u044C\u043D\u044B\u0435 \u0440\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0438 \u0438 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0441\u043A\u0438\u0435 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F.'  # noqa
    publisher = '\u041A\u0438\u0440\u0438\u043B\u043B \u041C\u0430\u0440\u0442\u044B\u043D\u043E\u0432'
    category = 'news'
    language = 'ru'
    publication_type = 'newspaper'
    cover_url = 'https://bucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com/public/images/5dc71e2d-9763-4f05-8f4e-92049fa32af7_513x513.png'

    # -- Download limits -----------------------------------------------
    oldest_article = 15
    max_articles_per_feed = 50

    # -- Page clean-up rules -------------------------------------------
    auto_cleanup = False
    # Content boundaries: the first <h1> and the article-blocks wrapper.
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {
        'name': 'div',
        'attrs': {'class': 'ArticleBlocks_wrapperNoAside__11_bu'},
    }
    # Embedded blocks dropped from the article body.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'EmbedNative_root__2lgsH'}},
    ]

    # -- Feeds: (section title, RSS URL) pairs -------------------------
    feeds = [
        (u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://novayagazeta.eu/feed/rss/ru'),
    ]

    def preprocess_html(self, soup):
        """Replace text-only links with their plain text.

        Every ``<a>`` element whose ``.string`` is not ``None`` is
        swapped for that string; anchors for which ``.string`` is
        ``None`` are left in place. The same ``soup`` object is returned
        after being modified.
        """
        for anchor in soup.findAll('a'):
            link_text = anchor.string
            if link_text is None:
                # No single string to substitute; keep the element as is.
                continue
            anchor.replaceWith(link_text)
        return soup

42
recipes/old_games.recipe Normal file
View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class OGRU(BasicNewsRecipe):
    """Calibre news recipe for Old-Games.RU.

    Pulls the site's news and articles feeds, keeps only the
    ``<article>`` element of each page and strips forum chrome
    (navigation, polls, likes, attachments, user info) from it.
    """

    # -- Publication metadata ------------------------------------------
    title = u'Old-Games.RU'
    __author__ = 'bugmen00t'
    # The description is written as two adjacent literals that Python
    # concatenates at compile time, so each quoted piece is closed on
    # its own physical line; a single-quoted literal may not span lines.
    description = (
        u'Old-Games.RU \u2014 \u043A\u0440\u0443\u043F\u043D\u0435\u0439\u0448\u0438\u0439 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u0439 \u0430\u0440\u0445\u0438\u0432 \u0441\u0442\u0430\u0440\u044B\u0445 \u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440\u043D\u044B\u0445 \u0438\u0433\u0440. \u041C\u044B \u043D\u0435 \u0441\u0442\u0430\u0432\u0438\u043C \u043F\u0435\u0440\u0435\u0434 \u0441\u043E\u0431\u043E\u0439 \u0446\u0435\u043B\u0438 \u0441\u043E\u0431\u0440\u0430\u0442\u044C \u0432\u0441\u0435 \u0438\u0433\u0440\u044B, \u0447\u0442\u043E \u0435\u0441\u0442\u044C \u0432 \u043C\u0438\u0440\u0435, \u043D\u043E \u043C\u044B \u0441\u0442\u0430\u0440\u0430\u0435\u043C\u0441\u044F, \u0447\u0442\u043E\u0431\u044B \u043D\u0430 \u0441\u0430\u0439\u0442\u0435 \u0431\u044B\u043B\u043E \u043F\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043B\u0435\u043D\u043E \u0431\u043E\u043B\u044C\u0448\u0438\u043D\u0441\u0442\u0432\u043E \u0448\u0435\u0434\u0435\u0432\u0440\u043E\u0432, \u0440\u0435\u0434\u043A\u043E\u0441\u0442\u0435\u0439 \u0438 \u043F\u0440\u043E\u0441\u0442\u043E \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u044B\u0445 \u043F\u0440\u043E\u0435\u043A\u0442\u043E\u0432 \u043F\u0440\u043E\u0448\u043B\u044B\u0445 \u043B\u0435\u0442. '  # noqa
        u'\u0421 \u0442\u0435\u0447\u0435\u043D\u0438\u0435\u043C \u0432\u0440\u0435\u043C\u0435\u043D\u0438 \u0433\u0440\u0430\u0444\u0438\u0447\u0435\u0441\u043A\u043E\u0435 \u0438 \u0437\u0432\u0443\u043A\u043E\u0432\u043E\u0435 \u043E\u0444\u043E\u0440\u043C\u043B\u0435\u043D\u0438\u0435 \u0438\u0433\u0440 \u043D\u0430\u0448\u0435\u0433\u043E \u0430\u0440\u0445\u0438\u0432\u0430 \u0437\u0430\u043C\u0435\u0442\u043D\u043E \u0443\u0441\u0442\u0430\u0440\u0435\u043B\u043E, \u043D\u043E \u0438\u0433\u0440\u043E\u0432\u043E\u0439 \u043F\u0440\u043E\u0446\u0435\u0441\u0441 \u043E\u0441\u0442\u0430\u043B\u0441\u044F \u043F\u0440\u0435\u0436\u043D\u0438\u043C, \u0438 \u043F\u043E\u0440\u043E\u0439 \u043E\u043D \u0433\u043E\u0440\u0430\u0437\u0434\u043E \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u0435\u0435, \u0447\u0435\u043C \u0432\u043E \u043C\u043D\u043E\u0433\u0438\u0445 \u0441\u043E\u0432\u0440\u0435\u043C\u0435\u043D\u043D\u044B\u0445 \u00AB\u0445\u0438\u0442\u0430\u0445\u00BB.'  # noqa
    )
    publisher = 'Old-Games.RU'
    publication_type = 'blog'
    category = 'news, games, retro'
    language = 'ru'
    cover_url = 'https://www.old-games.ru/forum/styles/default/old-games/logo.og.png'

    # -- Download limits -----------------------------------------------
    oldest_article = 50
    max_articles_per_feed = 50

    # -- Page clean-up rules -------------------------------------------
    no_stylesheets = True
    auto_cleanup = False
    # The <article> element is both the start and the end boundary.
    remove_tags_before = dict(name='article')
    remove_tags_after = dict(name='article')
    remove_attributes = ['style']
    # Forum chrome dropped from inside the article.
    remove_tags = [
        dict(name='p', attrs={'id': 'pageDescription'}),
        dict(name='div', attrs={'class': 'pageNavLinkGroup'}),
        dict(name='div', attrs={'class': 'tagBlock TagContainer'}),
        dict(name='div', attrs={'class': 'NoAutoHeader PollContainer'}),
        dict(name='div', attrs={'class': 'likesSummary secondaryContent'}),
        dict(name='div', attrs={'class': 'editDate'}),
        dict(name='div', attrs={'class': 'attachedFiles'}),
        dict(name='div', attrs={'class': 'item muted postNumber hashPermalink OverlayTrigger'}),
        dict(name='div', attrs={'class': 'messageUserInfo'}),
    ]

    # -- Feeds: (section title, RSS URL) pairs -------------------------
    feeds = [
        (u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://feeds.feedburner.com/Old-games-ru-news'),
        (u'\u0421\u0442\u0430\u0442\u044C\u0438', 'https://feeds.feedburner.com/Old-games-ru-articles'),
    ]

29
recipes/unian_net.recipe Normal file
View File

@ -0,0 +1,29 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Unian(BasicNewsRecipe):
    """Calibre news recipe for the Russian-language UNIAN news feed.

    Reads one RSS feed from rss.unian.net and trims each page with
    explicit tag rules (``auto_cleanup`` is off).
    """

    # -- Publication metadata ------------------------------------------
    __author__ = 'bugmen00t'
    title = '\u0423\u041D\u0418\u0410\u041D '
    description = 'Украинское Независимое Информационное Агентство Новостей первое в Украине и самое большое независимое информационное агентство, основанное в 1993 году, лидер среди новостных медиа страны, самый цитируемый источник новостей о событиях в стране.'  # noqa
    language = 'ru'
    publication_type = 'newspaper'
    cover_url = 'https://www.unian.net/images/unian-512x512.png'

    # -- Download limits -----------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100

    # -- Page clean-up rules -------------------------------------------
    auto_cleanup = False
    no_stylesheets = True
    # Content boundaries: the first <h1> and the article text container.
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'article-text'}}
    # Comment/view counters and the "read also" slider are dropped.
    remove_tags = [
        {'name': 'span', 'attrs': {'class': 'article__info-item comments'}},
        {'name': 'span', 'attrs': {'class': 'article__info-item views'}},
        {'name': 'div', 'attrs': {'class': 'read-also-slider'}},
    ]

    # -- Feeds: (section title, RSS URL) pairs -------------------------
    feeds = [
        (
            u'\u0423\u041D\u0418\u0410\u041D',
            u'https://rss.unian.net/site/news_rus.rss',
        ),
    ]

28
recipes/verstka.recipe Normal file
View File

@ -0,0 +1,28 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Verstka(BasicNewsRecipe):
    """Calibre news recipe for verstka.media.

    Pulls four category feeds (news, articles, columns, interviews) and
    trims each page with explicit tag boundaries (``auto_cleanup`` is
    off).
    """

    # -- Publication metadata ------------------------------------------
    __author__ = 'bugmen00t'
    title = u'\u0412\u0451\u0440\u0441\u0442\u043A\u0430'
    description = u'\u041E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u043A\u043E\u0442\u043E\u0440\u043E\u0435 \u0438\u0441\u0441\u043B\u0435\u0434\u0443\u0435\u0442 \u0438 \u043E\u043F\u0438\u0441\u044B\u0432\u0430\u0435\u0442, \u043A\u0430\u043A \u0444\u0443\u043D\u043A\u0446\u0438\u043E\u043D\u0438\u0440\u0443\u0435\u0442 \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u043E \u0432 \u0420\u043E\u0441\u0441\u0438\u0438.'  # noqa
    language = 'ru'
    publication_type = 'newspaper'
    cover_url = 'https://secureservercdn.net/160.153.137.128/yji.7dd.myftpupload.com/wp-content/uploads/2022/04/Screenshot-2022-04-26-at-22.19.30-300x68.png'

    # -- Download limits -----------------------------------------------
    oldest_article = 21
    max_articles_per_feed = 20

    # -- Page clean-up rules -------------------------------------------
    auto_cleanup = False
    no_stylesheets = False
    # Content boundaries: the first <h1> and the WordPress spacer block.
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'wp-block-spacer'}}

    # -- Feeds: (section title, RSS URL) pairs -------------------------
    feeds = [
        (
            u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438',
            'https://verstka.media/category/news/feed/',
        ),
        (
            u'\u0421\u0442\u0430\u0442\u044C\u0438',
            'https://verstka.media/category/article/feed/',
        ),
        (
            u'\u041A\u043E\u043B\u043E\u043D\u043A\u0438',
            'https://verstka.media/category/column/feed/',
        ),
        (
            u'\u0418\u043D\u0442\u0435\u0440\u0432\u044C\u044E',
            'https://verstka.media/category/interview/feed/',
        ),
    ]