More Russian and Ukrainian news sources by bugmen00t

This commit is contained in:
Kovid Goyal 2022-07-25 18:46:54 +05:30
parent bd7aa63c14
commit f1a560b61a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
16 changed files with 280 additions and 0 deletions

32
recipes/bbc_uk.recipe Normal file
View File

@ -0,0 +1,32 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class BBC(BasicNewsRecipe):
    """Recipe for the BBC News Ukrainian-language RSS feed."""

    # --- Catalogue metadata -------------------------------------------
    title = u'BBC Ukrainian'
    __author__ = 'bugmen00t'
    description = u'BBC News \u0423\u043A\u0440\u0430\u0457\u043D\u0430'
    language = 'uk'
    publication_type = 'newspaper'
    cover_url = 'https://news.files.bbci.co.uk/ws/img/logos/og/ukrainian.png'

    # --- Fetch limits -------------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 50

    # --- Article clean-up ---------------------------------------------
    # Automatic clean-up is enabled together with the explicit tag rules
    # listed below.
    auto_cleanup = True
    no_stylesheets = True
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'main', 'attrs': {'aria-hidden': 'true'}}
    remove_tags = [
        {'name': 'section', 'attrs': {'role': 'region'}},
        {'name': 'footer'},
        {'name': 'aside'},
    ]

    # --- Feeds: (section title, RSS URL) ------------------------------
    feeds = [
        (
            u'\u041D\u043E\u0432\u0438\u043D\u0438 BBC',
            'https://feeds.bbci.co.uk/ukrainian/rss.xml',
        ),
    ]

BIN
recipes/icons/bbc_uk.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 B

Binary file not shown.

BIN
recipes/icons/kholod.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.1 KiB

BIN
recipes/icons/meduza.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 513 B

BIN
recipes/icons/meduza_ru.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 513 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 663 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

38
recipes/istories.recipe Normal file
View File

@ -0,0 +1,38 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class IStories(BasicNewsRecipe):
    """Recipe for the istories.media ("Vazhnye istorii") RSS feed."""

    # --- Catalogue metadata -------------------------------------------
    title = u'\u0412\u0430\u0436\u043D\u044B\u0435 \u0438\u0441\u0442\u043E\u0440\u0438\u0438'
    __author__ = 'bugmen00t'
    description = u'\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0449\u0435\u0435\u0441\u044F \u043D\u0430 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0441\u043A\u0438\u0445 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F\u0445. \u041E\u0441\u043D\u043E\u0432\u0430\u043D\u043E \u0432 2020 \u0433\u043E\u0434\u0443 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u043C\u0438 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0430\u043C\u0438 \u0420\u043E\u043C\u0430\u043D\u043E\u043C \u0410\u043D\u0438\u043D\u044B\u043C \u0438 \u041E\u043B\u0435\u0441\u0435\u0439 \u0428\u043C\u0430\u0433\u0443\u043D.'  # noqa
    publisher = 'Roman Anin & Olesya Shmagun'
    language = 'ru'
    publication_type = 'blog'
    cover_url = 'https://static.istories.media/public/cover.png'

    # --- Fetch limits -------------------------------------------------
    oldest_article = 21
    max_articles_per_feed = 50

    # --- Article clean-up ---------------------------------------------
    # Clean-up is fully manual: everything before the <h1> and after the
    # <article> element is trimmed, then the listed elements are removed.
    auto_cleanup = False
    no_stylesheets = True
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'article'}
    remove_tags = [
        {'name': 'header'},
        {'name': 'footer'},
        {'name': 'form', 'attrs': {'class': 'subscr'}},
        {'name': 'div', 'attrs': {'class': 'row'}},
        {'name': 'div', 'attrs': {'class': 'arrow-black'}},
        {'name': 'div', 'attrs': {'class': 'article-foot'}},
        {'name': 'div', 'attrs': {'class': 'article-toggle'}},
        {'name': 'div', 'attrs': {'class': 'article-soc'}},
    ]

    # --- Feeds: (section title, RSS URL) ------------------------------
    feeds = [
        (
            u'\u0438\u0441\u0442\u043E\u0440\u0438\u0438',
            'https://istories.media/rss/all.xml',
        ),
    ]

47
recipes/kholod.recipe Normal file
View File

@ -0,0 +1,47 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Kholod(BasicNewsRecipe):
    """Recipe for the section feeds published by holod.media."""

    # --- Catalogue metadata -------------------------------------------
    title = u'\u0425\u043E\u043B\u043E\u0434'
    __author__ = 'bugmen00t'
    description = u'\u0418\u0441\u0442\u043E\u0440\u0438\u0438 \u043E \u0420\u043E\u0441\u0441\u0438\u0438: \u043C\u044B \u0438\u0449\u0435\u043C \u0437\u0430\u0445\u0432\u0430\u0442\u044B\u0432\u0430\u044E\u0449\u0438\u0435 \u0438\u0441\u0442\u043E\u0440\u0438\u0438 \u043F\u043E \u0432\u0441\u0435\u0439 \u0420\u043E\u0441\u0441\u0438\u0438, \u0430 \u043F\u043E\u0442\u043E\u043C \u0440\u0430\u0441\u0441\u043A\u0430\u0437\u044B\u0432\u0430\u0435\u043C \u0432\u0430\u043C.'  # noqa
    publisher = '\u0422\u0430\u0438\u0441\u0438\u044F \u0411\u0435\u043A\u0431\u0443\u043B\u0430\u0442\u043E\u0432\u0430'
    language = 'ru'
    publication_type = 'blog'
    cover_url = 'https://image.simplecastcdn.com/images/93a97011-6988-4787-8242-e202b2840fde/08e85f64-9901-44e1-b20c-7da01c5ce0c0/holodpodcastlogo.jpg'

    # --- Fetch limits -------------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 200

    # --- Article clean-up ---------------------------------------------
    auto_cleanup = False
    no_stylesheets = False
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {
        'name': 'div',
        'attrs': {'class': 'article__content the-content text-column'},
    }
    remove_tags = [{'name': 'div', 'attrs': {'class': 'inlinemore'}}]

    # --- Feeds: (section title, RSS URL), one per site section --------
    feeds = [
        # daily
        (u'\u0410\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u043E', 'https://holod.media/sections/daily/feed/'),
        # stories
        (u'\u0418\u0441\u0442\u043E\u0440\u0438\u0438', 'https://holod.media/sections/stories/feed/'),
        # opinions
        (u'\u041C\u043D\u0435\u043D\u0438\u044F', 'https://holod.media/sections/opinions/feed/'),
        # interviews
        (u'\u0418\u043D\u0442\u0435\u0440\u0432\u044C\u044E', 'https://holod.media/sections/interviews/feed/'),
        # explainers
        (u'\u041E\u0431\u044A\u044F\u0441\u043D\u044F\u0435\u043C', 'https://holod.media/sections/explainers/feed/'),
    ]

31
recipes/meduza.recipe Normal file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Meduza(BasicNewsRecipe):
    """Recipe for the English-language Meduza RSS feeds."""

    # --- Catalogue metadata -------------------------------------------
    title = u'Meduza'
    __author__ = 'bugmen00t'
    description = u'Russian- and English-language independent news website, based in Latvia. It was founded in 2014 by a group of former employees of the then independent Lenta.ru news website. Every day we bring you the most important news and feature stories from hundreds of sources in Russia and across the former Soviet Union.'  # noqa
    publisher = 'Medusa Project SIA'
    language = 'en_RU'
    publication_type = 'blog'
    cover_url = 'https://meduza.io/impro/E_cJMv0IQxOC45z-YXeGuzuPB2kQ_A1XsZYrdByOCnk/fill/1200/0/ce/0/aHR0cHM6Ly9tZWR1/emEuaW8vaW1hZ2Uv/YXR0YWNobWVudHMv/aW1hZ2VzLzAwNi83/MTgvODcyL29yaWdp/bmFsLzVPSmRDdWc1/bC1JVG9lTXBqSHFH/ZXcucG5n.png'  # noqa

    # --- Fetch limits -------------------------------------------------
    oldest_article = 21
    max_articles_per_feed = 100

    # --- Article clean-up ---------------------------------------------
    auto_cleanup = False
    no_stylesheets = False
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {
        'name': 'div',
        'attrs': {'class': 'GeneralMaterial-article'},
    }
    remove_tags = [{'name': 'div', 'attrs': {'id': 'div-gpt-ad'}}]

    # --- Feeds: (section title, RSS URL) ------------------------------
    feeds = [
        (
            u'News',
            'https://meduza.io/rss2/en/news',
        ),
        (
            u'Feature stories',
            'https://meduza.io/rss2/en/stories',
        ),
    ]

35
recipes/meduza_ru.recipe Normal file
View File

@ -0,0 +1,35 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Meduza(BasicNewsRecipe):
    """Recipe for the Russian-language Meduza RSS feeds."""

    # --- Catalogue metadata -------------------------------------------
    title = u'Meduza'
    __author__ = 'bugmen00t'
    description = u'\u041C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u043E\u0435 \u0440\u0443\u0441\u0441\u043A\u043E\u044F\u0437\u044B\u0447\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435. \u041C\u044B \u0432\u044B\u0431\u0438\u0440\u0430\u0435\u043C \u0434\u043B\u044F \u0432\u0430\u0441 \u0441\u0430\u043C\u044B\u0435 \u0432\u0430\u0436\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0438 \u0433\u043E\u0442\u043E\u0432\u0438\u043C \u043B\u0443\u0447\u0448\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043C\u0438\u0440\u0435.'  # noqa
    publisher = 'Medusa Project SIA'
    language = 'ru'
    publication_type = 'blog'
    cover_url = 'https://meduza.io/impro/E_cJMv0IQxOC45z-YXeGuzuPB2kQ_A1XsZYrdByOCnk/fill/1200/0/ce/0/aHR0cHM6Ly9tZWR1/emEuaW8vaW1hZ2Uv/YXR0YWNobWVudHMv/aW1hZ2VzLzAwNi83/MTgvODcyL29yaWdp/bmFsLzVPSmRDdWc1/bC1JVG9lTXBqSHFH/ZXcucG5n.png'  # noqa

    # --- Fetch limits -------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Article clean-up ---------------------------------------------
    auto_cleanup = False
    no_stylesheets = False
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {
        'name': 'div',
        'attrs': {'class': 'GeneralMaterial-article'},
    }
    remove_tags = [
        {'name': 'div', 'attrs': {'id': 'div-gpt-ad'}},
    ]

    # --- Feeds: (section title, RSS URL) ------------------------------
    feeds = [
        # news
        (u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://meduza.io/rss2/news'),
        # articles
        (u'\u0418\u0441\u0442\u043E\u0440\u0438\u0438', 'https://meduza.io/rss2/articles'),
    ]

36
recipes/n_plus_one.recipe Normal file
View File

@ -0,0 +1,36 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1467724863(BasicNewsRecipe):
    """Recipe for the N+1 (nplus1.ru) RSS feed."""

    # --- Catalogue metadata -------------------------------------------
    title = 'N+1'
    __author__ = 'bugmen00t'
    description = '\u041D\u0430\u0443\u0447\u043D\u043E-\u043F\u043E\u043F\u0443\u043B\u044F\u0440\u043D\u043E\u0435 \u0440\u0430\u0437\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u043D\u0430\u0443\u043A\u0435, \u0442\u0435\u0445\u043D\u0438\u043A\u0435 \u0438 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F\u0445 \u043F\u0440\u044F\u043C\u043E \u0441\u0435\u0439\u0447\u0430\u0441. \u041D\u043E\u0432\u043E\u0441\u0442\u0438, \u0431\u043E\u043B\u044C\u0448\u0438\u0435 \u0441\u0442\u0430\u0442\u044C\u0438, \u0431\u043B\u043E\u0433\u0438 \u2014 \u044D\u0442\u043E \u0432\u0441\u0435 \u043F\u0440\u043E \u043D\u0430\u0441. \u041C\u044B \u0438\u0449\u0435\u043C \u0441\u0430\u043C\u043E\u0435 \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u043E\u0435 \u0438 \u0434\u043E\u0441\u0442\u0430\u0432\u043B\u044F\u0435\u043C \u044D\u0442\u043E \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u044F\u043C \u0432 \u043F\u043E\u043D\u044F\u0442\u043D\u043E\u0439, \u044F\u0441\u043D\u043E\u0439, \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0439 (\u0438 \u0441 \u0432\u0438\u0437\u0443\u0430\u043B\u044C\u043D\u043E\u0439 \u0442\u043E\u0447\u043A\u0438 \u0437\u0440\u0435\u043D\u0438\u044F) \u0444\u043E\u0440\u043C\u0435. \u041C\u044B \u2014 \u0447\u0443\u0442\u044C \u0431\u043E\u043B\u044C\u0448\u0435, \u0447\u0435\u043C \u043F\u0440\u043E\u0441\u0442\u043E \u043D\u0430\u0443\u043A\u0430!'  # noqa
    publisher = 'N+1'
    category = 'news'
    language = 'ru'
    cover_url = u'https://nplus1.ru/i/logo.png'

    # --- Fetch limits -------------------------------------------------
    oldest_article = 14
    max_articles_per_feed = 100

    # --- Article clean-up ---------------------------------------------
    auto_cleanup = False
    no_stylesheets = False
    remove_javascript = True
    # Content is trimmed from the <h1> onwards; an earlier variant of this
    # recipe anchored on <article class="content"> instead.
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'class': 'body'}}
    remove_tags = [
        {'name': 'div', 'attrs': {'class': css_class}}
        for css_class in ('share-incut', 'share-mobile')
    ]

    # --- Feeds: (section title, RSS URL) ------------------------------
    feeds = [
        ('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 \u043d\u0430\u0443\u043a\u0438', 'https://nplus1.ru/rss'),
    ]

View File

@ -0,0 +1,34 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class NovayaGazetaEurope(BasicNewsRecipe):
    """Recipe for the English edition of Novaya Gazeta Europe."""

    # --- Catalogue metadata -------------------------------------------
    title = u'Novaya Gazeta Europe'
    __author__ = 'bugmen00t'
    description = u'English edition of Novaya Gazeta Europe: news, analytics, expert opinions, special reports and investigative journalism.'
    publisher = 'Kirill Martynov'
    category = 'news'
    language = 'en_RU'
    cover_url = 'https://bucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com/public/images/5dc71e2d-9763-4f05-8f4e-92049fa32af7_513x513.png'

    # --- Fetch limits -------------------------------------------------
    oldest_article = 15
    max_articles_per_feed = 50

    # --- Article clean-up ---------------------------------------------
    auto_cleanup = False
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {
        'name': 'div',
        'attrs': {'class': 'ArticleBlocks_wrapperNoAside__11_bu'},
    }
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'EmbedNative_root__2lgsH'}},
    ]

    # --- Feeds: (section title, RSS URL) ------------------------------
    feeds = [
        (u'News', 'https://novayagazeta.eu/feed/rss/en'),
    ]

    def preprocess_html(self, soup):
        """Replace hyperlinks with their plain text.

        Every ``<a>`` element whose ``.string`` is not ``None`` is swapped
        for that string; anchors for which ``.string`` is ``None`` are left
        in place.

        :param soup: parsed article document; it is modified in place.
        :return: the same ``soup`` object.
        """
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is None:
                continue
            anchor.replaceWith(label)
        return soup

27
recipes/sobesednik.recipe Normal file
View File

@ -0,0 +1,27 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class Sobesednik(BasicNewsRecipe):
    """Recipe for the Sobesednik newspaper RSS feed."""

    # --- Catalogue metadata -------------------------------------------
    title = u'\u0421\u043E\u0431\u0435\u0441\u0435\u0434\u043D\u0438\u043A'
    __author__ = 'bugmen00t'
    description = u'\u0421\u0432\u0435\u0436\u0438\u0435 \u044D\u043A\u0441\u043A\u043B\u044E\u0437\u0438\u0432\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u043E \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0435 \u0438 \u0448\u043E\u0443 \u0431\u0438\u0437\u043D\u0435\u0441\u0435'  # noqa
    publisher = '\u041E\u041E\u041E \u00AB\u0421\u043E\u0431\u0435\u0441\u0435\u0434\u043D\u0438\u043A-\u041C\u0435\u0434\u0438\u0430\u00BB'
    language = 'ru'
    publication_type = 'newspaper'
    # NOTE(review): the cover is served from sobesednik.ru while the feed
    # below is fetched from sobesednik.com - confirm both hosts are intended.
    cover_url = 'https://sobesednik.ru/images/logo1.png'

    # --- Fetch limits -------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Article clean-up ---------------------------------------------
    auto_cleanup = False
    no_stylesheets = False
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = {'name': 'div', 'attrs': {'id': 'article-content'}}

    # --- Feeds: (section title, RSS URL) ------------------------------
    feeds = [
        (
            u'\u0413\u0430\u0437\u0435\u0442\u0430 "\u0421\u043E\u0431\u0435\u0441\u0435\u0434\u043D\u0438\u043A"',
            'https://sobesednik.com/rss.xml',
        ),
    ]