More Russian and Ukrainian news sources by bugmen00t

2025-07-09 03:04:10 -04:00 · 2022-07-25 18:46:54 +05:30 · 2022-07-25 18:46:54 +05:30 · f1a560b61a
commit f1a560b61a
parent bd7aa63c14
16 changed files with 280 additions and 0 deletions
--- a/recipes/bbc_uk.recipe
+++ b/recipes/bbc_uk.recipe
@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class BBC(BasicNewsRecipe):  # calibre news recipe for the BBC Ukrainian service, driven by its RSS feed
+    title = u'BBC Ukrainian'
+    description = u'BBC News \u0423\u043A\u0440\u0430\u0457\u043D\u0430'  # escapes spell "BBC News Україна" (Ukraine)
+    __author__ = 'bugmen00t'
+    publication_type = 'newspaper'
+    oldest_article = 14  # article age cutoff; calibre documents this value as days
+    max_articles_per_feed = 50  # upper bound on articles taken from each feed
+    language = 'uk'
+    cover_url = 'https://news.files.bbci.co.uk/ws/img/logos/og/ukrainian.png'
+    auto_cleanup = True  # NOTE(review): enabled together with the manual remove_tags* rules below - confirm both are intended
+    no_stylesheets = True
+
+    remove_tags_before = dict(name='h1')  # trim markup that precedes the <h1> headline
+
+    remove_tags_after = dict(name='main', attrs={'aria-hidden': 'true'})  # trim markup that follows <main aria-hidden="true">
+
+    remove_tags = [  # elements stripped from each article page
+        dict(name='section', attrs={'role': 'region'}),
+        dict(name='footer'),
+        dict(name='aside')
+    ]
+
+    feeds = [(  # (feed title, RSS URL) pairs
+        u'\u041D\u043E\u0432\u0438\u043D\u0438 BBC',  # "Новини BBC" (BBC News)
+        'https://feeds.bbci.co.uk/ukrainian/rss.xml'
+    )]
--- a/recipes/icons/bbc_uk.png
+++ b/recipes/icons/bbc_uk.png
--- a/recipes/icons/istories.recipe
+++ b/recipes/icons/istories.recipe
--- a/recipes/icons/kholod.png
+++ b/recipes/icons/kholod.png
--- a/recipes/icons/meduza.png
+++ b/recipes/icons/meduza.png
--- a/recipes/icons/meduza_ru.png
+++ b/recipes/icons/meduza_ru.png
--- a/recipes/icons/n_plus_one.png
+++ b/recipes/icons/n_plus_one.png
--- a/recipes/icons/novaya_gazeta_europe_en.png
+++ b/recipes/icons/novaya_gazeta_europe_en.png
--- a/recipes/icons/sobesednik.png
+++ b/recipes/icons/sobesednik.png
--- a/recipes/istories.recipe
+++ b/recipes/istories.recipe
@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class IStories(BasicNewsRecipe):  # calibre news recipe for istories.media, driven by its all-articles RSS feed
+    title = u'\u0412\u0430\u0436\u043D\u044B\u0435 \u0438\u0441\u0442\u043E\u0440\u0438\u0438'  # "Важные истории" (Important Stories)
+    description = u'\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0449\u0435\u0435\u0441\u044F \u043D\u0430 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0441\u043A\u0438\u0445 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F\u0445. \u041E\u0441\u043D\u043E\u0432\u0430\u043D\u043E \u0432 2020 \u0433\u043E\u0434\u0443 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u043C\u0438 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0430\u043C\u0438 \u0420\u043E\u043C\u0430\u043D\u043E\u043C \u0410\u043D\u0438\u043D\u044B\u043C \u0438 \u041E\u043B\u0435\u0441\u0435\u0439 \u0428\u043C\u0430\u0433\u0443\u043D.'  # noqa
+    __author__ = 'bugmen00t'
+    publisher = 'Roman Anin & Olesya Shmagun'
+    publication_type = 'blog'
+    oldest_article = 21  # article age cutoff; calibre documents this value as days
+    max_articles_per_feed = 50  # upper bound on articles taken from each feed
+    language = 'ru'
+    cover_url = 'https://static.istories.media/public/cover.png'
+    auto_cleanup = False  # cleanup relies on the explicit remove_tags* rules below
+    no_stylesheets = True
+
+    remove_tags_before = dict(name='h1')  # trim markup that precedes the <h1> headline
+
+    remove_tags_after = dict(name='article')  # trim markup that follows the <article> element
+
+    remove_tags = [  # elements stripped from each article page
+        dict(name='header'),
+        dict(name='footer'),
+        dict(name='form', attrs={'class': 'subscr'}),
+        dict(name='div', attrs={'class': 'row'}),  # NOTE(review): 'row' is a very generic class - confirm it does not match article content
+        dict(name='div', attrs={'class': 'arrow-black'}),
+        dict(name='div', attrs={'class': 'article-foot'}),
+        dict(name='div', attrs={'class': 'article-toggle'}),
+        dict(name='div', attrs={'class': 'article-soc'})
+    ]
+
+    feeds = [(  # (feed title, RSS URL) pairs
+        u'\u0438\u0441\u0442\u043E\u0440\u0438\u0438',  # "истории" (stories)
+        'https://istories.media/rss/all.xml'
+    )]
--- a/recipes/kholod.recipe
+++ b/recipes/kholod.recipe
@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Kholod(BasicNewsRecipe):  # calibre news recipe for holod.media, one RSS feed per site section
+    title = u'\u0425\u043E\u043B\u043E\u0434'  # "Холод" (Kholod)
+    description = u'\u0418\u0441\u0442\u043E\u0440\u0438\u0438 \u043E \u0420\u043E\u0441\u0441\u0438\u0438: \u043C\u044B \u0438\u0449\u0435\u043C \u0437\u0430\u0445\u0432\u0430\u0442\u044B\u0432\u0430\u044E\u0449\u0438\u0435 \u0438\u0441\u0442\u043E\u0440\u0438\u0438 \u043F\u043E \u0432\u0441\u0435\u0439 \u0420\u043E\u0441\u0441\u0438\u0438, \u0430 \u043F\u043E\u0442\u043E\u043C \u0440\u0430\u0441\u0441\u043A\u0430\u0437\u044B\u0432\u0430\u0435\u043C \u0432\u0430\u043C.'  # noqa
+    __author__ = 'bugmen00t'
+    publisher = '\u0422\u0430\u0438\u0441\u0438\u044F \u0411\u0435\u043A\u0431\u0443\u043B\u0430\u0442\u043E\u0432\u0430'  # "Таисия Бекбулатова" (Taisiya Bekbulatova)
+    publication_type = 'blog'
+    oldest_article = 14  # article age cutoff; calibre documents this value as days
+    max_articles_per_feed = 200  # upper bound on articles taken from each feed
+    language = 'ru'
+    cover_url = 'https://image.simplecastcdn.com/images/93a97011-6988-4787-8242-e202b2840fde/08e85f64-9901-44e1-b20c-7da01c5ce0c0/holodpodcastlogo.jpg'
+    auto_cleanup = False  # cleanup relies on the explicit remove_tags* rules below
+    no_stylesheets = False
+
+    remove_tags_before = dict(name='h1')  # trim markup that precedes the <h1> headline
+
+    remove_tags_after = dict(  # trim markup that follows the article content container
+        name='div', attrs={'class': 'article__content the-content text-column'}  # NOTE(review): multi-class string - confirm how it is matched against the page's class attribute
+    )
+
+    remove_tags = [dict(name='div', attrs={'class': 'inlinemore'})]  # elements stripped from each article page
+
+    feeds = [(  # (section title, RSS URL) pairs
+        u'\u0410\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u043E',  # "Актуально" (Current)
+        'https://holod.media/sections/daily/feed/'
+    ),
+             (
+                 u'\u0418\u0441\u0442\u043E\u0440\u0438\u0438',  # "Истории" (Stories)
+                 'https://holod.media/sections/stories/feed/'
+             ),
+             (
+                 u'\u041C\u043D\u0435\u043D\u0438\u044F',  # "Мнения" (Opinions)
+                 'https://holod.media/sections/opinions/feed/'
+             ),
+             (
+                 u'\u0418\u043D\u0442\u0435\u0440\u0432\u044C\u044E',  # "Интервью" (Interviews)
+                 'https://holod.media/sections/interviews/feed/'
+             ),
+             (
+                 u'\u041E\u0431\u044A\u044F\u0441\u043D\u044F\u0435\u043C',  # "Объясняем" (We explain)
+                 'https://holod.media/sections/explainers/feed/'
+             )]
--- a/recipes/meduza.recipe
+++ b/recipes/meduza.recipe
@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Meduza(BasicNewsRecipe):  # calibre news recipe for Meduza's English-language feeds (rss2/en/...)
+    title = u'Meduza'
+    description = u'Russian- and English-language independent news website, based in Latvia. It was founded in 2014 by a group of former employees of the then independent Lenta.ru news website. Every day we bring you the most important news and feature stories from hundreds of sources in Russia and across the former Soviet Union.'  # noqa
+    __author__ = 'bugmen00t'
+    publisher = 'Medusa Project SIA'
+    publication_type = 'blog'
+    oldest_article = 21  # article age cutoff; calibre documents this value as days
+    max_articles_per_feed = 100  # upper bound on articles taken from each feed
+    language = 'en_RU'  # English-language content tagged with the RU region
+    cover_url = 'https://meduza.io/impro/E_cJMv0IQxOC45z-YXeGuzuPB2kQ_A1XsZYrdByOCnk/fill/1200/0/ce/0/aHR0cHM6Ly9tZWR1/emEuaW8vaW1hZ2Uv/YXR0YWNobWVudHMv/aW1hZ2VzLzAwNi83/MTgvODcyL29yaWdp/bmFsLzVPSmRDdWc1/bC1JVG9lTXBqSHFH/ZXcucG5n.png'  # noqa
+    auto_cleanup = False  # cleanup relies on the explicit remove_tags* rules below
+    no_stylesheets = False
+
+    remove_tags_before = dict(name='h1')  # trim markup that precedes the <h1> headline
+
+    remove_tags_after = dict(name='div', attrs={'class': 'GeneralMaterial-article'})  # trim markup that follows the article container
+
+    remove_tags = [  # elements stripped from each article page
+        dict(name='div', attrs={'id': 'div-gpt-ad'}),  # presumably an ad slot, judging by the id - verify against the live page
+    ]
+
+    feeds = [  # (section title, RSS URL) pairs
+        (u'News', 'https://meduza.io/rss2/en/news'),
+        (u'Feature stories', 'https://meduza.io/rss2/en/stories'),
+    ]
--- a/recipes/meduza_ru.recipe
+++ b/recipes/meduza_ru.recipe
@ -0,0 +1,35 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Meduza(BasicNewsRecipe):  # calibre news recipe for Meduza's Russian-language feeds (rss2/news, rss2/articles)
+    title = u'Meduza'
+    description = u'\u041C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u043E\u0435 \u0440\u0443\u0441\u0441\u043A\u043E\u044F\u0437\u044B\u0447\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435. \u041C\u044B \u0432\u044B\u0431\u0438\u0440\u0430\u0435\u043C \u0434\u043B\u044F \u0432\u0430\u0441 \u0441\u0430\u043C\u044B\u0435 \u0432\u0430\u0436\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0438 \u0433\u043E\u0442\u043E\u0432\u0438\u043C \u043B\u0443\u0447\u0448\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043C\u0438\u0440\u0435.'  # noqa
+    __author__ = 'bugmen00t'
+    publisher = 'Medusa Project SIA'
+    publication_type = 'blog'
+    oldest_article = 7  # article age cutoff; calibre documents this value as days
+    max_articles_per_feed = 100  # upper bound on articles taken from each feed
+    language = 'ru'
+    cover_url = 'https://meduza.io/impro/E_cJMv0IQxOC45z-YXeGuzuPB2kQ_A1XsZYrdByOCnk/fill/1200/0/ce/0/aHR0cHM6Ly9tZWR1/emEuaW8vaW1hZ2Uv/YXR0YWNobWVudHMv/aW1hZ2VzLzAwNi83/MTgvODcyL29yaWdp/bmFsLzVPSmRDdWc1/bC1JVG9lTXBqSHFH/ZXcucG5n.png'  # noqa
+    auto_cleanup = False  # cleanup relies on the explicit remove_tags* rules below
+    no_stylesheets = False
+
+    remove_tags_before = dict(name='h1')  # trim markup that precedes the <h1> headline
+
+    remove_tags_after = dict(name='div', attrs={'class': 'GeneralMaterial-article'})  # trim markup that follows the article container
+
+    remove_tags = [dict(name='div', attrs={'id': 'div-gpt-ad'})]  # presumably an ad slot, judging by the id - verify against the live page
+
+    feeds = [  # (section title, RSS URL) pairs
+        (
+            u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438',  # "Новости" (News)
+            'https://meduza.io/rss2/news'
+        ),
+        (
+            u'\u0418\u0441\u0442\u043E\u0440\u0438\u0438',  # "Истории" (Stories)
+            'https://meduza.io/rss2/articles'
+        ),
+    ]
--- a/recipes/n_plus_one.recipe
+++ b/recipes/n_plus_one.recipe
@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class AdvancedUserRecipe1467724863(BasicNewsRecipe):  # calibre news recipe for N+1 (nplus1.ru); the class name does not reflect the title
+    title = 'N+1'
+    __author__ = 'bugmen00t'
+    description = '\u041D\u0430\u0443\u0447\u043D\u043E-\u043F\u043E\u043F\u0443\u043B\u044F\u0440\u043D\u043E\u0435 \u0440\u0430\u0437\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u043D\u0430\u0443\u043A\u0435, \u0442\u0435\u0445\u043D\u0438\u043A\u0435 \u0438 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F\u0445 \u043F\u0440\u044F\u043C\u043E \u0441\u0435\u0439\u0447\u0430\u0441. \u041D\u043E\u0432\u043E\u0441\u0442\u0438, \u0431\u043E\u043B\u044C\u0448\u0438\u0435 \u0441\u0442\u0430\u0442\u044C\u0438, \u0431\u043B\u043E\u0433\u0438 \u2014 \u044D\u0442\u043E \u0432\u0441\u0435 \u043F\u0440\u043E \u043D\u0430\u0441. \u041C\u044B \u0438\u0449\u0435\u043C \u0441\u0430\u043C\u043E\u0435 \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u043E\u0435 \u0438 \u0434\u043E\u0441\u0442\u0430\u0432\u043B\u044F\u0435\u043C \u044D\u0442\u043E \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u044F\u043C \u0432 \u043F\u043E\u043D\u044F\u0442\u043D\u043E\u0439, \u044F\u0441\u043D\u043E\u0439, \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0439 (\u0438 \u0441 \u0432\u0438\u0437\u0443\u0430\u043B\u044C\u043D\u043E\u0439 \u0442\u043E\u0447\u043A\u0438 \u0437\u0440\u0435\u043D\u0438\u044F) \u0444\u043E\u0440\u043C\u0435. \u041C\u044B \u2014 \u0447\u0443\u0442\u044C \u0431\u043E\u043B\u044C\u0448\u0435, \u0447\u0435\u043C \u043F\u0440\u043E\u0441\u0442\u043E \u043D\u0430\u0443\u043A\u0430!'  # noqa
+    publisher = 'N+1'
+    category = 'news'
+    cover_url = u'https://nplus1.ru/i/logo.png'
+    language = 'ru'
+    no_stylesheets = False
+    remove_javascript = True
+    auto_cleanup = False  # cleanup relies on the explicit remove_tags* rules below
+    oldest_article = 14  # article age cutoff; calibre documents this value as days
+    max_articles_per_feed = 100  # upper bound on articles taken from each feed
+
+    #    remove_tags_before = dict(name='article', attrs={'class':'content'})
+    remove_tags_before = dict(name='h1')  # active rule; the commented-out line above is an alternative anchor
+
+    remove_tags_after = dict(name='div', attrs={'class': 'body'})  # trim markup that follows div.body
+
+    remove_tags = [  # elements stripped from each article page
+        dict(name='div', attrs={'class': 'share-incut'}),
+        dict(name='div', attrs={'class': 'share-mobile'})
+    ]
+
+    feeds = [  # (feed title, RSS URL) pairs
+        (
+            '\u041d\u043e\u0432\u043e\u0441\u0442\u0438 \u043d\u0430\u0443\u043a\u0438',  # "Новости науки" (Science news)
+            'https://nplus1.ru/rss'
+        ),
+    ]
--- a/recipes/novaya_gazeta_europe_en.recipe
+++ b/recipes/novaya_gazeta_europe_en.recipe
@ -0,0 +1,34 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class NovayaGazetaEurope(BasicNewsRecipe):  # calibre news recipe for the English RSS feed of novayagazeta.eu
+    title = u'Novaya Gazeta Europe'
+    __author__ = 'bugmen00t'
+    description = u'English edition of Novaya Gazeta Europe: news, analytics, expert opinions, special reports and investigative journalism.'
+    publisher = 'Kirill Martynov'
+    category = 'news'
+    language = 'en_RU'  # English-language content tagged with the RU region
+    cover_url = 'https://bucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com/public/images/5dc71e2d-9763-4f05-8f4e-92049fa32af7_513x513.png'
+    oldest_article = 15  # article age cutoff; calibre documents this value as days
+    max_articles_per_feed = 50  # upper bound on articles taken from each feed
+    auto_cleanup = False  # cleanup relies on the explicit remove_tags* rules and preprocess_html below
+
+    remove_tags_before = dict(name='h1')  # trim markup that precedes the <h1> headline
+
+    remove_tags_after = dict(  # trim markup that follows the article blocks wrapper
+        name='div', attrs={'class': 'ArticleBlocks_wrapperNoAside__11_bu'}  # NOTE(review): hash-like suffix looks build-generated - may change when the site is redeployed
+    )
+
+    remove_tags = [dict(name='div', attrs={'class': 'EmbedNative_root__2lgsH'})]  # NOTE(review): same hash-like suffix concern as above
+
+    feeds = [(u'News', 'https://novayagazeta.eu/feed/rss/en')]  # (feed title, RSS URL) pairs
+
+    def preprocess_html(self, soup):  # takes the parsed page, unlinks plain-text anchors, returns the same soup object
+        for alink in soup.findAll('a'):  # visit every <a> element in the page
+            if alink.string is not None:  # anchors whose .string is None are left untouched
+                tstr = alink.string
+                alink.replaceWith(tstr)  # swap the <a> element for its text, dropping the hyperlink
+        return soup
--- a/recipes/sobesednik.recipe
+++ b/recipes/sobesednik.recipe
@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Sobesednik(BasicNewsRecipe):  # calibre news recipe for the Sobesednik newspaper, driven by its RSS feed
+    title = u'\u0421\u043E\u0431\u0435\u0441\u0435\u0434\u043D\u0438\u043A'  # "Собеседник" (Sobesednik)
+    description = u'\u0421\u0432\u0435\u0436\u0438\u0435 \u044D\u043A\u0441\u043A\u043B\u044E\u0437\u0438\u0432\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u043E \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0435 \u0438 \u0448\u043E\u0443 \u0431\u0438\u0437\u043D\u0435\u0441\u0435'  # noqa
+    __author__ = 'bugmen00t'
+    publisher = '\u041E\u041E\u041E \u00AB\u0421\u043E\u0431\u0435\u0441\u0435\u0434\u043D\u0438\u043A-\u041C\u0435\u0434\u0438\u0430\u00BB'  # "ООО «Собеседник-Медиа»" (Sobesednik-Media LLC)
+    publication_type = 'newspaper'
+    oldest_article = 7  # article age cutoff; calibre documents this value as days
+    max_articles_per_feed = 100  # upper bound on articles taken from each feed
+    language = 'ru'
+    cover_url = 'https://sobesednik.ru/images/logo1.png'  # NOTE(review): hosted on sobesednik.ru while the feed below uses sobesednik.com - confirm both domains
+    auto_cleanup = False  # cleanup relies on the explicit remove_tags_* rules below
+    no_stylesheets = False
+
+    remove_tags_before = dict(name='h1')  # trim markup that precedes the <h1> headline
+
+    remove_tags_after = dict(name='div', attrs={'id': 'article-content'})  # trim markup that follows div#article-content
+
+    feeds = [(  # (feed title, RSS URL) pairs
+        u'\u0413\u0430\u0437\u0435\u0442\u0430 "\u0421\u043E\u0431\u0435\u0441\u0435\u0434\u043D\u0438\u043A"',  # 'Газета "Собеседник"' (Sobesednik newspaper)
+        'https://sobesednik.com/rss.xml'
+    )]