diff --git a/recipes/bbc_uk.recipe b/recipes/bbc_uk.recipe new file mode 100644 index 0000000000..a5aa0249ec --- /dev/null +++ b/recipes/bbc_uk.recipe @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class BBC(BasicNewsRecipe): + title = u'BBC Ukrainian' + description = u'BBC News \u0423\u043A\u0440\u0430\u0457\u043D\u0430' + __author__ = 'bugmen00t' + publication_type = 'newspaper' + oldest_article = 14 + max_articles_per_feed = 50 + language = 'uk' + cover_url = 'https://news.files.bbci.co.uk/ws/img/logos/og/ukrainian.png' + auto_cleanup = True + no_stylesheets = True + + remove_tags_before = dict(name='h1') + + remove_tags_after = dict(name='main', attrs={'aria-hidden': 'true'}) + + remove_tags = [ + dict(name='section', attrs={'role': 'region'}), + dict(name='footer'), + dict(name='aside') + ] + + feeds = [( + u'\u041D\u043E\u0432\u0438\u043D\u0438 BBC', + 'https://feeds.bbci.co.uk/ukrainian/rss.xml' + )] diff --git a/recipes/icons/bbc_uk.png b/recipes/icons/bbc_uk.png new file mode 100644 index 0000000000..eb7cb3459a Binary files /dev/null and b/recipes/icons/bbc_uk.png differ diff --git a/recipes/icons/istories.recipe b/recipes/icons/istories.recipe new file mode 100644 index 0000000000..73d412cbfc Binary files /dev/null and b/recipes/icons/istories.recipe differ diff --git a/recipes/icons/kholod.png b/recipes/icons/kholod.png new file mode 100644 index 0000000000..2e03216b62 Binary files /dev/null and b/recipes/icons/kholod.png differ diff --git a/recipes/icons/meduza.png b/recipes/icons/meduza.png new file mode 100644 index 0000000000..19c9cbbe8e Binary files /dev/null and b/recipes/icons/meduza.png differ diff --git a/recipes/icons/meduza_ru.png b/recipes/icons/meduza_ru.png new file mode 100644 index 0000000000..19c9cbbe8e Binary files /dev/null and b/recipes/icons/meduza_ru.png differ diff --git a/recipes/icons/n_plus_one.png b/recipes/icons/n_plus_one.png new file mode 100644 index 0000000000..5cf07a8daf Binary files /dev/null and b/recipes/icons/n_plus_one.png differ diff --git a/recipes/icons/novaya_gazeta_europe_en.png b/recipes/icons/novaya_gazeta_europe_en.png new file mode 100644 index 0000000000..946319432b Binary files /dev/null and b/recipes/icons/novaya_gazeta_europe_en.png differ diff --git a/recipes/icons/sobesednik.png b/recipes/icons/sobesednik.png new file mode 100644 index 0000000000..fa8121a312 Binary files /dev/null and b/recipes/icons/sobesednik.png differ diff --git a/recipes/istories.recipe b/recipes/istories.recipe new file mode 100644 index 0000000000..4984ff0dd3 --- /dev/null +++ b/recipes/istories.recipe @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class IStories(BasicNewsRecipe): + title = u'\u0412\u0430\u0436\u043D\u044B\u0435 \u0438\u0441\u0442\u043E\u0440\u0438\u0438' + description = u'\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u044E\u0449\u0435\u0435\u0441\u044F \u043D\u0430 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0441\u043A\u0438\u0445 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u043D\u0438\u044F\u0445. \u041E\u0441\u043D\u043E\u0432\u0430\u043D\u043E \u0432 2020 \u0433\u043E\u0434\u0443 \u0440\u043E\u0441\u0441\u0438\u0439\u0441\u043A\u0438\u043C\u0438 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0430\u043C\u0438 \u0420\u043E\u043C\u0430\u043D\u043E\u043C \u0410\u043D\u0438\u043D\u044B\u043C \u0438 \u041E\u043B\u0435\u0441\u0435\u0439 \u0428\u043C\u0430\u0433\u0443\u043D.' # noqa + __author__ = 'bugmen00t' + publisher = 'Roman Anin & Olesya Shmagun' + publication_type = 'blog' + oldest_article = 21 + max_articles_per_feed = 50 + language = 'ru' + cover_url = 'https://static.istories.media/public/cover.png' + auto_cleanup = False + no_stylesheets = True + + remove_tags_before = dict(name='h1') + + remove_tags_after = dict(name='article') + + remove_tags = [ + dict(name='header'), + dict(name='footer'), + dict(name='form', attrs={'class': 'subscr'}), + dict(name='div', attrs={'class': 'row'}), + dict(name='div', attrs={'class': 'arrow-black'}), + dict(name='div', attrs={'class': 'article-foot'}), + dict(name='div', attrs={'class': 'article-toggle'}), + dict(name='div', attrs={'class': 'article-soc'}) + ] + + feeds = [( + u'\u0438\u0441\u0442\u043E\u0440\u0438\u0438', + 'https://istories.media/rss/all.xml' + )] diff --git a/recipes/kholod.recipe b/recipes/kholod.recipe new file mode 100644 index 0000000000..e2af0a55e2 --- /dev/null +++ b/recipes/kholod.recipe @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Kholod(BasicNewsRecipe): + title = u'\u0425\u043E\u043B\u043E\u0434' + description = u'\u0418\u0441\u0442\u043E\u0440\u0438\u0438 \u043E \u0420\u043E\u0441\u0441\u0438\u0438: \u043C\u044B \u0438\u0449\u0435\u043C \u0437\u0430\u0445\u0432\u0430\u0442\u044B\u0432\u0430\u044E\u0449\u0438\u0435 \u0438\u0441\u0442\u043E\u0440\u0438\u0438 \u043F\u043E \u0432\u0441\u0435\u0439 \u0420\u043E\u0441\u0441\u0438\u0438, \u0430 \u043F\u043E\u0442\u043E\u043C \u0440\u0430\u0441\u0441\u043A\u0430\u0437\u044B\u0432\u0430\u0435\u043C \u0432\u0430\u043C.' # noqa + __author__ = 'bugmen00t' + publisher = '\u0422\u0430\u0438\u0441\u0438\u044F \u0411\u0435\u043A\u0431\u0443\u043B\u0430\u0442\u043E\u0432\u0430' + publication_type = 'blog' + oldest_article = 14 + max_articles_per_feed = 200 + language = 'ru' + cover_url = 'https://image.simplecastcdn.com/images/93a97011-6988-4787-8242-e202b2840fde/08e85f64-9901-44e1-b20c-7da01c5ce0c0/holodpodcastlogo.jpg' + auto_cleanup = False + no_stylesheets = False + + remove_tags_before = dict(name='h1') + + remove_tags_after = dict( + name='div', attrs={'class': 'article__content the-content text-column'} + ) + + remove_tags = [dict(name='div', attrs={'class': 'inlinemore'})] + + feeds = [( + u'\u0410\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u043E', + 'https://holod.media/sections/daily/feed/' + ), + ( + u'\u0418\u0441\u0442\u043E\u0440\u0438\u0438', + 'https://holod.media/sections/stories/feed/' + ), + ( + u'\u041C\u043D\u0435\u043D\u0438\u044F', + 'https://holod.media/sections/opinions/feed/' + ), + ( + u'\u0418\u043D\u0442\u0435\u0440\u0432\u044C\u044E', + 'https://holod.media/sections/interviews/feed/' + ), + ( + u'\u041E\u0431\u044A\u044F\u0441\u043D\u044F\u0435\u043C', + 'https://holod.media/sections/explainers/feed/' + )] diff --git a/recipes/meduza.recipe b/recipes/meduza.recipe new file mode 100644 index 0000000000..d3ff8e0036 --- /dev/null +++ b/recipes/meduza.recipe @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Meduza(BasicNewsRecipe): + title = u'Meduza' + description = u'Russian- and English-language independent news website, based in Latvia. It was founded in 2014 by a group of former employees of the then independent Lenta.ru news website. Every day we bring you the most important news and feature stories from hundreds of sources in Russia and across the former Soviet Union.' # noqa + __author__ = 'bugmen00t' + publisher = 'Medusa Project SIA' + publication_type = 'blog' + oldest_article = 21 + max_articles_per_feed = 100 + language = 'en_RU' + cover_url = 'https://meduza.io/impro/E_cJMv0IQxOC45z-YXeGuzuPB2kQ_A1XsZYrdByOCnk/fill/1200/0/ce/0/aHR0cHM6Ly9tZWR1/emEuaW8vaW1hZ2Uv/YXR0YWNobWVudHMv/aW1hZ2VzLzAwNi83/MTgvODcyL29yaWdp/bmFsLzVPSmRDdWc1/bC1JVG9lTXBqSHFH/ZXcucG5n.png' # noqa + auto_cleanup = False + no_stylesheets = False + + remove_tags_before = dict(name='h1') + + remove_tags_after = dict(name='div', attrs={'class': 'GeneralMaterial-article'}) + + remove_tags = [ + dict(name='div', attrs={'id': 'div-gpt-ad'}), + ] + + feeds = [ + (u'News', 'https://meduza.io/rss2/en/news'), + (u'Feature stories', 'https://meduza.io/rss2/en/stories'), + ] diff --git a/recipes/meduza_ru.recipe b/recipes/meduza_ru.recipe new file mode 100644 index 0000000000..dcecb40686 --- /dev/null +++ b/recipes/meduza_ru.recipe @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Meduza(BasicNewsRecipe): + title = u'Meduza' + description = u'\u041C\u0435\u0436\u0434\u0443\u043D\u0430\u0440\u043E\u0434\u043D\u043E\u0435 \u0440\u0443\u0441\u0441\u043A\u043E\u044F\u0437\u044B\u0447\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435. \u041C\u044B \u0432\u044B\u0431\u0438\u0440\u0430\u0435\u043C \u0434\u043B\u044F \u0432\u0430\u0441 \u0441\u0430\u043C\u044B\u0435 \u0432\u0430\u0436\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0438 \u0433\u043E\u0442\u043E\u0432\u0438\u043C \u043B\u0443\u0447\u0448\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u0420\u043E\u0441\u0441\u0438\u0438 \u0438 \u043C\u0438\u0440\u0435.' # noqa + __author__ = 'bugmen00t' + publisher = 'Medusa Project SIA' + publication_type = 'blog' + oldest_article = 7 + max_articles_per_feed = 100 + language = 'ru' + cover_url = 'https://meduza.io/impro/E_cJMv0IQxOC45z-YXeGuzuPB2kQ_A1XsZYrdByOCnk/fill/1200/0/ce/0/aHR0cHM6Ly9tZWR1/emEuaW8vaW1hZ2Uv/YXR0YWNobWVudHMv/aW1hZ2VzLzAwNi83/MTgvODcyL29yaWdp/bmFsLzVPSmRDdWc1/bC1JVG9lTXBqSHFH/ZXcucG5n.png' # noqa + auto_cleanup = False + no_stylesheets = False + + remove_tags_before = dict(name='h1') + + remove_tags_after = dict(name='div', attrs={'class': 'GeneralMaterial-article'}) + + remove_tags = [dict(name='div', attrs={'id': 'div-gpt-ad'})] + + feeds = [ + ( + u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', + 'https://meduza.io/rss2/news' + ), + ( + u'\u0418\u0441\u0442\u043E\u0440\u0438\u0438', + 'https://meduza.io/rss2/articles' + ), + ] diff --git a/recipes/n_plus_one.recipe b/recipes/n_plus_one.recipe new file mode 100644 index 0000000000..89a5140ee6 --- /dev/null +++ b/recipes/n_plus_one.recipe @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class AdvancedUserRecipe1467724863(BasicNewsRecipe): + title = 'N+1' + __author__ = 'bugmen00t' + description = '\u041D\u0430\u0443\u0447\u043D\u043E-\u043F\u043E\u043F\u0443\u043B\u044F\u0440\u043D\u043E\u0435 \u0440\u0430\u0437\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u0442\u043E\u043C, \u0447\u0442\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u0438\u0442 \u0432 \u043D\u0430\u0443\u043A\u0435, \u0442\u0435\u0445\u043D\u0438\u043A\u0435 \u0438 \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F\u0445 \u043F\u0440\u044F\u043C\u043E \u0441\u0435\u0439\u0447\u0430\u0441. \u041D\u043E\u0432\u043E\u0441\u0442\u0438, \u0431\u043E\u043B\u044C\u0448\u0438\u0435 \u0441\u0442\u0430\u0442\u044C\u0438, \u0431\u043B\u043E\u0433\u0438 \u2014 \u044D\u0442\u043E \u0432\u0441\u0435 \u043F\u0440\u043E \u043D\u0430\u0441. \u041C\u044B \u0438\u0449\u0435\u043C \u0441\u0430\u043C\u043E\u0435 \u0438\u043D\u0442\u0435\u0440\u0435\u0441\u043D\u043E\u0435 \u0438 \u0434\u043E\u0441\u0442\u0430\u0432\u043B\u044F\u0435\u043C \u044D\u0442\u043E \u0447\u0438\u0442\u0430\u0442\u0435\u043B\u044F\u043C \u0432 \u043F\u043E\u043D\u044F\u0442\u043D\u043E\u0439, \u044F\u0441\u043D\u043E\u0439, \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u0435\u043B\u044C\u043D\u043E\u0439 (\u0438 \u0441 \u0432\u0438\u0437\u0443\u0430\u043B\u044C\u043D\u043E\u0439 \u0442\u043E\u0447\u043A\u0438 \u0437\u0440\u0435\u043D\u0438\u044F) \u0444\u043E\u0440\u043C\u0435. \u041C\u044B \u2014 \u0447\u0443\u0442\u044C \u0431\u043E\u043B\u044C\u0448\u0435, \u0447\u0435\u043C \u043F\u0440\u043E\u0441\u0442\u043E \u043D\u0430\u0443\u043A\u0430!' # noqa + publisher = 'N+1' + category = 'news' + cover_url = u'https://nplus1.ru/i/logo.png' + language = 'ru' + no_stylesheets = False + remove_javascript = True + auto_cleanup = False + oldest_article = 14 + max_articles_per_feed = 100 + + # remove_tags_before = dict(name='article', attrs={'class':'content'}) + remove_tags_before = dict(name='h1') + + remove_tags_after = dict(name='div', attrs={'class': 'body'}) + + remove_tags = [ + dict(name='div', attrs={'class': 'share-incut'}), + dict(name='div', attrs={'class': 'share-mobile'}) + ] + + feeds = [ + ( + '\u041d\u043e\u0432\u043e\u0441\u0442\u0438 \u043d\u0430\u0443\u043a\u0438', + 'https://nplus1.ru/rss' + ), + ] diff --git a/recipes/novaya_gazeta_europe_en.recipe b/recipes/novaya_gazeta_europe_en.recipe new file mode 100644 index 0000000000..97ccaf4638 --- /dev/null +++ b/recipes/novaya_gazeta_europe_en.recipe @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class NovayaGazetaEurope(BasicNewsRecipe): + title = u'Novaya Gazeta Europe' + __author__ = 'bugmen00t' + description = u'English edition of Novaya Gazeta Europe: news, analytics, expert opinions, special reports and investigative journalism.' + publisher = 'Kirill Martynov' + category = 'news' + language = 'en_RU' + cover_url = 'https://bucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com/public/images/5dc71e2d-9763-4f05-8f4e-92049fa32af7_513x513.png' + oldest_article = 15 + max_articles_per_feed = 50 + auto_cleanup = False + + remove_tags_before = dict(name='h1') + + remove_tags_after = dict( + name='div', attrs={'class': 'ArticleBlocks_wrapperNoAside__11_bu'} + ) + + remove_tags = [dict(name='div', attrs={'class': 'EmbedNative_root__2lgsH'})] + + feeds = [(u'News', 'https://novayagazeta.eu/feed/rss/en')] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup diff --git a/recipes/sobesednik.recipe b/recipes/sobesednik.recipe new file mode 100644 index 0000000000..674b2f945b --- /dev/null +++ b/recipes/sobesednik.recipe @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Sobesednik(BasicNewsRecipe): + title = u'\u0421\u043E\u0431\u0435\u0441\u0435\u0434\u043D\u0438\u043A' + description = u'\u0421\u0432\u0435\u0436\u0438\u0435 \u044D\u043A\u0441\u043A\u043B\u044E\u0437\u0438\u0432\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u043E \u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0435 \u0438 \u0448\u043E\u0443 \u0431\u0438\u0437\u043D\u0435\u0441\u0435' # noqa + __author__ = 'bugmen00t' + publisher = '\u041E\u041E\u041E \u00AB\u0421\u043E\u0431\u0435\u0441\u0435\u0434\u043D\u0438\u043A-\u041C\u0435\u0434\u0438\u0430\u00BB' + publication_type = 'newspaper' + oldest_article = 7 + max_articles_per_feed = 100 + language = 'ru' + cover_url = 'https://sobesednik.ru/images/logo1.png' + auto_cleanup = False + no_stylesheets = False + + remove_tags_before = dict(name='h1') + + remove_tags_after = dict(name='div', attrs={'id': 'article-content'}) + + feeds = [( + u'\u0413\u0430\u0437\u0435\u0442\u0430 "\u0421\u043E\u0431\u0435\u0441\u0435\u0434\u043D\u0438\u043A"', + 'https://sobesednik.com/rss.xml' + )]