diff --git a/recipes/coda.recipe b/recipes/coda.recipe new file mode 100644 index 0000000000..e1538704b6 --- /dev/null +++ b/recipes/coda.recipe @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Coda(BasicNewsRecipe): + title = 'Coda' + __author__ = 'bugmen00t' + description = 'Coda Story reports on major currents shaping our world from disinformation to authoritarian technologies to the war on science. Coda stays on these stories to reveal why they matter, how they are connected and where they are heading next.' # noqa + publisher = 'Natalia Antelava & Ilan Greenberg' + category = 'blog' + cover_url = u'https://www.codastory.com/wp-content/uploads/2021/05/AT_thumbnail_512x512.png' + language = 'en_RU' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + oldest_article = 180 + max_articles_per_feed = 50 + + remove_tags_before = dict(name='div', attrs={'class': 'article'}) + + remove_tags_after = dict(name='div', attrs={'class': 'article'}) + + remove_tags = [ + dict(name='li', attrs={'class': 'material-meta__type'}), + dict(name='div', attrs={'class': 'more'}) + ] + + feeds = [('CODA', 'https://www.codastory.com/feed/')] diff --git a/recipes/coda_ru.recipe b/recipes/coda_ru.recipe new file mode 100644 index 0000000000..6d65dd6e32 --- /dev/null +++ b/recipes/coda_ru.recipe @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Coda(BasicNewsRecipe): + title = 'Coda \u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u043E\u043C' + __author__ = 'bugmen00t' + description = 'Coda - \u043C\u0435\u0434\u0438\u0430, \u043A\u043E\u0442\u043E\u0440\u043E\u0435 \u0432\u044B\u044F\u0432\u043B\u044F\u0435\u0442 \u0441\u0432\u044F\u0437\u0438 \u043C\u0435\u0436\u0434\u0443 \u0441\u043E\u0431\u044B\u0442\u0438\u044F\u043C\u0438 \u0438 \u043F\u0440\u043E\u0434\u043E\u043B\u0436\u0430\u0435\u0442 
\u0441\u043B\u0435\u0434\u0438\u0442\u044C \u0437\u0430 \u0438\u0441\u0442\u043E\u0440\u0438\u044F\u043C\u0438 \u0434\u0430\u0436\u0435 \u043F\u043E\u0441\u043B\u0435 \u0442\u043E\u0433\u043E, \u043A\u0430\u043A \u043E\u043D\u0438 \u043F\u0440\u043E\u043F\u0430\u043B\u0438 \u0438\u0437 \u043F\u043E\u0432\u0435\u0441\u0442\u043A\u0438. Coda \u043F\u043E\u0433\u0440\u0443\u0436\u0430\u0435\u0442\u0441\u044F \u043D\u0435 \u0442\u043E\u043B\u044C\u043A\u043E \u0432 \u0441\u0430\u043C\u0438 \u043A\u0440\u0438\u0437\u0438\u0441\u044B, \u043D\u043E \u0438 \u0432 \u043A\u043E\u043D\u0442\u0435\u043A\u0441\u0442, \u043A\u043E\u0442\u043E\u0440\u044B\u0439 \u0438\u0445 \u043E\u043A\u0440\u0443\u0436\u0430\u0435\u0442' # noqa + publisher = 'Natalia Antelava & Ilan Greenberg' + category = 'blog' + cover_url = u'https://www.codastory.com/wp-content/uploads/2021/05/AT_thumbnail_512x512.png' + language = 'ru' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + oldest_article = 180 + max_articles_per_feed = 50 + + remove_tags_before = dict(name='article') + + remove_tags_after = dict(name='div', attrs={'class': 'article'}) + + remove_tags = [ + dict(name='li', attrs={'class': 'material-meta__type'}), + dict(name='div', attrs={'class': 'more'}) + ] + + feeds = [ + ('CODA', 'https://www.codastory.com/ru/feed/') + ] diff --git a/recipes/icons/coda.png b/recipes/icons/coda.png new file mode 100644 index 0000000000..69c16bbf3c Binary files /dev/null and b/recipes/icons/coda.png differ diff --git a/recipes/icons/coda_ru.png b/recipes/icons/coda_ru.png new file mode 100644 index 0000000000..69c16bbf3c Binary files /dev/null and b/recipes/icons/coda_ru.png differ diff --git a/recipes/icons/mel.png b/recipes/icons/mel.png new file mode 100644 index 0000000000..1ce593def6 Binary files /dev/null and b/recipes/icons/mel.png differ diff --git a/recipes/icons/opennet.png b/recipes/icons/opennet.png new file mode 100644 index 0000000000..ec5a89a21f 
Binary files /dev/null and b/recipes/icons/opennet.png differ diff --git a/recipes/icons/paperpaper.png b/recipes/icons/paperpaper.png new file mode 100644 index 0000000000..f802cb701d Binary files /dev/null and b/recipes/icons/paperpaper.png differ diff --git a/recipes/icons/project.png b/recipes/icons/project.png new file mode 100644 index 0000000000..a15c53ab8a Binary files /dev/null and b/recipes/icons/project.png differ diff --git a/recipes/icons/project_en.png b/recipes/icons/project_en.png new file mode 100644 index 0000000000..a15c53ab8a Binary files /dev/null and b/recipes/icons/project_en.png differ diff --git a/recipes/icons/sobaka.png b/recipes/icons/sobaka.png new file mode 100644 index 0000000000..c6776d5c52 Binary files /dev/null and b/recipes/icons/sobaka.png differ diff --git a/recipes/mel.recipe b/recipes/mel.recipe new file mode 100644 index 0000000000..44430b58d0 --- /dev/null +++ b/recipes/mel.recipe @@ -0,0 +1,101 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Mel(BasicNewsRecipe): + title = '\u041C\u0435\u043B' + __author__ = 'bugmen00t' + description = '\u041C\u0435\u0434\u0438\u0430 \u043F\u0440\u043E \u043E\u0431\u0440\u0430\u0437\u043E\u0432\u0430\u043D\u0438\u0435 \u0438 \u0432\u043E\u0441\u043F\u0438\u0442\u0430\u043D\u0438\u0435 \u0434\u0435\u0442\u0435\u0439' # noqa + publisher = '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435 \u00AB\u041C\u0435\u043B\u00BB' + category = 'blog' + cover_url = u'https://static.mel.fm/images/project/site/dummyLogo.png' + language = 'ru' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + oldest_article = 7 + max_articles_per_feed = 50 + + remove_tags_before = dict(name='article') + + remove_tags_after = dict(name='div', attrs={'class': 'b-pb-article__body'}) + + remove_tags = [ + dict(name='div', attrs={'class': 'b-ad-space__horizontal-centering'}), + dict( + 
name='div', + attrs={ + 'class': + 'subscriptionBlock b-pb-publication-body__publication-newsletter-form' + } + ), + dict( + name='div', + attrs={ + 'class': + 'subscriptionBlock b-pb-article__publication-newsletter-form b-pb-article__publication-newsletter-form-post' + } + ), + dict( + name='div', + attrs={ + 'class': 'newsletter-form b-pb-article__publication-newsletter-form' + } + ), + dict(name='div', attrs={'class': 'bottom-wrapper'}), + dict( + name='div', + attrs={ + 'class': + 'b-pb-article__right-column b-pb-article__right-column_without-cover b-pb-article__right-column_on-post-page' + } + ), + dict( + name='div', + attrs={ + 'class': + 'b-pb-article__right-column b-pb-article__right-column_with-cover' + } + ), + dict(name='div', attrs={'class': 'main-tag'}), + dict(name='div', attrs={'class': 'main-tag_mobile'}), + dict(name='div', attrs={'class': 'publication-header__counter'}), + dict( + name='div', + attrs={ + 'class': + 'smi2-news-container smi2-news-container_desktop smi2-news-container_desktop-news' + } + ), + dict( + name='div', + attrs={'class': 'smi2-news-container smi2-news-container_desktop'} + ), + dict( + name='div', + attrs={ + 'class': + 'smi2-news-container smi2-news-container_mobile smi2-news-container_mobile-news' + } + ), + dict( + name='div', + attrs={'class': 'smi2-news-container smi2-news-container_mobile'} + ) + ] + + feeds = [( + '\u0412\u0441\u0435 \u043C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B', + 'https://mel.fm/rss/default-all' + ), + ( + '\u041D\u043E\u0432\u043E\u0441\u0442\u0438', + 'https://mel.fm/rss/default-news' + )] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src': True}): + img['src'] = img['data-src'] + return soup diff --git a/recipes/opennet.recipe b/recipes/opennet.recipe new file mode 100644 index 0000000000..5d22ca4afa --- /dev/null +++ b/recipes/opennet.recipe @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import 
BasicNewsRecipe + + +class PaperPaper(BasicNewsRecipe): + title = 'OpenNet.ru' + __author__ = 'bugmen00t' + description = '\u0420\u0443\u0441\u0441\u043A\u043E\u044F\u0437\u044B\u0447\u043D\u044B\u0439 \u043F\u043E\u0440\u0442\u0430\u043B, \u043F\u043E\u0441\u0432\u044F\u0449\u0451\u043D\u043D\u044B\u0439 \u043E\u0442\u043A\u0440\u044B\u0442\u044B\u043C \u0438 \u0441\u0432\u043E\u0431\u043E\u0434\u043D\u044B\u043C \u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F\u043C (FOSS): \u0435\u0436\u0435\u0434\u043D\u0435\u0432\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u0432 \u043E\u0431\u043B\u0430\u0441\u0442\u0438 \u0441\u0432\u043E\u0431\u043E\u0434\u043D\u043E\u0433\u043E \u043F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u043D\u043E\u0433\u043E \u043E\u0431\u0435\u0441\u043F\u0435\u0447\u0435\u043D\u0438\u044F, Linux, BSD \u0438 UNIX-\u043F\u043E\u0434\u043E\u0431\u043D\u044B\u0445 \u043E\u043F\u0435\u0440\u0430\u0446\u0438\u043E\u043D\u043D\u044B\u0445 \u0441\u0438\u0441\u0442\u0435\u043C.' 
# noqa + publisher = 'Maxim Chirkov' + category = 'blog' + cover_url = u'https://www.opennet.ru/opennet.gif' + language = 'ru' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + oldest_article = 14 + max_articles_per_feed = 50 + + remove_tags_before = dict(name='div', attrs={'id': 'as2'}) + + remove_tags_after = dict(name='tr', attrs={'bgcolor': '#D9DAC6'}) + + remove_tags = [dict(name='tr', attrs={'bgcolor': '#D9DAC6'})] + + feeds = [ + ( + '\u0413\u043B\u0430\u0432\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438', + 'https://www.opennet.ru/opennews/opennews_6_noadv.rss' + ), + ( + '\u041C\u0438\u043D\u0438-\u043D\u043E\u0432\u043E\u0441\u0442\u0438', + 'https://www.opennet.ru/opennews/opennews_mini_noadv.rss' + ), + ( + '\u0421\u043E\u0431\u044B\u0442\u0438\u044F', + 'https://www.opennet.ru/opennews/opennews_review.rss' + ), + ( + '\u041E\u0431\u0437\u043E\u0440\u044B \u0441\u0442\u0430\u0442\u0435\u0439', + 'https://www.opennet.ru/opennews/opennews_arts.rss' + ), + ( + '\u0411\u0435\u0437\u043E\u043F\u0430\u0441\u043D\u043E\u0441\u0442\u044C', + 'https://www.opennet.ru/opennews/opennews_sec.rss' + ), + ( + '\u041D\u043E\u0432\u044B\u0435 \u0432\u0435\u0440\u0441\u0438\u0438 \u043F\u0440\u043E\u0433\u0440\u0430\u043C\u043C', + 'https://www.opennet.ru/opennews/opennews_prog.rss' + ), ('Linux', 'https://www.opennet.ru/opennews/opennews_linux.rss'), + ('BSD', 'https://www.opennet.ru/opennews/opennews_bsd.rss'), + ('Ubuntu', 'https://www.opennet.ru/opennews/opennews_ubuntu_full.rss'), + ('Fedora', 'https://www.opennet.ru/opennews/opennews_fedora_full.rss'), + ( + 'Mozilla/Firefox', + 'https://www.opennet.ru/opennews/opennews_mozilla_full.rss' + ) + ] diff --git a/recipes/paperpaper.recipe b/recipes/paperpaper.recipe new file mode 100644 index 0000000000..53fbee1a5a --- /dev/null +++ b/recipes/paperpaper.recipe @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe 
+ + +class PaperPaper(BasicNewsRecipe): + title = '\u0411\u0443\u043C\u0430\u0433\u0430' + __author__ = 'bugmen00t' + description = '\u0418\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u043F\u0440\u0435\u043A\u0440\u0430\u0441\u043D\u043E\u043C \u0433\u043E\u0440\u043E\u0434\u0435 \u0421\u0430\u043D\u043A\u0442-\u041F\u0435\u0442\u0435\u0440\u0431\u0443\u0440\u0433\u0435, \u0432 \u043A\u043E\u0442\u043E\u0440\u043E\u043C, \u043A\u043E\u043D\u0435\u0447\u043D\u043E, \u0434\u0430\u043B\u0435\u043A\u043E \u043D\u0435 \u0432\u0441\u0451 \u0438\u0434\u0435\u0430\u043B\u044C\u043D\u043E, \u2014 \u0438 \u043F\u043E\u044D\u0442\u043E\u043C\u0443 \u043C\u044B \u0437\u0430\u043D\u0438\u043C\u0430\u0435\u043C\u0441\u044F \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u043E\u0439, \u0447\u0442\u043E\u0431\u044B \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u044C \u0432\u043D\u0438\u043C\u0430\u043D\u0438\u0435 \u043A \u0432\u0430\u0436\u043D\u044B\u043C \u0434\u043B\u044F \u0432\u0441\u0435\u0445 \u043F\u0440\u043E\u0431\u043B\u0435\u043C\u0430\u043C \u0438 \u0432\u043B\u0438\u044F\u0442\u044C \u043D\u0430 \u0438\u0445 \u0440\u0435\u0448\u0435\u043D\u0438\u0435.' 
# noqa + publisher = '\u041A\u0438\u0440\u0438\u043B\u043B \u0410\u0440\u0442\u0451\u043C\u0435\u043D\u043A\u043E, \u0422\u0430\u0442\u044C\u044F\u043D\u0430 \u0418\u0432\u0430\u043D\u043E\u0432\u0430' # noqa + category = 'newspaper' + cover_url = u'https://upload.wikimedia.org/wikipedia/commons/1/1f/Paperpaper_logo.jpg' + language = 'ru' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + oldest_article = 14 + max_articles_per_feed = 50 + + remove_tags_before = dict(name='article') + + remove_tags_after = dict(name='article') + + remove_tags = [ + dict(name='div', attrs={'class': 'bottom-block '}), + dict(name='div', attrs={'class': 'bottom-block news'}) + ] + + feeds = [ + # ('\u0412\u0441\u0435 \u043C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B', 'https://paperpaper.ru/feed/?service'), + ( + '\u0412\u0441\u0435 \u043C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B', + 'https://wemissedyou.rknrkn.ru/feed/?service' + ), + # ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://paperpaper.ru/category/what/news/feed/'), + ( + '\u041D\u043E\u0432\u043E\u0441\u0442\u0438', + 'https://wemissedyou.rknrkn.ru/category/what/news/feed/' + ), + # ('\u0418\u0441\u0442\u043E\u0440\u0438\u0438', 'https://paperpaper.ru/category/features/feed/'), + ( + '\u0418\u0441\u0442\u043E\u0440\u0438\u0438', + 'https://wemissedyou.rknrkn.ru/category/features/feed/' + ), + # ('\u0413\u0438\u0434\u044B', 'https://paperpaper.ru/category/guides/feed/'), + ( + '\u0413\u0438\u0434\u044B', + 'https://wemissedyou.rknrkn.ru/category/guides/feed/' + ), + # ('\u0421\u043F\u0438\u0441\u043E\u043A', 'https://paperpaper.ru/tag/%D1%81%D0%BF%D0%B8%D1%81%D0%BE%D0%BA/feed/'), + ( + '\u0421\u043F\u0438\u0441\u043E\u043A', + 'https://wemissedyou.rknrkn.ru/tag/%D1%81%D0%BF%D0%B8%D1%81%D0%BE%D0%BA/feed/' + ), + # ('\u042D\u043A\u0441\u043F\u0430\u0442\u044B', 'https://paperpaper.ru/tag/expat/feed/'), + ( + '\u042D\u043A\u0441\u043F\u0430\u0442\u044B', + 
'https://wemissedyou.rknrkn.ru/tag/expat/feed/' + ), + # ('\u0418\u0441\u0442\u043E\u0440\u0438\u0438', 'https://paperpaper.ru/tag/stories/feed/'), + ( + '\u0418\u0441\u0442\u043E\u0440\u0438\u0438', + 'https://wemissedyou.rknrkn.ru/tag/stories/feed/' + ), + # ('\u041E\u0442\u0432\u0435\u0442\u044B', 'https://paperpaper.ru/tag/otvety/feed/'), + ( + '\u041E\u0442\u0432\u0435\u0442\u044B', + 'https://wemissedyou.rknrkn.ru/tag/otvety/feed/' + ), + # ('\u041F\u0443\u0442\u0435\u0448\u0435\u0441\u0442\u0432\u0438\u044F', 'https://paperpaper.ru/tag/traveltravel/feed/'), + ( + '\u041F\u0443\u0442\u0435\u0448\u0435\u0441\u0442\u0432\u0438\u044F', + 'https://wemissedyou.rknrkn.ru/tag/traveltravel/feed/' + ), + # ('\u041D\u0430\u0443\u0447\u043F\u043E\u043F', 'https://paperpaper.ru/category/main-cats/nauchpop/feed/'), + ( + '\u041D\u0430\u0443\u0447\u043F\u043E\u043F', + 'https://wemissedyou.rknrkn.ru/category/main-cats/nauchpop/feed/' + ), + ( + '\u0412\u043E\u0435\u043D\u043D\u044B\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044F \u0432 \u0423\u043A\u0440\u0430\u0438\u043D\u0435', + 'https://wemissedyou.rknrkn.ru/tag/obostrenie-vokrug-ukrainy-2022/feed/' + ), + # ('\u0423\u043A\u0440\u0430\u0438\u043D\u0430', 'https://paperpaper.ru/tag/ukraine/feed/'), + ( + '\u0423\u043A\u0440\u0430\u0438\u043D\u0430', + 'https://wemissedyou.rknrkn.ru/tag/ukraine/feed/' + ), + # ('\u041D\u0430\u0443\u043A\u0430', 'https://paperpaper.ru/tag/science/feed/'), + ( + '\u041D\u0430\u0443\u043A\u0430', + 'https://wemissedyou.rknrkn.ru/tag/science/feed/' + ) + ] diff --git a/recipes/project.recipe b/recipes/project.recipe new file mode 100644 index 0000000000..ebb3c672a1 --- /dev/null +++ b/recipes/project.recipe @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Project(BasicNewsRecipe): + title = '\u041F\u0440\u043E\u0435\u043A\u0442' + __author__ = 'bugmen00t' + description = 
'\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442-\u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0441\u043E\u0437\u0434\u0430\u043D\u043D\u043E\u0435 \u0431\u044B\u0432\u0448\u0438\u043C \u0433\u043B\u0430\u0432\u043D\u044B\u043C \u0440\u0435\u0434\u0430\u043A\u0442\u043E\u0440\u043E\u043C \u0442\u0435\u043B\u0435\u043A\u0430\u043D\u0430\u043B\u0430 \u00AB\u0414\u043E\u0436\u0434\u044C\u00BB \u0438 \u0434\u0440\u0443\u0433\u0438\u0445 \u0421\u041C\u0418 \u0420\u043E\u043C\u0430\u043D\u043E\u043C \u0411\u0430\u0434\u0430\u043D\u0438\u043D\u044B\u043C, \u043A\u043E\u0442\u043E\u0440\u043E\u0435 \u0441\u043F\u0435\u0446\u0438\u0430\u043B\u0438\u0437\u0438\u0440\u0443\u0435\u0442\u0441\u044F \u043D\u0430 \u0440\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430\u0442\u0435\u043B\u044C\u0441\u043A\u043E\u0439 \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u0435.' # noqa + publisher = '\u0420\u043E\u043C\u0430\u043D \u0411\u0430\u0434\u0430\u043D\u0438\u043D' + category = 'blog' + cover_url = u'https://proektmedia-stat.ams3.digitaloceanspaces.com/2018/08/proektmedia_facebook_default.png' + language = 'ru' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + oldest_article = 200 + max_articles_per_feed = 20 + + remove_tags_before = dict(name='main') + + remove_tags_after = dict( + name='div', attrs={'class': 'single-post__article js-post-article'} + ) + + remove_tags = [ + dict(name='span', attrs={'class': 'more'}), + dict(name='span', attrs={'class': 'close'}), + dict(name='div', attrs={'class': 'socials js-socials-icons'}) + ] + + feeds = [ + ('\u041F\u0440\u043E\u0435\u043A\u0442.', 'https://www.proekt.media/feed/') + ] + + def get_browser(self): + br = BasicNewsRecipe.get_browser(self, user_agent='common_words/based') + return br diff --git a/recipes/project_en.recipe b/recipes/project_en.recipe new file mode 100644 index 0000000000..be99e62ba3 --- /dev/null +++ b/recipes/project_en.recipe @@ -0,0 +1,38 @@ +#!/usr/bin/env 
python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Project(BasicNewsRecipe): + title = 'The Project' + __author__ = 'bugmen00t' + description = 'The Project is an independent Russian media specialising in in-depth journalism.' + publisher = 'Roman Badanin' + category = 'blog' + cover_url = u'https://proektmedia-stat.ams3.digitaloceanspaces.com/2018/08/proektmedia_facebook_default.png' + language = 'en_RU' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + oldest_article = 600 + max_articles_per_feed = 20 + + remove_tags_before = dict(name='main') + + remove_tags_after = dict( + name='div', attrs={'class': 'single-post__article js-post-article'} + ) + + remove_tags = [ + # dict(name='div', attrs={'class': 'stk-grid stk-theme_45496__mb_3'}), + dict(name='span', attrs={'class': 'more'}), + dict(name='span', attrs={'class': 'close'}), + dict(name='div', attrs={'class': 'socials js-socials-icons'}) + ] + + feeds = [('The Project.', 'https://www.proekt.media/en/feed/')] + + def get_browser(self): + br = BasicNewsRecipe.get_browser(self, user_agent='common_words/based') + return br diff --git a/recipes/sobaka.recipe b/recipes/sobaka.recipe new file mode 100644 index 0000000000..1f6012645f --- /dev/null +++ b/recipes/sobaka.recipe @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Sobaka(BasicNewsRecipe): + title = '\u0421\u043E\u0431\u0430\u043A\u0430.ru' + __author__ = 'bugmen00t' + description = '\u0416\u0443\u0440\u043D\u0430\u043B \u043E \u043B\u044E\u0434\u044F\u0445 \u0432 \u041F\u0435\u0442\u0435\u0440\u0431\u0443\u0440\u0433\u0435' # noqa + publisher = '\u041E\u041E\u041E \u00AB\u0416\u0443\u0440\u043D\u0430\u043B\u044B \u0438 \u0441\u0430\u0439\u0442\u044B "\u0424\u0430\u0431\u0440\u0438\u043A\u0430 \u043A\u043E\u043D\u0442\u0435\u043D\u0442\u0430 "\u0422\u043E\u0447\u043A\u0430 \u0420\u0443"\u00BB' # noqa + 
category = 'magazine' + cover_url = u'https://static.sobaka.ru/images/post/00/04/31/21/_rotator.jpg' + language = 'ru' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + oldest_article = 7 + max_articles_per_feed = 50 + + remove_tags_before = dict(name='div', attrs={'class': 'b-post-view__head'}) + + remove_tags_after = dict(name='div', attrs={'class': 'b-post-view__foot'}) + + remove_tags = [ + dict(name='div', attrs={'class': 'b-post-view__telegram-promo'}), + dict(name='div', attrs={'class': 'b-post-view__tgb'}), + dict(name='div', attrs={'id': 'comments'}), + dict(name='div', attrs={'class': 'b-post-view__section'}), + dict(name='div', attrs={'class': 'b-post-view__share'}), + dict(name='div', attrs={'class': 'b-post-view__details-col b-post-view__details-col--w1'}) + ] + + feeds = [ + ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://www.sobaka.ru/rss/news.xml') + ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src': True}): + img['src'] = img['data-src'] + return soup