diff --git a/recipes/3dnews.recipe b/recipes/3dnews.recipe index a44a8c769d..391c9f54e1 100644 --- a/recipes/3dnews.recipe +++ b/recipes/3dnews.recipe @@ -1,10 +1,11 @@ #!/usr/bin/env python # vim:fileencoding=utf-8 + from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1438446837(BasicNewsRecipe): +class News(BasicNewsRecipe): title = '3DNews: Daily Digital Digest' __author__ = 'bugmen00t' description = 'Независимое российское онлайн-издание, посвященное цифровым технологиям' @@ -18,48 +19,112 @@ class AdvancedUserRecipe1438446837(BasicNewsRecipe): max_articles_per_feed = 60 feeds = [ - ('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Hardware', - 'http://www.3dnews.ru/news/rss/'), - ('\u041d\u043e\u0432\u043e\u0441\u0442\u0438 Software', - 'http://www.3dnews.ru/software-news/rss/'), - ('\u0423\u043c\u043d\u044b\u0435 \u0432\u0435\u0449\u0438', - 'http://www.3dnews.ru/smart-things/rss/'), - ('\u0410\u043d\u0430\u043b\u0438\u0442\u0438\u043a\u0430', - 'http://www.3dnews.ru/editorial/rss/'), - ('\u041f\u0440\u043e\u0446\u0435\u0441\u0441\u043e\u0440\u044b \u0438 \u043f\u0430\u043c\u044f\u0442\u044c', - 'http://www.3dnews.ru/cpu/rss/'), - ('\u041c\u0430\u0442\u0435\u0440\u0438\u043d\u0441\u043a\u0438\u0435 \u043f\u043b\u0430\u0442\u044b', - 'http://www.3dnews.ru/motherboard/rss/'), - ('\u041a\u043e\u0440\u043f\u0443\u0441\u0430, \u0411\u041f \u0438 \u043e\u0445\u043b\u0430\u0436\u0434\u0435\u043d\u0438\u0435', - 'http://www.3dnews.ru/cooling/rss/'), - ('\u0412\u0438\u0434\u0435\u043e\u043a\u0430\u0440\u0442\u044b', - 'http://www.3dnews.ru/video/rss/'), - ('\u041c\u043e\u043d\u0438\u0442\u043e\u0440\u044b \u0438 \u043f\u0440\u043e\u0435\u043a\u0442\u043e\u0440\u044b', - 'http://www.3dnews.ru/display/rss/'), - ('\u041d\u0430\u043a\u043e\u043f\u0438\u0442\u0435\u043b\u0438', - 'http://www.3dnews.ru/storage/rss/'), - ('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0439 \u0430\u0432\u0442\u043e\u043c\u043e\u0431\u0438\u043b\u044c', - 'http://www.3dnews.ru/auto/rss/'), - ('\u0421\u043e\u0442\u043e\u0432\u0430\u044f \u0441\u0432\u044f\u0437\u044c', - 'http://www.3dnews.ru/phone/rss/'), - ('\u041f\u0435\u0440\u0438\u0444\u0435\u0440\u0438\u044f', - 'http://www.3dnews.ru/peripheral/rss/'), - ('\u041d\u043e\u0443\u0442\u0431\u0443\u043a\u0438 \u0438 \u041f\u041a', - 'http://www.3dnews.ru/mobile/rss/'), - ('\u041f\u043b\u0430\u043d\u0448\u0435\u0442\u044b', - 'http://www.3dnews.ru/tablets/rss/'), - ('\u0417\u0432\u0443\u043a \u0438 \u0430\u043a\u0443\u0441\u0442\u0438\u043a\u0430', - 'http://www.3dnews.ru/multimedia/rss/'), - ('\u0426\u0438\u0444\u0440\u043e\u0432\u043e\u0435 \u0444\u043e\u0442\u043e \u0438 \u0432\u0438\u0434\u0435\u043e', - 'http://www.3dnews.ru/digital/rss/'), - ('\u0421\u0435\u0442\u0438 \u0438 \u043a\u043e\u043c\u043c\u0443\u043d\u0438\u043a\u0430\u0446\u0438\u0438', - 'http://www.3dnews.ru/communication/rss/'), - ('\u0418\u0433\u0440\u044b', 'http://www.3dnews.ru/games/rss/'), - ('\u041f\u0440\u043e\u0433\u0440\u0430\u043c\u043c\u043d\u043e\u0435 \u043e\u0431\u0435\u0441\u043f\u0435\u0447\u0435\u043d\u0438\u0435', - 'http://www.3dnews.ru/software/rss/'), - ('Off-\u0441\u044f\u043d\u043a\u0430', - 'http://www.3dnews.ru/offsyanka/rss/'), - ('\u041c\u0430\u0441\u0442\u0435\u0440\u0441\u043a\u0430\u044f', - 'http://www.3dnews.ru/workshop/rss/'), - ('ServerNews', 'http://servernews.ru/rss'), + ( + '\u0412\u0430\u0436\u043D\u044B\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438', + 'https://3dnews.ru/breaking/rss/' + ), + ( + '\u0412\u0441\u0435 \u043D\u043E\u0432\u043E\u0441\u0442\u0438', + 'https://3dnews.ru/news/rss/' + ), + ( + '\u041D\u043E\u0432\u043E\u0441\u0442\u0438 - \u0445\u0430\u0440\u0434', + 'https://3dnews.ru/hardware-news/rss' + ), + ( + '\u041D\u043E\u0432\u043E\u0441\u0442\u0438 - \u0433\u0430\u0434\u0436\u0435\u0442\u044B', + 'https://3dnews.ru/gadgets/rss/' + ), + ( + '\u041D\u043E\u0432\u043E\u0441\u0442\u0438 - \u0441\u043E\u0444\u0442', + 'https://3dnews.ru/software-news/rss/' + ), + ( + '\u041D\u043E\u0432\u043E\u0441\u0442\u0438 - \u0438\u0433\u0440\u044B', + 'https://3dnews.ru/games/rss/' + ), + ( + '\u0423\u043C\u043D\u044B\u0435 \u0412\u0435\u0449\u0438', + 'https://3dnews.ru/smart-things/rss/' + ), + ( + '\u0410\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430', + 'https://3dnews.ru/editorial/rss/' + ), + ( + '\u041F\u0440\u043E\u0446\u0435\u0441\u0441\u043E\u0440\u044B \u0438 \u043F\u0430\u043C\u044F\u0442\u044C', + 'https://3dnews.ru/cpu/rss/' + ), + ( + '\u041C\u0430\u0442\u0435\u0440\u0438\u043D\u0441\u043A\u0438\u0435 \u043F\u043B\u0430\u0442\u044B', + 'https://3dnews.ru/motherboard/rss/' + ), + ( + '\u041A\u043E\u0440\u043F\u0443\u0441\u0430, \u0411\u041F \u0438 \u043E\u0445\u043B\u0430\u0436\u0434\u0435\u043D\u0438\u0435', + 'https://3dnews.ru/cooling/rss/' + ), + ( + '\u0412\u0438\u0434\u0435\u043E\u043A\u0430\u0440\u0442\u044B', + 'https://3dnews.ru/video/rss/' + ), + ( + '\u041C\u043E\u043D\u0438\u0442\u043E\u0440\u044B \u0438 \u043F\u0440\u043E\u0435\u043A\u0442\u043E\u0440\u044B', + 'https://3dnews.ru/display/rss/' + ), + ( + '\u041D\u0430\u043A\u043E\u043F\u0438\u0442\u0435\u043B\u0438', + 'https://3dnews.ru/storage/rss/' + ), + ( + '\u0426\u0438\u0444\u0440\u043E\u0432\u043E\u0439 \u0430\u0432\u0442\u043E\u043C\u043E\u0431\u0438\u043B\u044C', + 'https://3dnews.ru/auto/rss/' + ), + ( + '\u0421\u043E\u0442\u043E\u0432\u0430\u044F \u0441\u0432\u044F\u0437\u044C', + 'https://3dnews.ru/phone/rss/' + ), + ( + '\u041F\u0435\u0440\u0438\u0444\u0435\u0440\u0438\u044F', + 'https://3dnews.ru/peripheral/rss/' + ), + ( + '\u041D\u043E\u0443\u0442\u0431\u0443\u043A\u0438 \u0438 \u041F\u041A', + 'https://3dnews.ru/mobile/rss/' + ), + ( + '\u041F\u043B\u0430\u043D\u0448\u0435\u0442\u044B', + 'https://3dnews.ru/tablets/rss/' + ), + ( + '\u0417\u0432\u0443\u043A \u0438 \u0430\u043A\u0443\u0441\u0442\u0438\u043A\u0430', + 'https://3dnews.ru/multimedia/rss/' + ), + ( + '\u0426\u0438\u0444\u0440\u043E\u0432\u043E\u0435 \u0444\u043E\u0442\u043E \u0438 \u0432\u0438\u0434\u0435\u043E', + 'https://3dnews.ru/digital/rss/' + ), + ( + '\u0421\u0435\u0442\u0438 \u0438 \u043A\u043E\u043C\u043C\u0443\u043D\u0438\u043A\u0430\u0446\u0438\u0438', + 'https://3dnews.ru/communication/rss/' + ), + ( + '\u041F\u0440\u043E\u0433\u0440\u0430\u043C\u043C\u043D\u043E\u0435 \u043E\u0431\u0435\u0441\u043F\u0435\u0447\u0435\u043D\u0438\u0435', + 'https://3dnews.ru/software/rss/' + ), + ('Off-\u0441\u044F\u043D\u043A\u0430', 'https://3dnews.ru/offsyanka/rss/'), + ( + '\u041C\u0430\u0441\u0442\u0435\u0440\u0441\u043A\u0430\u044F', + 'https://3dnews.ru/workshop/rss/' + ), + ( + 'ServerNews - \u0441\u0442\u0430\u0442\u044C\u0438', + 'https://servernews.ru/rss' + ), + ( + 'ServerNews - \u043D\u043E\u0432\u043E\u0441\u0442\u0438', + 'https://servernews.ru/news/rss' + ) ] + + def print_version(self, url): + return url + '/print' diff --git a/recipes/7x7.recipe b/recipes/7x7.recipe index 5c4f7b56ec..691909692a 100644 --- a/recipes/7x7.recipe +++ b/recipes/7x7.recipe @@ -1,35 +1,34 @@ #!/usr/bin/env python # vim:fileencoding=utf-8 + from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1467715002(BasicNewsRecipe): +class News(BasicNewsRecipe): title = '7x7' __author__ = 'bugmen00t' description = '7x7 - межрегиональный интернет-журнал' publisher = '7x7-journal.ru' category = 'news' - cover_url = u'https://7x7-journal.ru/public/img/template/logo.png' + cover_url = u'https://semnasem.org/site-specific/7x7-journal.ru/images/frontend/logo/logo-header.svg' language = 'ru' no_stylesheets = True remove_javascript = True auto_cleanup = False - oldest_article = 100 - max_articles_per_feed = 100 + oldest_article = 14 + max_articles_per_feed = 30 feeds = [ - ('7x7', 'https://7x7-journal.ru/newsline/rss'), + ('7x7', 'https://semnasem.org/rss/default.xml'), ] - remove_tags_before = dict(name='article') - remove_tags_after = dict(name='div', attrs={'class': 'article__footer'}) + remove_tags_before = dict(name='article', attrs={'class': 'article'}) + + remove_tags_after = dict(name='div', attrs={'class': 'article__footer-wrap'}) + remove_tags = [ - dict(name='div', attrs={ - 'class': 'article__footer' - }), - dict(name='div', attrs={ - 'class': 'article__tags' - }) + dict(name='div', attrs={'class': 'article__footer-wrap'}), + dict(name='div', attrs={'class': 'promolink-widget'}) ] diff --git a/recipes/id_pixel.recipe b/recipes/id_pixel.recipe index f12da68f32..ac55cb039d 100644 --- a/recipes/id_pixel.recipe +++ b/recipes/id_pixel.recipe @@ -1,31 +1,44 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # vim:fileencoding=utf-8 + from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe class IdPixel(BasicNewsRecipe): - title = '\u0418\u0434\u0435\u0430\u043B\u044C\u043D\u044B\u0439 \u043F\u0438\u043A\u0441\u0435\u043B\u044C' - cover_url = u'http://idpixel.ru/i/logo.png' - description = '\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0439 \u043F\u0440\u043E\u0435\u043A\u0442 \u043E \u0440\u0435\u0442\u0440\u043E-\u0438\u0433\u0440\u0430\u0445 \u0438 \u0440\u0435\u0442\u0440\u043E-\u0442\u0435\u0445\u043D\u0438\u043A\u0435. \u0412\u043E\u0441\u044C\u043C\u0438\u0431\u0438\u0442\u043D\u044B\u0435 \u0438\u0433\u0440\u044B, \u0448\u0435\u0441\u0442\u043D\u0430\u0434\u0446\u0430\u0442\u0438\u0431\u0438\u0442\u043D\u044B\u0435 \u043A\u043E\u043D\u0441\u043E\u043B\u0438, \u0434\u043E\u043C\u0430\u0448\u043D\u0438\u0435 \u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440\u044B \u0441 \u0438\u0433\u0440\u0430\u043C\u0438 \u043D\u0430 \u043A\u0430\u0441\u0441\u0435\u0442\u0430\u0445 \u0438 \u0442\u0430\u043A \u0434\u0430\u043B\u0435\u0435. \u041C\u044B \u0438\u0449\u0435\u043C \u0440\u0435\u0442\u0440\u043E-\u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u0441\u0432\u0435\u0442\u0443 \u0438 \u0434\u043E\u043D\u043E\u0441\u0438\u043C \u0438\u0445 \u0434\u043E \u0432\u0430\u0441.' # noqa - publisher = '\u041C\u0438\u0445\u0430\u0438\u043B \u0421\u0443\u0434\u0430\u043A\u043E\u0432' - category = 'news' - __author__ = 'bugmen00t' - language = 'ru' - no_stylesheets = False + title = '\u0418\u0434\u0435\u0430\u043B\u044C\u043D\u044B\u0439 \u043F\u0438\u043A\u0441\u0435\u043B\u044C' + cover_url = u'https://idpixel.ru/i/logo2x.png' + description = '\u041D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0439 \u043F\u0440\u043E\u0435\u043A\u0442 \u043E \u0440\u0435\u0442\u0440\u043E-\u0438\u0433\u0440\u0430\u0445 \u0438 \u0440\u0435\u0442\u0440\u043E-\u0442\u0435\u0445\u043D\u0438\u043A\u0435. \u0412\u043E\u0441\u044C\u043C\u0438\u0431\u0438\u0442\u043D\u044B\u0435 \u0438\u0433\u0440\u044B, \u0448\u0435\u0441\u0442\u043D\u0430\u0434\u0446\u0430\u0442\u0438\u0431\u0438\u0442\u043D\u044B\u0435 \u043A\u043E\u043D\u0441\u043E\u043B\u0438, \u0434\u043E\u043C\u0430\u0448\u043D\u0438\u0435 \u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440\u044B \u0441 \u0438\u0433\u0440\u0430\u043C\u0438 \u043D\u0430 \u043A\u0430\u0441\u0441\u0435\u0442\u0430\u0445 \u0438 \u0442\u0430\u043A \u0434\u0430\u043B\u0435\u0435. \u041C\u044B \u0438\u0449\u0435\u043C \u0440\u0435\u0442\u0440\u043E-\u043D\u043E\u0432\u043E\u0441\u0442\u0438 \u043F\u043E \u0432\u0441\u0435\u043C\u0443 \u0441\u0432\u0435\u0442\u0443 \u0438 \u0434\u043E\u043D\u043E\u0441\u0438\u043C \u0438\u0445 \u0434\u043E \u0432\u0430\u0441.' # noqa + publisher = '\u041C\u0438\u0445\u0430\u0438\u043B \u0421\u0443\u0434\u0430\u043A\u043E\u0432' + category = 'news' + __author__ = 'bugmen00t' + language = 'ru' + no_stylesheets = False remove_javascript = True - oldest_article = 300 - max_articles_per_feed = 100 + auto_cleanup = True + oldest_article = 100 + max_articles_per_feed = 50 - remove_tags_before = dict(name='div', attrs={'class':'blog-post'}) - remove_tags_after = dict(name='div', attrs={'style':'margin: 20px 0 0 2px;font-size: 16px;'}) - remove_tags = [dict(name='div',attrs={'class':' likely__widget likely__widget_vkontakte'}), - dict(name='div', attrs={'class':' likely__widget likely__widget_twitter'}), - dict(name='div', attrs={'class':' likely__widget likely__widget_facebook'}), - dict(name='div', attrs={'class':' likely__widget likely__widget_telegram'}), - dict(name='div', attrs={'class':' likely__widget likely__widget_odnoklassniki'}), - dict(name='div', attrs={'class':'comments_input_disabled'}), - dict(name='div', attrs={'id':'comments'}) - ] + remove_tags_before = dict(name='div', attrs={'class': 'blog-post'}) + remove_tags_after = dict( + name='div', attrs={'style': 'margin: 20px 0 0 2px;font-size: 16px;'} + ) + remove_tags = [ + dict( + name='div', attrs={'class': ' likely__widget likely__widget_vkontakte'} + ), + dict(name='div', attrs={'class': ' likely__widget likely__widget_twitter'}), + dict(name='div', attrs={'class': ' likely__widget likely__widget_facebook'}), + dict(name='div', attrs={'class': ' likely__widget likely__widget_telegram'}), + dict( + name='div', + attrs={'class': ' likely__widget likely__widget_odnoklassniki'} + ), + dict(name='div', attrs={'class': 'comments_input_disabled'}), + dict(name='div', attrs={'id': 'comments'}) + ] - feeds = [(u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', u'http://idpixel.ru/rss/news.rss')] + feeds = [( + u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', + u'https://idpixel.ru/rss/news.rss' + )] diff --git a/recipes/ixbt.recipe b/recipes/ixbt.recipe index a889f67ebf..97e7f68b04 100644 --- a/recipes/ixbt.recipe +++ b/recipes/ixbt.recipe @@ -1,59 +1,110 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # vim:fileencoding=utf-8 + from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1438255077(BasicNewsRecipe): +class Ixbt(BasicNewsRecipe): title = 'iXBT.com' __author__ = 'bugmen00t' - description = 'Специализированный российский информационно-аналитический сайт с самыми актуальными новостями из сферы IT, детальными обзорами смартфонов, планшетов, персональных компьютеров, компьютерных комплектующих, программного обеспечения и периферийных устройств' # noqa + description = 'Специализированный российский информационно-аналитический сервер, освещающий вопросы аппаратного обеспечения персональных компьютеров, коммуникаций и серверов, 3D-графики и звука, цифрового фото и видео, Hi-Fi аппаратуры и проекторов, мобильной связи и периферии, игровых приложений и многого другого.' # noqa publisher = 'www.ixbt.com' category = 'news' - cover_url = u'http://www.ixbt.com/pic/articles/logo.png' + cover_url = u'https://www.ixbt.com/images/ixbt-logo-new.jpg' language = 'ru' auto_cleanup = True oldest_article = 30 max_articles_per_feed = 100 - feeds = [(u'Новые статьи', u'http://www.ixbt.com/export/articles.rss'), - (u'Новости', u'http://www.ixbt.com/export/news.rss'), - (u'Новости железа', u'http://www.ixbt.com/export/hardnews.rss'), - (u'Новости ПО', u'http://www.ixbt.com/export/softnews.rss'), - (u'Новости DVD и домашних кинотеатров', - u'http://www.ixbt.com/export/dvdnews.rss'), - (u'Новости Apple', u'http://www.ixbt.com/export/applenews.rss'), - (u'Процессоры и системы охлаждения', - u'http://www.ixbt.com/export/sec_cpu.rss'), - (u'Системные платы, память и чипсеты', - u'http://www.ixbt.com/export/sec_mainboard.rss'), - (u'3D-Видео', u'http://www.ixbt.com/export/sec_video.rss'), - (u'Сети и Серверы', u'http://www.ixbt.com/export/sec_comm.rss'), - (u'Оптические приводы и носители информации', - u'http://www.ixbt.com/export/sec_optical.rss'), - (u'Принтеры и МФУ', u'http://www.ixbt.com/export/sec_printer.rss'), - (u'Мониторы и TV-тюнеры', u'http://www.ixbt.com/export/sec_monitor.rss'), - (u'Жёсткие диски и Flash-накопители', - u'http://www.ixbt.com/export/sec_storage.rss'), - (u'Цифровой звук: акустика, звуковые карты, наушники', - u'http://www.ixbt.com/export/sec_multimedia.rss'), - (u'ProAudio: звуковые карты, интерфейсы, студийные мониторы и наушники, MIDI-клавиатуры, профессиональное ПО', - u'http://www.ixbt.com/export/sec_proaudio.rss'), - (u'Цифровая фотография', u'http://www.ixbt.com/export/sec_digimage.rss'), - (u'Проекторы, кино и домашние кинотеатры', - u'http://www.ixbt.com/export/sec_dvd.rss'), - (u'Цифровое видео: камеры, захват и монтаж', - u'http://www.ixbt.com/export/sec_divideo.rss'), - (u'Ноутбуки и планшетные ПК', - u'http://www.ixbt.com/export/sec_portopc.rss'), - (u'Карманные компьютеры', u'http://www.ixbt.com/export/sec_pda.rss'), - (u'Мобильные телефоны', u'http://www.ixbt.com/export/sec_mobile.rss'), - (u'Периферия: мыши, клавиатуры, джойстики', - u'http://www.ixbt.com/export/sec_peripheral.rss'), - (u'Корпуса, блоки питания и ИБП', - u'http://www.ixbt.com/export/sec_power.rss'), - (u'Общеиндустриальные новости о новых продуктах и технологиях', u'http://www.ixbt.com/export/sec_editorial.rss')] - remove_tags_before = dict(name='div', attrs={'class': 'content'}) + remove_tags_after = dict(name='ul', attrs={'id': 'soc_ShareBlock'}) + + feeds = [ + ( + u'\u0421\u0442\u0430\u0442\u044C\u0438', + 'http://www.ixbt.com/export/articles.rss' + ), + ( + u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', + 'http://www.ixbt.com/export/news.rss' + ), + ( + 'Свежие новости DVD и домашних кинотеатров', + 'http://www.ixbt.com/export/dvdnews.rss' + ), + ( + 'Свежие новости из мира Apple', + 'http://www.ixbt.com/export/applenews.rss' + ), + ( + u'\u041F\u0440\u043E\u0446\u0435\u0441\u0441\u043E\u0440\u044B', + 'http://www.ixbt.com/export/sec_cpu.rss' + ), + ( + 'Системные платы, память и чипсеты', + 'http://www.ixbt.com/export/sec_mainboard.rss' + ), + ( + u'D-\u0412\u0438\u0434\u0435\u043E \u0438 TV-\u0442\u044E\u043D\u0435\u0440\u044B', + 'http://www.ixbt.com/export/sec_video.rss' + ), + ( + u'\u0421\u0435\u0442\u0438 \u0438 \u0421\u0435\u0440\u0432\u0435\u0440\u044B', + 'http://www.ixbt.com/export/sec_comm.rss' + ), + ( + 'Оптические приводы и носители информации', + 'http://www.ixbt.com/export/sec_optical.rss' + ), + ( + u'\u041F\u0440\u0438\u043D\u0442\u0435\u0440\u044B \u0438 \u041C\u0424\u0423', + 'http://www.ixbt.com/export/sec_printer.rss' + ), + ( + u'\u041C\u043E\u043D\u0438\u0442\u043E\u0440\u044B', + 'http://www.ixbt.com/export/sec_monitor.rss' + ), + ( + u'\u041D\u043E\u0441\u0438\u0442\u0435\u043B\u0438 \u0438\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u0438', + 'http://www.ixbt.com/export/sec_storage.rss' + ), + ( + u'\u0426\u0438\u0444\u0440\u043E\u0432\u043E\u0439 \u0437\u0432\u0443\u043A', + 'http://www.ixbt.com/export/sec_multimedia.rss' + ), (u'ProAudio', 'http://www.ixbt.com/export/sec_proaudio.rss'), + ( + u'\u0418\u0437\u043E\u0431\u0440\u0430\u0436\u0435\u043D\u0438\u0435 \u0432 \u0447\u0438\u0441\u043B\u0430\u0445', + 'http://www.ixbt.com/export/sec_digimage.rss' + ), + ( + 'Проекторы, кино и домашние кинотеатры', + 'http://www.ixbt.com/export/sec_dvd.rss' + ), + ( + u'\u0426\u0438\u0444\u0440\u043E\u0432\u043E\u0435 \u0432\u0438\u0434\u0435\u043E', + 'http://www.ixbt.com/export/sec_divideo.rss' + ), + ( + u'\u041C\u043E\u0431\u0438\u043B\u044C\u043D\u044B\u0435 \u041F\u041A', + 'http://www.ixbt.com/export/sec_portopc.rss' + ), + ( + u'\u041C\u043E\u0431\u0438\u043B\u044C\u043D\u044B\u0435 \u0443\u0441\u0442\u0440\u043E\u0439\u0441\u0442\u0432\u0430', + 'http://www.ixbt.com/export/sec_pda.rss' + ), + ( + u'\u0412\u0441\u0435\u0433\u0434\u0430 \u043D\u0430 \u0441\u0432\u044F\u0437\u0438', + 'http://www.ixbt.com/export/sec_mobile.rss' + ), + ( + 'Корпуса, системы питания и охлаждения', + 'http://www.ixbt.com/export/sec_power.rss' + ), + ( + u'\u041A\u043E\u043B\u043E\u043D\u043A\u0430 \u0440\u0435\u0434\u0430\u043A\u0442\u043E\u0440\u0430', + 'http://www.ixbt.com/export/sec_editorial.rss' + ), (u'iXBT Live', 'https://www.ixbt.com/live/rss/index/') + ] diff --git a/recipes/izvestia.recipe b/recipes/izvestia.recipe index 604b071f9e..3a0a201e9e 100644 --- a/recipes/izvestia.recipe +++ b/recipes/izvestia.recipe @@ -1,3 +1,6 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + __license__ = 'GPL v3' __copyright__ = '2010, Darko Miletic ' ''' @@ -9,22 +12,40 @@ from calibre.web.feeds.news import BasicNewsRecipe class Izvestia(BasicNewsRecipe): title = 'Izvestia' - __author__ = 'Darko Miletic' + __author__ = 'Darko Miletic (with fixes by bugmen00t)' description = 'News from Russia' publisher = 'Izvestia' category = 'news, politics, Russia' oldest_article = 5 max_articles_per_feed = 100 + auto_cleanup = False no_stylesheets = True use_embedded_content = False - encoding = 'cp1251' language = 'ru' publication_type = 'newspaper' - masthead_url = 'http://images.izvestia.ru/izv/sys/logo.gif' - extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana,Arial,Helvetica,sans1,sans-serif} ' # noqa - keep_only_tags = [dict(name='div', attrs={'class': 'newsFull'})] - remove_tags = [dict(name=['iframe', 'object', 'img', 'link', 'base'])] - remove_tags_before = dict(name='h1', attrs={'class': 'statya'}) + cover_url = u'https://cdn.iz.ru/profiles/portal/themes/purple/images/favicons/apple-icon-180x180.png' - feeds = [ - (u'Daily edition', u'http://rss.feedsportal.com/c/32171/f/424076/index.rss')] + remove_tags_before = dict(name='div', attrs={'role': 'article'}) + + remove_tags_after = dict(name='div', attrs={'role': 'article'}) + + remove_tags = [ + dict(name='div', attrs={'class': 'article_page__left__top__views'}), + dict(name='div', attrs={'class': 'hash_tags'}), + dict(name='div', attrs={'class': 'get_yandex_subscription_links'}), + dict(name='div', attrs={'class': 'article_buttons_block'}), + dict(name='div', attrs={'class': 'rubrics_btn'}), + dict(name='div', attrs={'class': 'hidden'}), + dict(name='div', attrs={'class': 'share_bottom2'}), + dict(name='div', attrs={'class': 'recommendation-block'}), + dict(name='div', attrs={'class': 'plug-text'}), + dict(name='div', attrs={'class': 'get_news_link'}), + dict(name='div', attrs={'itemprop': 'address'}) + ] + + feeds = [(u'Новости', u'https://iz.ru/xml/rss/all.xml')] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src': True}): + img['src'] = img['data-src'] + return soup diff --git a/recipes/kommersant.recipe b/recipes/kommersant.recipe index 2d0ec7fc3f..3e888f4c76 100644 --- a/recipes/kommersant.recipe +++ b/recipes/kommersant.recipe @@ -1,3 +1,6 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + __license__ = 'GPL v3' __copyright__ = '2010-2013, Darko Miletic ' ''' @@ -9,39 +12,172 @@ from calibre.web.feeds.news import BasicNewsRecipe class Kommersant_ru(BasicNewsRecipe): title = 'Kommersant' - __author__ = 'Darko Miletic' + __author__ = 'Darko Miletic (with fixes by bugmen00t)' description = 'News from Russia' publisher = 'Kommersant' category = 'news, politics, Russia' - oldest_article = 5 - max_articles_per_feed = 100 + oldest_article = 7 + max_articles_per_feed = 50 no_stylesheets = True use_embedded_content = False - encoding = 'cp1251' language = 'ru' publication_type = 'newspaper' - masthead_url = 'http://www.kommersant.ru/CorpPics/logo_daily_1.gif' - extra_css = """ - @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} - body{font-family: Tahoma, Arial, Helvetica, sans1, sans-serif} - .title{font-size: x-large; font-weight: bold; margin-bottom: 1em} - .subtitle{font-size: large; margin-bottom: 1em} - .document_vvodka{font-weight: bold; margin-bottom: 1em} - """ + cover_url = 'https://iv.kommersant.ru/ContentFlex/images/logo.png' - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } + remove_tags_before = dict(name='header', attrs={'class': 'doc_header'}) + + remove_tags_after = dict( + name='div', attrs={'class': 'doc__text document_authors'} + ) - keep_only_tags = [dict(attrs={'class': 'b-article'})] remove_tags = [ - dict(name=['iframe', 'object', 'link', 'img', 'base', 'meta'])] - remo_tags_after = dict(attrs={'class': 'hide1 hide2'}) + dict(name='ul', attrs={'class': 'crumbs'}), + dict(name='div', attrs={'class': 'hide_desktop'}), + dict(name='div', attrs={'class': 'incut incut--right'}), + dict(name='div', attrs={'class': 'incut incut--left'}), + dict(name='div', attrs={'class': 'incut incut--center'}), + dict(name='div', attrs={'class': 'ba'}), + dict(name='div', attrs={'id': 'lenta'}), + dict(name='div', attrs={'class': 'layout basement_news__body'}), + dict(name='footer', attrs={'class': 'footer'}), + dict(name='div', attrs={'class': 'ui-modal'}), + dict(name='section', attrs={'class': 'potd'}), + dict(name='footer', attrs={'class': 'doc_footer'}), + dict(name='div', attrs={'class': 'adv_interscroll hide_desktop'}) + ] - feeds = [(u'Articles', u'http://dynamic.feedsportal.com/pf/438800/http://feeds.kommersant.ru/RSS_Export/RU/daily.xml')] - - def get_article_url(self, article): - return article.get('guid', None) - - def print_version(self, url): - return url.replace('/doc-rss/', '/Doc/') + '/Print' + feeds = [ + ( + '\u0413\u043B\u0430\u0432\u043D\u043E\u0435', + 'https://www.kommersant.ru/rss/main.xml' + ), + ( + '\u0413\u0430\u0437\u0435\u0442\u0430 "\u041A\u043E\u043C\u043C\u0435\u0440\u0441\u0430\u043D\u0442"', + 'https://www.kommersant.ru/rss/daily.xml' + ), + ( + '\u041B\u0435\u043D\u0442\u0430 \u043D\u043E\u0432\u043E\u0441\u0442\u0435\u0439', + 'https://www.kommersant.ru/RSS/news.xml' + ), + ( + '\u041C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B \u0441 \u0441\u0430\u0439\u0442\u0430', + 'https://www.kommersant.ru/RSS/corp.xml' + ), + ( + '\u0420\u0430\u0434\u0438\u043E \u041A\u043E\u043C\u043C\u0435\u0440\u0441\u0430\u043D\u0442\u044A-FM', + 'https://www.kommersant.ru/RSS/radio.xml' + ), + ( + '\u0422\u0435\u043C\u0430\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435 \u043F\u0440\u0438\u043B\u043E\u0436\u0435\u043D\u0438\u044F', + 'https://www.kommersant.ru/RSS/tema.xml' + ), + ( + '\u0416\u0443\u0440\u043D\u0430\u043B \u00AB\u041E\u0413\u041E\u041D\u0401\u041A\u00BB', + 'https://www.kommersant.ru/RSS/ogoniok.xml' + ), + ( + '\u0416\u0443\u0440\u043D\u0430\u043B \u00AB\u041A\u043E\u043C\u043C\u0435\u0440\u0441\u0430\u043D\u0442\u044A WEEKEND\u00BB', + 'https://www.kommersant.ru/RSS/weekend.xml' + ), + ( + 'Журнал «Коммерсантъ АВТОПИЛОТ»', + 'https://www.kommersant.ru/RSS/auto.xml' + ), + ( + '\u041F\u043E\u043B\u0438\u0442\u0438\u043A\u0430', + 'https://www.kommersant.ru/rss/section-politics.xml' + ), + ( + '\u042D\u043A\u043E\u043D\u043E\u043C\u0438\u043A\u0430', + 'https://www.kommersant.ru/RSS/section-economics.xml' + ), + ( + '\u0411\u0438\u0437\u043D\u0435\u0441', + 'https://www.kommersant.ru/rss/section-business.xml' + ), + ( + '\u0412 \u043C\u0438\u0440\u0435', + 'https://www.kommersant.ru/rss/section-world.xml' + ), + ( + '\u041F\u0440\u043E\u0438\u0441\u0448\u0435\u0441\u0442\u0432\u0438\u044F', + 'https://www.kommersant.ru/rss/section-accidents.xml' + ), + ( + '\u041E\u0431\u0449\u0435\u0441\u0442\u0432\u043E', + 'https://www.kommersant.ru/rss/section-society.xml' + ), + ( + '\u041A\u0443\u043B\u044C\u0442\u0443\u0440\u0430', + 'https://www.kommersant.ru/rss/section-culture.xml' + ), + ( + '\u0421\u043F\u043E\u0440\u0442', + 'https://www.kommersant.ru/rss/section-sport.xml' + ), ('Hi-Tech', 'https://www.kommersant.ru/RSS/section-hitech.xml'), + ( + '\u0410\u0432\u0442\u043E', + 'https://www.kommersant.ru/RSS/Autopilot_on.xml' + ), + ( + '\u0421\u0442\u0438\u043B\u044C', + 'https://www.kommersant.ru/RSS/section-style.xml' + ), + ( + '\u0421\u0430\u043D\u043A\u0442-\u041F\u0435\u0442\u0435\u0440\u0431\u0443\u0440\u0433', + 'https://www.kommersant.ru/rss/regions/piter_all.xml' + ), + ( + '\u0412\u043E\u0440\u043E\u043D\u0435\u0436', + 'https://www.kommersant.ru/rss/regions/vrn_all.xml' + ), + ( + '\u0415\u043A\u0430\u0442\u0435\u0440\u0438\u043D\u0431\u0443\u0440\u0433', + 'https://www.kommersant.ru/rss/regions/ekaterinburg_all.xml' + ), + ( + '\u0418\u0436\u0435\u0432\u0441\u043A', + 'https://www.kommersant.ru/rss/regions/izhevsk_all.xml' + ), + ( + '\u041A\u0430\u0437\u0430\u043D\u044C', + 'https://www.kommersant.ru/rss/regions/kazan_all.xml' + ), + ( + '\u041A\u0440\u0430\u0441\u043D\u043E\u0434\u0430\u0440', + 'https://www.kommersant.ru/rss/regions/krasnodar_all.xml' + ), + ( + '\u041A\u0440\u0430\u0441\u043D\u043E\u044F\u0440\u0441\u043A', + 'https://www.kommersant.ru/rss/regions/krasnoyarsk_all.xml' + ), + ( + '\u041D\u0438\u0436\u043D\u0438\u0439 \u041D\u043E\u0432\u0433\u043E\u0440\u043E\u0434', + 'https://www.kommersant.ru/rss/regions/nnov_all.xml' + ), + ( + '\u041D\u043E\u0432\u043E\u0441\u0438\u0431\u0438\u0440\u0441\u043A', + 'https://www.kommersant.ru/rss/regions/novosibirsk_all.xml' + ), + ( + '\u041F\u0435\u0440\u043C\u044C', + 'https://www.kommersant.ru/rss/regions/perm_all.xml' + ), + ( + '\u0420\u043E\u0441\u0442\u043E\u0432-\u043D\u0430-\u0414\u043E\u043D\u0443', + 'https://www.kommersant.ru/rss/regions/rostov_all.xml' + ), + ( + '\u0421\u0430\u043C\u0430\u0440\u0430', + 'https://www.kommersant.ru/rss/regions/samara_all.xml' + ), + ( + '\u0421\u0430\u0440\u0430\u0442\u043E\u0432', + 'https://www.kommersant.ru/rss/regions/saratov_all.xml' + ), + ('\u0423\u0444\u0430', 'https://www.kommersant.ru/rss/regions/ufa_all.xml'), + ( + '\u0427\u0435\u043B\u044F\u0431\u0438\u043D\u0441\u043A', + 'https://www.kommersant.ru/rss/regions/chelyabinsk_all.xml' + ) + ] diff --git a/recipes/kompiutierra.recipe b/recipes/kompiutierra.recipe index 7f4364108f..14d0480bef 100644 --- a/recipes/kompiutierra.recipe +++ b/recipes/kompiutierra.recipe @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# vim:fileencoding=utf-8 __license__ = 'GPL v3' __copyright__ = '2015, lcd1232, malexey1984@gmail.com' @@ -10,27 +10,25 @@ from calibre.web.feeds.news import BasicNewsRecipe class Computerra(BasicNewsRecipe): title = u'\u041a\u043e\u043c\u043f\u044c\u044e\u0442\u0435\u0440\u0440\u0430' + __author__ = 'lcd1232 (with fixes by bugmen00t)' + description = 'Компьютерра: все новости про компьютеры, железо, новые технологии, информационные технологии' + cover_url = 'https://yt3.ggpht.com/ytc/AKedOLRCMA71rKaP4HfL2W26A-VdvsBj9BcOo7S6poTR=s900-c-k-c0x00ffffff-no-rj' + language = 'ru' oldest_article = 100 - __author__ = 'lcd1232' max_articles_per_feed = 50 use_embedded_content = False remove_javascript = True - no_stylesheets = True + no_stylesheets = False conversion_options = {'linearize_tables': True} simultaneous_downloads = 5 - language = 'ru' - description = u'Компьютерра: все новости про компьютеры, железо, новые технологии, информационные технологии' - feeds = [(u'Компьютерра-Онлайн', 'http://feeds.feedburner.com/ct_news/'), ] + remove_tags_before = dict(name='div', attrs={'id': 'article'}) - remove_tags = [ - dict(name='div', attrs={'class': [ - 'article-soc', 'article-tags', 'also', 'item-article item-article-also', 'item-ban-700']}), - dict(name='div', attrs={'id': 'form'}) - ] + remove_tags_after = dict(name='div', attrs={'class': 'article-body'}) - keep_only_tags = [ - dict(name='div', attrs={'class': 'main-column main-column-article'}), - dict(name='div', attrs={'id': 'posts'}) - ] - cover_url = 'https://pp.vk.me/c628429/v628429830/19a22/mlm_LC_ZEa4.jpg' + remove_tags = [dict(name='div', attrs={'class': 'cta-row'})] + + feeds = [( + u'\u041A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440\u0440\u0430', + 'https://www.computerra.ru/feed/' + )] diff --git a/recipes/media_zone.recipe b/recipes/media_zone.recipe index 1204f8df90..23900b59de 100644 --- a/recipes/media_zone.recipe +++ b/recipes/media_zone.recipe @@ -1,32 +1,47 @@ #!/usr/bin/env python # vim:fileencoding=utf-8 + from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1467719487(BasicNewsRecipe): - title = '\u041c\u0435\u0434\u0438\u0430\u0417\u043e\u043d\u0430' +class MediaZona(BasicNewsRecipe): + title = '!!!\u041c\u0435\u0434\u0438\u0430\u0417\u043e\u043d\u0430' __author__ = 'bugmen00t' - description = '\u041E\u0431\u0449\u0435\u0441\u0442\u0432\u0435\u043D\u043D\u043E-\u043F\u043E\u043B\u0438\u0442\u0438\u0447\u0435\u0441\u043A\u043E\u0435 \u0438\u0437\u0434\u0430\u043D\u0438\u0435, \u0441\u0434\u0435\u043B\u0430\u0432\u0448\u0435\u0435 \u0430\u043A\u0446\u0435\u043D\u0442 \u043D\u0430 \u0444\u0443\u043D\u043A\u0446\u0438\u043E\u043D\u0438\u0440\u043E\u0432\u0430\u043D\u0438\u0438 \u0437\u0430\u043A\u043E\u043D\u0430 \u0432 \u0420\u043E\u0441\u0441\u0438\u0438. \u041F\u043E \u043C\u043D\u0435\u043D\u0438\u044E \u0430\u0432\u0442\u043E\u0440\u0438\u0442\u0435\u0442\u043D\u044B\u0445 \u043C\u0435\u0434\u0438\u0430\u044D\u043A\u0441\u043F\u0435\u0440\u0442\u043E\u0432, \u043F\u043E \u0446\u0438\u0442\u0438\u0440\u0443\u0435\u043C\u043E\u0441\u0442\u0438 \u0438 \u043F\u043E\u0441\u0435\u0449\u0430\u0435\u043C\u043E\u0441\u0442\u0438 \u0444\u043E\u0440\u043C\u0430\u0442 \u00AB\u041C\u0435\u0434\u0438\u0430\u0437\u043E\u043D\u044B\u00BB \u043E\u043A\u0430\u0437\u0430\u043B\u0441\u044F \u0432\u0435\u0434\u0443\u0449\u0438\u043C \u0444\u043E\u0440\u043C\u0430\u0442\u043E\u043C \u043D\u043E\u0432\u043E\u0441\u0442\u043D\u043E\u0433\u043E \u0438\u0437\u0434\u0430\u043D\u0438\u044F \u0432 \u0420\u043E\u0441\u0441\u0438\u0438 2015 \u0433\u043E\u0434\u0430. \u00AB\u041C\u0435\u0434\u0438\u0430\u0437\u043E\u043D\u0430\u00BB \u043F\u0438\u0448\u0435\u0442 \u043E \u0440\u0435\u0430\u043B\u044C\u043D\u043E \u043F\u0440\u043E\u0438\u0441\u0445\u043E\u0434\u044F\u0449\u0435\u043C \u0432 \u0420\u043E\u0441\u0441\u0438\u0438, \u043F\u0435\u0440\u0432\u043E\u0439 \u0443\u043B\u0430\u0432\u043B\u0438\u0432\u0430\u044F \u0432\u0435\u043A\u0442\u043E\u0440\u044B \u0440\u0430\u0437\u0432\u0438\u0442\u0438\u044F \u043E\u0431\u0449\u0435\u0441\u0442\u0432\u0430.' # noqa + description = 'Общественно-политическое издание, сделавшее акцент на функционировании закона в России. По мнению авторитетных медиаэкспертов, по цитируемости и посещаемости формат «Медиазоны» оказался ведущим форматом новостного издания в России 2015 года. «Медиазона» пишет о реально происходящем в России, первой улавливая векторы развития общества.' # noqa publisher = 'zona.media' category = 'news' - cover_url = u'https://zona.media//s/favicon/mstile-310x310.png' + cover_url = u'https://zona.media/s/share/default_mz.png' language = 'ru' no_stylesheets = False remove_javascript = True - auto_cleanup = False + auto_cleanup = True - oldest_article = 200 + oldest_article = 30 max_articles_per_feed = 100 - feeds = [ - ('\u041d\u043e\u0432\u043e\u0441\u0442\u0438', - 'https://zona.media/rss/news.php'), + remove_tags_before = dict( + name='section', attrs={'class': 'mz-layout-content__row pt0 clearfix'} + ) + + remove_tags_after = dict(name='div', attrs={'class': 'mz-publish__wrapper'}) + + remove_tags = [ + dict(name='div', attrs={'class': 'mz-agent-banner'}), + dict(name='section', attrs={'data-share-id': 'post'}) ] - remove_tags_before = dict(name='header', attrs={ - 'class': 'mz-publish__title'}) - remove_tags_after = dict(name='section', attrs={ - 'class': 'mz-publish__text'}) - remove_tags = [dict(name='div', attrs={'class': 'mz-publish-share__item'}), - dict(name='section', attrs={'class': 'mz-layout-content__col-main'})] + feeds = [ + ( + '\u041C\u0435\u0434\u0438\u0430\u0437\u043E\u043D\u0430 ', + 'https://zona.media/rss' + ), + ( + '\u0411\u0435\u043B\u0430\u0440\u0443\u0441\u044C', + 'https://mediazona.by/rss' + ), + ( + '\u0426\u0435\u043D\u0442\u0440\u0430\u043B\u044C\u043D\u0430\u044F \u0410\u0437\u0438\u044F', + 'https://mediazona.ca/rss' + ), + ] diff --git a/recipes/pravda_ru.recipe b/recipes/pravda_ru.recipe index 21ef1978dc..a92c1b49b3 100644 --- a/recipes/pravda_ru.recipe +++ b/recipes/pravda_ru.recipe @@ -1,49 +1,46 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from __future__ import unicode_literals, division, absolute_import, print_function +from calibre.web.feeds.news import BasicNewsRecipe + __license__ = 'GPL v3' __copyright__ = '2012, Darko Miletic ' ''' www.pravda.ru ''' -from calibre.web.feeds.news import BasicNewsRecipe - class Pravda_ru(BasicNewsRecipe): - title = u'Правда' - __author__ = 'Darko Miletic' - description = u'Правда.Ру: Аналитика и новости' + title = u'\u041F\u0440\u0430\u0432\u0434\u0430' + __author__ = 'Darko Miletic (with fixes by bugmen00t)' + description = 'Правда.Ру: Аналитика и новости' publisher = 'PRAVDA.Ru' category = 'news, politics, Russia' - oldest_article = 2 - max_articles_per_feed = 200 - no_stylesheets = True - encoding = 'utf8' - use_embedded_content = False language = 'ru' - remove_empty_feeds = True publication_type = 'newspaper' - masthead_url = 'http://www.pravda.ru/pix/logo.gif' - extra_css = """ - body{font-family: Arial,sans-serif } - img{margin-bottom: 0.4em; display:block} - """ + cover_url = 'http://www.pravda.ru/pix/logo.gif' + oldest_article = 7 + max_articles_per_feed = 50 + auto_cleanup = True - conversion_options = { - 'comment': description, 'tags': category, 'publisher': publisher, 'language': language - } + remove_tags_before = dict( + name='div', attrs={'class': 'full article full-article'} + ) - remove_attributes = ['lang', 'style'] - keep_only_tags = [dict(name='div', attrs={'id': 'article'})] + remove_tags_after = dict(name='div', attrs={'class': 'authors-block'}) - feeds = [ + remove_tags = [dict(name='div', attrs={'class': 'breadcumbs'})] - (u'Мир', u'http://www.pravda.ru/world/export.xml'), - (u'Религия', u'http://www.pravda.ru/faith/export.xml'), - (u'Общество', u'http://www.pravda.ru/society/export.xml'), - (u'Происшествия', u'http://www.pravda.ru/accidents/export.xml'), - (u'Наука', u'http://www.pravda.ru/science/export.xml'), - (u'Экономика', u'http://www.pravda.ru/economics/export.xml'), - (u'Политика', u'http://www.pravda.ru/politics/export.xml') - ] - - def print_version(self, url): - return url + '?mode=print' + feeds = [( + u'\u041F\u0440\u0430\u0432\u0434\u0430.RU', + 'https://www.pravda.ru/export.xml' + ), + ( + u'\u0421\u0442\u0430\u0442\u044C\u0438', + 'https://www.pravda.ru/export-articles.xml' + ), + ( + u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', + 'https://www.pravda.ru/export-news.xml' + )] diff --git a/recipes/rbc_ru.recipe b/recipes/rbc_ru.recipe index 177ef9f8c5..271b842060 100644 --- a/recipes/rbc_ru.recipe +++ b/recipes/rbc_ru.recipe @@ -1,16 +1,14 @@ -# -*- coding: utf-8 -*- - from calibre.web.feeds.news import BasicNewsRecipe class RBC_ru(BasicNewsRecipe): title = u'RBC.ru' - __author__ = 'A. Chewi' - description = u'Российское информационное агентство «РосБизнесКонсалтинг» (РБК) - ленты новостей политики, экономики и финансов, аналитические материалы, комментарии и прогнозы, тематические статьи' # noqa + __author__ = 'A. Chewi (with fixes by bugmen00t)' + description = 'Российское информационное агентство «РосБизнесКонсалтинг» (РБК) - ленты новостей политики, экономики и финансов, аналитические материалы, комментарии и прогнозы, тематические статьи' # noqa needs_subscription = False - cover_url = 'http://pics.rbc.ru/img/fp_v4/skin/img/logo.gif' + cover_url = 'https://pics.rbc.ru/img/fp_v4/skin/img/logo.gif' cover_margins = (80, 160, '#ffffff') - oldest_article = 10 + oldest_article = 20 max_articles_per_feed = 50 summary_length = 200 remove_empty_feeds = True @@ -18,44 +16,17 @@ class RBC_ru(BasicNewsRecipe): remove_javascript = True use_embedded_content = False conversion_options = {'linearize_tables': True} + auto_cleanup = True language = 'ru' timefmt = ' [%a, %d %b, %Y]' - feeds = [(u'Главные новости', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/mainnews.rss'), - (u'Политика', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/politics.rss'), - (u'Экономика', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/economics.rss'), - (u'Общество', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/society.rss'), - (u'Происшествия', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/incidents.rss'), - (u'Финансовые новости Quote.rbc.ru', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/quote.ru/mainnews.rss')] - - keep_only_tags = [dict(name='h2', attrs={}), - dict(name='div', attrs={'class': 'box _ga1_on_'}), - dict(name='h1', attrs={'class': 'news_section'}), - dict(name='div', attrs={ - 'class': 'news_body dotted_border_bottom'}), - dict(name='table', attrs={'class': 'newsBody'}), - dict(name='h2', attrs={'class': 'black'})] - - remove_tags = [dict(name='div', attrs={'class': "video-frame"}), - dict(name='div', attrs={ - 'class': "photo-container videoContainer videoSWFLinks videoPreviewSlideContainer notes"}), - dict(name='div', attrs={'class': "notes"}), - dict(name='div', attrs={'class': "publinks"}), - dict(name='a', attrs={'class': "print"}), - dict(name='div', attrs={ - 'class': "photo-report_new notes newslider"}), - dict(name='div', attrs={'class': "videoContainer"}), - dict(name='div', attrs={ - 'class': "videoPreviewSlideContainer"}), - dict(name='a', attrs={'class': "videoPreviewContainer"}), - dict(name='a', attrs={'class': "red"}), ] - - def preprocess_html(self, soup): - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup - - def print_version(self, url): - return url + '?print=true' + feeds = [ + ( + u'RSS \u043D\u043E\u0432\u043E\u0441\u0442\u0438', + u'https://rssexport.rbc.ru/rbcnews/news/30/full.rss' + ), + ( + u'\u0413\u043B\u0430\u0432\u043D\u044B\u0435\u0020\u043D\u043E\u0432\u043E\u0441\u0442\u0438', + u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/news.rss' + ), + ] diff --git a/recipes/ria_ru.recipe b/recipes/ria_ru.recipe index c6fb8ad3ee..bda155966a 100644 --- a/recipes/ria_ru.recipe +++ b/recipes/ria_ru.recipe @@ -1,7 +1,10 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + __license__ = 'GPL v3' __copyright__ = '2010, Darko Miletic ' ''' -www.rian.ru +www.ria.ru ''' from calibre.web.feeds.news import BasicNewsRecipe @@ -9,36 +12,31 @@ from calibre.web.feeds.news import BasicNewsRecipe class RIANovosti(BasicNewsRecipe): title = 'RIA Novosti - Russian' - __author__ = 'Darko Miletic' + __author__ = 'Darko Miletic (with fixes by bugmen00t)' description = 'News from Russia' - publisher = 'RIA' + publisher = '\u041C\u0418\u0410 \u00AB\u0420\u043E\u0441\u0441\u0438\u044F \u0441\u0435\u0433\u043E\u0434\u043D\u044F\u00BB\u2028 (MIA Russia Today)' category = 'news, politics, Russia' - oldest_article = 2 + oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'utf8' language = 'ru' publication_type = 'newsportal' - masthead_url = 'http://img.beta.rian.ru/images/22868/43/228684314.jpg' - extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial,Helvetica,sans1,sans-serif} ' - remove_tags_before = dict(name='h1') - remove_tags_after = dict(name='div', attrs={'class': 'text'}) - remove_tags = [dict(name=['iframe', 'object', 'link', 'img', 'base'])] + cover_url = 'https://oldimg.ria.ru/i/ria_social.png' - feeds = [ + remove_tags_before = dict(name='div', attrs={'class': 'article__header'}) - (u'Frontpage', u'http://www.rian.ru/export/rss2/lenta/index.xml'), - (u'Politics', u'http://www.rian.ru/export/rss2/politics/index.xml'), - (u'World', u'http://www.rian.ru/export/rss2/world/index.xml'), - (u'Economy', u'http://www.rian.ru/export/rss2/economy/index.xml'), - (u'Society', u'http://www.rian.ru/export/rss2/society/index.xml'), - (u'Moscow', u'http://www.rian.ru/export/rss2/moscow/index.xml'), - (u'Defense', u'http://www.rian.ru/export/rss2/defense_safety/index.xml'), - (u'Science', u'http://www.rian.ru/export/rss2/science/index.xml'), - (u'Turism', u'http://www.rian.ru/export/rss2/tourism/index.xml'), - (u'Culture', u'http://www.rian.ru/export/rss2/culture/index.xml') + remove_tags_after = dict(name='div', attrs={'class': 'article__userbar'}) + + remove_tags = [ + dict(name='div', attrs={'class': 'article__userbar'}), + dict(name='div', attrs={'class': 'article__title'}), + dict(name='div', attrs={'class': 'article__aggr'}), + dict(name='div', attrs={'class': 'article__article-info'}) ] - def print_version(self, url): - return url.replace('.html', '-print.html') + feeds = [( + u'\u041B\u0435\u043D\u0442\u0430 \u043D\u043E\u0432\u043E\u0441\u0442\u0435\u0439', + u'https://ria.ru/export/rss2/archive/index.xml' + )] diff --git a/recipes/the_insider.recipe b/recipes/the_insider.recipe index f2063cd29e..1bab06c122 100644 --- a/recipes/the_insider.recipe +++ b/recipes/the_insider.recipe @@ -1,46 +1,34 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # vim:fileencoding=utf-8 from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe -class AdjectiveSpecies(BasicNewsRecipe): - title = u'The Insider' - cover_url = u'http://theins.ru/wp-content/uploads/2013/10/logo_insider.png' - description = ('\u0420\u0430\u0441\u0441\u043B\u0435\u0434\u043E\u0432\u0430' - '\u043D\u0438\u044F \u0420\u0435\u043F\u043E\u0440\u0442\u0430\u0436\u0438' - ' \u0410\u043D\u0430\u043B\u0438\u0442\u0438\u043A\u0430') - publisher = 'theins.ru' - category = 'news' - language = 'ru' - no_stylesheets = True - __author__ = 'bugmen00t' +class TheInsider(BasicNewsRecipe): + title = 'The Insider' + cover_url = 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_nologo400/10331708/10331708-1604408816914-d03520fb339d5.jpg' # noqa + __author__ = 'bugmen00t' + description = 'Расследования Репортажи Аналитика' + publisher = 'theins.ru' + category = 'news' + language = 'ru' + no_stylesheets = True remove_javascript = True oldest_article = 300 max_articles_per_feed = 100 -# auto_cleanup = True - remove_tags_before = dict(name='div', attrs={'id':'wrapper'}) - remove_tags_after = dict(name='p', attrs={'style':' color: #999999;'}) - remove_tags = [dict(name='div',attrs={'class':'post-share'}), - dict(name='div', attrs={'class':'post-share fixed-likes'}), - dict(name='div', attrs={'class':'topads'}), - dict(name='div', attrs={'class':'pre-content-line'}), - dict(name='div', attrs={'class':'author-opinions'}), - dict(name='div', attrs={'class':'content-banner'}), - dict(name='div', attrs={'id':'sidebar'}) - ] + remove_tags_before = dict(name='div', attrs={'id': 'wrapper'}) + remove_tags_after = dict(name='p', attrs={'style': ' color: #999999;'}) + remove_tags = [ + dict(name='div', attrs={'class': 'post-share'}), + dict(name='div', attrs={'class': 'post-share fixed-likes'}), + dict(name='div', attrs={'class': 'topads'}), + dict(name='div', attrs={'class': 'pre-content-line'}), + dict(name='div', attrs={'class': 'author-opinions'}), + dict(name='div', attrs={'class': 'content-banner'}), + dict(name='div', attrs={'id': 'sidebar'}) + ] - feeds = [ - (u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', u'http://theins.ru/category/news/feed'), - (u'\u041F\u043E\u043B\u0438\u0442\u0438\u043A\u0430', u'http://theins.ru/category/politika/feed'), - (u'\u0410\u043D\u0442\u0438\u0444\u0435\u0439\u043A', u'http://theins.ru/category/antifake/feed'), - (u'\u041A\u043E\u0440\u0440\u0443\u043F\u0446\u0438\u044F', u'http://theins.ru/category/korrupciya/feed'), - (u'\u0418\u0441\u043F\u043E\u0432\u0435\u0434\u044C', u'http://theins.ru/category/confession/feed'), - (u'\u041E\u0431\u0449\u0435\u0441\u0442\u0432\u043E', u'http://theins.ru/category/obshestvo/feed'), - (u'\u0418\u0441\u0442\u043E\u0440\u0438\u044F', u'http://theins.ru/category/history/feed'), - (u'\u042D\u043A\u043E\u043D\u043E\u043C\u0438\u043A\u0430', u'http://theins.ru/category/ekonomika/feed'), - (u'\u041C\u043D\u0435\u043D\u0438\u044F', u'http://theins.ru/category/opinions/feed'), - (u'\u041F\u0435\u0440\u0435\u0432\u043E\u0434\u044B', u'http://theins.ru/category/perevody/feed'), - (u'\u041B\u043E\u043D\u0433\u0440\u0438\u0434', u'http://theins.ru/category/longread/feed') - ] + feeds = [ + (u'\u041D\u043E\u0432\u043E\u0441\u0442\u0438', u'https://theins.ru/feed') + ] diff --git a/recipes/tjournal.recipe b/recipes/tjournal.recipe index 5e55741c60..d2f570cba8 100644 --- a/recipes/tjournal.recipe +++ b/recipes/tjournal.recipe @@ -1,34 +1,70 @@ +#!/usr/bin/env python # vim:fileencoding=utf-8 + from calibre.web.feeds.news import BasicNewsRecipe -class AdjectiveSpecies(BasicNewsRecipe): +class TJournal(BasicNewsRecipe): title = u'TJournal' - __author__ = 'bug_me_not' - description = 'TJOURNAL: издание о медиа, технологиях и трендах' + __author__ = 'bug_me_not (with fixes by bugmen00t)' + description = 'TJournal: издание о медиа, технологиях и трендах' publisher = 'tjournal.ru' category = 'news' language = 'ru' - no_stylesheets = True + no_stylesheets = False remove_javascript = True - oldest_article = 300 + oldest_article = 30 max_articles_per_feed = 100 + cover_url = 'https://tjournal.ru/static/build/tjournal.ru/images/search_logo.png' + + remove_tags_before = dict(name='div', attrs={'class': 'content-title"'}) - remove_tags_before = dict( - name='div', attrs={'class': 'article grid-block'}) remove_tags_after = dict( - name='div', attrs={'class': 'comments grid-block'}) - remove_tags = [dict(name='div', attrs={'class': 'likes'}), - dict(name='div', attrs={'class': 'adv'}), - dict(name='div', attrs={'class': 'side grid-block'}), - dict(name='div', attrs={'class': 'author'}), - dict(name='span', attrs={'class': 'count-visits'}), - dict(name='a', attrs={'class': 'count-comments'}), - dict(name='div', attrs={ - 'class': 'add-comment not-logined'}), - dict(name='div', attrs={'class': 'newrphus'}), - dict(name='div', attrs={'class': 'line-tags'}), - dict(name='div', attrs={'class': 'line-banner-1'}), - dict(name='div', attrs={'class': 'newrphus'})] + name='div', + attrs={'class': 'content-footer content-footer--full l-island-a'} + ) - feeds = [(u'TJournal: последние новости', u'http://tjournal.ru/rss')] + remove_tags = [ + dict( + name='div', + attrs={'class': 'content-footer content-footer--full l-island-a'} + ), + dict(name='div', attrs={'air-module': 'module.distributionFloating'}), + dict(name='span', attrs={'class': 'content-editorial-tick'}), + dict(name='vue'), + dict(name='div', attrs={'class': 'comments'}), + dict(name='div', attrs={'class': 'propaganda'}), + dict(name='div', attrs={'class': 'propaganda propaganda--with-footer'}), + dict(name='div', attrs={'air-module': 'module.gallery'}), + dict(name='div', attrs={'class': 'content-container'}), + dict( + name='div', + attrs={'class': 'content-header__item content-header-number'} + ), + dict(name='span', attrs={'class': 'views__value'}), + dict(name='span', attrs={'class': 'views__label'}) + ] + + feeds = [( + '\u041F\u043E\u043F\u0443\u043B\u044F\u0440\u043D\u043E\u0435', + 'https://tjournal.ru/rss' + ), ( + '\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://tjournal.ru/rss/news' + ), ('\u0421\u0432\u0435\u0436\u0435\u0435', 'https://tjournal.ru/rss/new'), + ( + '\u0422\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u0438', + 'https://tjournal.ru/rss/tech' + ), + ( + '\u0420\u0430\u0437\u0431\u043E\u0440\u044B', + 'https://tjournal.ru/rss/analysis' + ), + ( + '\u0418\u043D\u0442\u0435\u0440\u043D\u0435\u0442', + 'https://tjournal.ru/rss/internet' + )] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-image-src': True}): + img['src'] = img['data-image-src'] + return soup diff --git a/recipes/trv.recipe b/recipes/trv.recipe index ffbdefbd82..0fadf59f30 100644 --- a/recipes/trv.recipe +++ b/recipes/trv.recipe @@ -1,31 +1,53 @@ -__license__ = 'GPL v3' -__copyright__ = '2010, Vadim Dyadkin dyadkin@lns.pnpi.spb.ru' +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe -class Trv(BasicNewsRecipe): +class TrvScience(BasicNewsRecipe): - title = u'\u0422\u0440\u043e\u0438\u0446\u043a\u0438\u0439 \u0432\u0430\u0440\u0438\u0430\u043d\u0442' + title = u'!!!\u0422\u0440\u043e\u0438\u0446\u043a\u0438\u0439 \u0432\u0430\u0440\u0438\u0430\u043d\u0442' language = 'ru' - __author__ = 'Vadim Dyadkin' + __author__ = 'Vadim Dyadkin (with fixes by bugmen00t)' oldest_article = 30 max_articles_per_feed = 100 recursion = 4 no_stylesheets = True simultaneous_downloads = 1 + # cover_url = 'https://i0.wp.com/trv-science.ru/uploads/logo_trv2-e1573805568596-1.png' + cover_url = 'https://i0.wp.com/trv-science.ru/uploads/cropped-trv_neur-1024.png' - keep_only_tags = [dict(name='h1'), - dict(name='div', attrs={'id': 'content'}) - ] + remove_tags_before = dict(name='main', attrs={'id': 'main'}) - remove_tags = [dict(name='div', attrs={'class': ['dateright', - 'postmeta', 'adsense-post', 'comments', 'nocomments', 'widgetarea', - 'breadcrumb']}), {'id': ['sidebar', 'l_sidebar', 'r_sidebar', 'footer', - 'homepageright0']}, {'style': 'clear:both;'}, - dict(name='ul'), - dict(name='h2') - ] + remove_tags_after = dict( + name='div', attrs={'class': 'wpdiscuz-comment-pagination'} + ) - feeds = [(u'\u0422\u0440\u043e\u0438\u0446\u043a\u0438\u0439 \u0432\u0430\u0440\u0438\u0430\u043d\u0442', - u'http://trv-science.ru/feed/')] + remove_tags = [ + dict(name='span', attrs={'class': 'fa fa-user'}), + dict(name='h4'), + dict(name='svg'), + dict(name='ul', attrs={'class': 'st-related-posts'}), + dict(name='footer', attrs={'class': 'entry-meta'}), + # dict(name='div', attrs={'id': 'comments'}), + dict(name='div', attrs={'class': 'wpd-vote'}), + dict(name='div', attrs={'class': 'mistape_caption'}), + dict( + name='div', + attrs={'class': 'wpd-comment-share wpd-hidden wpd-tooltip wpd-top'} + ), + dict(name='div', attrs={'class': 'wpd-comment-left '}), + dict(name='div', attrs={'class': 'wpd-space'}), + dict(name='div', attrs={'class': 'wpd-reply-button'}), + dict(name='div', attrs={'class': 'wpd-comment-link wpd-hidden'}), + dict(name='div', attrs={'class': 'wpd-comment-last-edited'}), + dict(name='div', attrs={'class': 'wpd-comment-date'}), + dict(name='div', attrs={'class': 'wpd-comment-info-bar'}), + dict(name='div', attrs={'class': 'wpd-form-wrap'}) + ] + + feeds = [( + u'\u0422\u0440\u043e\u0438\u0446\u043a\u0438\u0439 \u0432\u0430\u0440\u0438\u0430\u043d\u0442', + u'https://trv-science.ru/feed/' + )]