diff --git a/recipes/echo_moskvy.recipe b/recipes/echo_moskvy.recipe index 5b208aa289..866fb95d90 100644 --- a/recipes/echo_moskvy.recipe +++ b/recipes/echo_moskvy.recipe @@ -1,26 +1,34 @@ -# vim:fileencoding=utf-8 +from __future__ import unicode_literals, division, absolute_import, print_function from calibre.web.feeds.news import BasicNewsRecipe +class EchoMsk(BasicNewsRecipe): + title = '\u042D\u0425\u041E' + __author__ = 'bugmen00t' + description = ('\u042D\u0425\u041E - \u043A\u0430\u043A \u043D\u0430 \u0441\u0442\u0430\u0440\u043E\u043C' + ' \u0434\u043E\u0431\u0440\u043E\u043C \u0440\u0430\u0434\u0438\u043E') + publisher = 'Radio Echo GmbH' + category = 'news' + cover_url = u'https://echofm.online/logo.png' + language = 'ru' + no_stylesheets = True + remove_javascript = False + auto_cleanup = False + oldest_article = 7 + max_articles_per_feed = 50 -class AdjectiveSpecies(BasicNewsRecipe): - title = u'Эхо Москвы' - __author__ = 'bug_me_not' - cover_url = u'http://echo.msk.ru/i/logo.png' - description = 'Радиостанция Эхо Москвы' - publisher = 'Эхо Москвы' - category = 'news' - language = 'ru' - no_stylesheets = True - remove_javascript = True - oldest_article = 300 - max_articles_per_feed = 100 + remove_tags_before = dict(name='article') - remove_tags_before = dict(name='div', attrs={'class': 'topic'}) - remove_tags_after = dict(name='div', attrs={'class': 'typical'}) - remove_tags = [dict(name='div', attrs={'class': 'addInNetBlock'}), - dict(name='div', attrs={'class': 'flash'})] + remove_tags_after = dict(name='article') + + remove_tags = [ + dict(name='span', attrs={'class': 'sc-7b4cbb79-0 guzUFC'}), + dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}), + dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}) + ] feeds = [ - (u'Интервью и передачи', u'http://echo.msk.ru/interview/rss-fulltext.xml'), - (u'Блоги', u'http://echo.msk.ru/blog/rss.xml') + ('\u0413\u043B\u0430\u0432\u043D\u043E\u0435', 'https://echofm.online/feed'), + 
('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://echofm.online/news/feed'), + ('\u041C\u043D\u0435\u043D\u0438\u044F', 'https://echofm.online/opinions/feed'), + ('\u0414\u043E\u043A\u0443\u043C\u0435\u043D\u0442\u044B', 'https://echofm.online/documents/feed') ] diff --git a/recipes/footballua.recipe b/recipes/footballua.recipe new file mode 100644 index 0000000000..ea6ff53dd8 --- /dev/null +++ b/recipes/footballua.recipe @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + +class FootballUA(BasicNewsRecipe): + title = 'Football.UA' + __author__ = 'bugmen00t' + description = ('\u0421\u043F\u043E\u0440\u0442\u0438\u0432\u043D\u0438\u0439 \u043F\u043E\u0440\u0442\u0430\u043B' + ' \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456,' + ' \u043F\u0440\u0438\u0441\u0432\u044F\u0447\u0435\u043D\u0438\u0439 \u043B\u0438\u0448\u0435 \u0444\u0443\u0442\u0431\u043E\u043B\u0443.') + publisher = 'United Media Holding group' + category = 'news' + cover_url = u'https://s.ill.in.ua/i/news/570x380/212/212438.jpg' + language = 'uk' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + remove_empty_feeds = True + oldest_article = 3 + max_articles_per_feed = 20 + + remove_tags_before = dict(name='article') + + remove_tags_after = dict(name='article') + + remove_tags = [ + dict(name='div', attrs={'class': 'bottom-info'}), + dict(name='div', attrs={'class': 'social-buttons'}) + ] + + feeds = [ + ('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://football.ua/rss2.ashx'), + ] diff --git a/recipes/icons/echo_moskvy.png b/recipes/icons/echo_moskvy.png index b68b530954..ab8d7bfb59 100644 Binary files a/recipes/icons/echo_moskvy.png and b/recipes/icons/echo_moskvy.png differ diff --git a/recipes/icons/fooballua.png b/recipes/icons/fooballua.png new file mode 100644 index 0000000000..c9bdaf260c Binary files /dev/null and b/recipes/icons/fooballua.png differ diff --git a/recipes/icons/prosleduet.png 
b/recipes/icons/prosleduet.png new file mode 100644 index 0000000000..918203f752 Binary files /dev/null and b/recipes/icons/prosleduet.png differ diff --git a/recipes/icons/ua_fooball.png b/recipes/icons/ua_fooball.png new file mode 100644 index 0000000000..4e8b806c97 Binary files /dev/null and b/recipes/icons/ua_fooball.png differ diff --git a/recipes/icons/unian_net_en.png b/recipes/icons/unian_net_en.png new file mode 100644 index 0000000000..0d550ba7a1 Binary files /dev/null and b/recipes/icons/unian_net_en.png differ diff --git a/recipes/icons/unian_net_ua.png b/recipes/icons/unian_net_ua.png new file mode 100644 index 0000000000..0d550ba7a1 Binary files /dev/null and b/recipes/icons/unian_net_ua.png differ diff --git a/recipes/prosleduet.recipe b/recipes/prosleduet.recipe new file mode 100644 index 0000000000..755cf43956 --- /dev/null +++ b/recipes/prosleduet.recipe @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from __future__ import unicode_literals, division, absolute_import, print_function +from calibre.web.feeds.news import BasicNewsRecipe + +class ProSleduet(BasicNewsRecipe): + title = '\u041F\u0440\u043E\u0434\u043E\u043B\u0436\u0435\u043D\u0438\u0435 \u0441\u043B\u0435\u0434\u0443\u0435\u0442' + __author__ = 'bugmen00t' + description = ('\u0414\u0438\u0434\u0436\u0438\u0442\u0430\u043B-\u043F\u0440\u043E\u0435\u043A\u0442' + ' \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432' + ' \u00AB\u041D\u043E\u0432\u043E\u0439 \u0433\u0430\u0437\u0435\u0442\u044B\u00BB') + publisher = 'Pavel Kanygin, Natalia Zhdanova' + category = 'news' + cover_url = u'https://prosleduet.media/wp-content/themes/prosle/assets/img/logo.svg' + language = 'ru' + no_stylesheets = True + remove_javascript = False + auto_cleanup = False + oldest_article = 7 + max_articles_per_feed = 20 + + remove_tags_before = dict(name='div', attrs={'class': 'container'}) + + remove_tags_after = dict(name='div', attrs={'class': 'container'}) + + remove_tags = [ 
+ dict(name='div', attrs={'class': 'ya-share2 ya-share2_inited'}) + ] + + feeds = [ +# ('\u041F\u0440\u043E\u0434\u043E\u043B\u0436\u0435\u043D\u0438\u0435 \u0441\u043B\u0435\u0434\u0443\u0435\u0442', 'https://prosleduet.media/feed/'), + ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://prosleduet.media/category/news/feed/'), + ('\u041B\u044E\u0434\u0438', 'https://prosleduet.media/category/people/feed/'), + ('\u0421\u044E\u0436\u0435\u0442\u044B', 'https://prosleduet.media/category/syuzhety/feed/'), + ('\u041F\u043E\u0434\u043A\u0430\u0441\u0442\u044B', 'https://prosleduet.media/category/podcasts/feed/'), + ('\u0420\u0430\u0437\u0431\u043E\u0440\u044B', 'https://prosleduet.media/category/details/feed/'), + ('\u0413\u043B\u0443\u0431\u0438\u043D\u043D\u0430\u044F \u0420\u043E\u0441\u0441\u0438\u044F', 'https://prosleduet.media/category/glubinnaya-rossiya/feed/') + ] diff --git a/recipes/ua_fooball.recipe b/recipes/ua_fooball.recipe new file mode 100644 index 0000000000..c2efde73d8 --- /dev/null +++ b/recipes/ua_fooball.recipe @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + +class UAFootball(BasicNewsRecipe): + +#Russian version +# title = 'UA-\u0424\u0443\u0442\u0431\u043E\u043B' +# language = 'ru_UK' +# feeds = [ +# ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438 \u0444\u0443\u0442\u0431\u043E\u043B\u0430', 'https://www.ua-football.com/rss/all.xml') +# ] + +#Ukrainian version + title = 'UA-\u0424\u0443\u0442\u0431\u043E\u043B' + description = ('\u0410\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u0456 \u0442\u0435\u043C\u0438' + ' \u0444\u0443\u0442\u0431\u043E\u043B\u044C\u043D\u043E\u0433\u043E' + ' \u0436\u0438\u0442\u0442\u044F \u0423\u043A\u0440\u0430\u0457\u043D\u0438 \u0442\u0430' + ' \u0432\u0441\u044C\u043E\u0433\u043E \u0441\u0432\u0456\u0442\u0443.') + language = 'uk' + feeds = [ + ('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://www.ua-football.com/ua/rss/all.xml') + ] + + 
__author__ = 'bugmen00t' + publisher = '1766 TEAM EOOD' + category = 'news' + cover_url = u'https://yt3.googleusercontent.com/11FSvKeWcjFhzKrO7nXZdc-I__UeZ0mhZwbwyOHtnx_1-q6d0zQ2LbOt2duNCY06JVg2cGXS-g=s900-c-k-c0x00ffffff-no-rj' + no_stylesheets = False + remove_javascript = False + auto_cleanup = False + remove_empty_feeds = True + oldest_article = 7 + max_articles_per_feed = 200 + + remove_tags_before = dict(name='h1') + + remove_tags_after = dict(name='div', attrs={'class': 'show-post'}) + + remove_tags = [ + dict(name='form'), + dict(name='iframe'), + dict(name='div', attrs={'class': 'language'}), + dict(name='div', attrs={'class': 'article__read-also'}), + dict(name='div', attrs={'class': 'card-player'}), + dict(name='div', attrs={'class': 'show-post-socials'}) + ] + +# Replacing articles in Ukrainian for RU-feed +# def print_version(self, url): +# return url.replace('ua-football.com/ua/', 'ua-football.com/') diff --git a/recipes/unian_net.recipe b/recipes/unian_net.recipe index c9333de626..61515412b8 100644 --- a/recipes/unian_net.recipe +++ b/recipes/unian_net.recipe @@ -11,7 +11,7 @@ class Unian(BasicNewsRecipe): publication_type = 'newspaper' oldest_article = 7 max_articles_per_feed = 100 - language = 'ru' + language = 'ru_UK' cover_url = 'https://www.unian.net/images/unian-512x512.png' auto_cleanup = False no_stylesheets = True @@ -21,9 +21,15 @@ class Unian(BasicNewsRecipe): remove_tags = [ dict(name='span', attrs={'class': 'article__info-item comments'}), dict(name='span', attrs={'class': 'article__info-item views'}), - dict(name='div', attrs={'class': 'read-also-slider'}) + dict(name='div', attrs={'class': 'read-also-slider'}), + dict(name='div', attrs={'class': 'nts-video-wrapper'}) ] feeds = [ (u'\u0423\u041D\u0418\u0410\u041D', u'https://rss.unian.net/site/news_rus.rss') ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src': True}): + img['src'] = img['data-src'] + return soup diff --git
a/recipes/unian_net_en.recipe b/recipes/unian_net_en.recipe new file mode 100644 index 0000000000..a15b32f5ef --- /dev/null +++ b/recipes/unian_net_en.recipe @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + +class Unian(BasicNewsRecipe): + title = 'UNIAN' + description = ('UNIAN (Ukrainian Independent News Agency of News) is the largest independent news agency,' + ' first in Ukraine, founded in 1993, remaining the leader among the country\'s news media,' + ' being the most cited source of news from across Ukraine.') + __author__ = 'bugmen00t' + publication_type = 'newspaper' + oldest_article = 30 + max_articles_per_feed = 100 + language = 'en_UK' + cover_url = 'https://www.unian.info/images/unian-512x512.png' + auto_cleanup = False + no_stylesheets = True + + remove_tags_before = dict(name='h1') + remove_tags_after = dict(name='div', attrs={'class': 'article-text'}) + remove_tags = [ + dict(name='span', attrs={'class': 'article__info-item comments'}), + dict(name='span', attrs={'class': 'article__info-item views'}), + dict(name='div', attrs={'class': 'read-also-slider'}), + dict(name='div', attrs={'class': 'nts-video-wrapper'}) + ] + + feeds = [ + (u'News Agency UNIAN', u'https://rss.unian.net/site/news_eng.rss') + ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src': True}): + img['src'] = img['data-src'] + return soup diff --git a/recipes/unian_net_ua.recipe b/recipes/unian_net_ua.recipe new file mode 100644 index 0000000000..7aed261e26 --- /dev/null +++ b/recipes/unian_net_ua.recipe @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 + +from calibre.web.feeds.news import BasicNewsRecipe + + +class Unian(BasicNewsRecipe): + title = '\u0423\u041D\u0406\u0410\u041D' + description = ( + '\u0423\u041D\u0406\u0410\u041D (\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0435' + ' \u041D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0435 
\u0406\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435' + ' \u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E \u041D\u043E\u0432\u0438\u043D) -' + ' \u043F\u0435\u0440\u0448\u0435 \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456 \u0442\u0430' + ' \u043D\u0430\u0439\u0431\u0456\u043B\u044C\u0448\u0435 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0435' + ' \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E,' + ' \u0437\u0430\u0441\u043D\u043E\u0432\u0430\u043D\u0435 1993 \u0440\u043E\u043A\u0443, \u043B\u0456\u0434\u0435\u0440' + ' \u0441\u0435\u0440\u0435\u0434 \u043D\u043E\u0432\u0438\u043D\u043D\u0438\u0445 \u043C\u0435\u0434\u0456\u0430' + ' \u043A\u0440\u0430\u0457\u043D\u0438, \u043D\u0430\u0439\u0431\u0456\u043B\u044C\u0448' + ' \u0446\u0438\u0442\u043E\u0432\u0430\u043D\u0435 \u0434\u0436\u0435\u0440\u0435\u043B\u043E' + ' \u043D\u043E\u0432\u0438\u043D \u043F\u0440\u043E \u043F\u043E\u0434\u0456\u0457 \u0432 \u043A\u0440\u0430\u0457\u043D\u0456.') + __author__ = 'bugmen00t' + publication_type = 'newspaper' + oldest_article = 7 + max_articles_per_feed = 100 + language = 'uk' + cover_url = 'https://www.unian.ua/images/unian-512x512.png' + auto_cleanup = False + no_stylesheets = True + + remove_tags_before = dict(name='h1') + remove_tags_after = dict(name='div', attrs={'class': 'article-text'}) + remove_tags = [ + dict(name='span', attrs={'class': 'article__info-item comments'}), + dict(name='span', attrs={'class': 'article__info-item views'}), + dict(name='div', attrs={'class': 'read-also-slider'}), + dict(name='div', attrs={'class': 'nts-video-wrapper'}) + ] + + feeds = [ + (u'\u0423\u041D\u0406\u0410\u041D', u'https://rss.unian.net/site/news_ukr.rss') + ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-src': True}): + img['src'] = img['data-src'] + return soup diff --git 
a/src/calibre/gui2/store/stores/virtualo_plugin.py b/src/calibre/gui2/store/stores/virtualo_plugin.py index 0229a6c94f..d5da180538 100644 --- a/src/calibre/gui2/store/stores/virtualo_plugin.py +++ b/src/calibre/gui2/store/stores/virtualo_plugin.py @@ -74,7 +74,8 @@ class VirtualoStore(BasicStoreConfig, StorePlugin): if not id: continue - price = ''.join(data.xpath('.//div[@class="info"]//div[@class="price"]/div/text()|.//div[@class="info"]//div[@class="price price--no-promo"]/div/text()')) + price = ''.join(data.xpath( + './/div[@class="info"]//div[@class="price"]/div/text()|.//div[@class="info"]//div[@class="price price--no-promo"]/div/text()')) cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src')) title = ''.join(data.xpath('.//h3[@class="title"]/a//text()')) author = ', '.join(data.xpath('.//div[@class="info"]//div[@class="authors"]/a//text()'))