diff --git a/resources/recipes/rbc_ru.recipe b/resources/recipes/rbc_ru.recipe new file mode 100644 index 0000000000..4c377a334b --- /dev/null +++ b/resources/recipes/rbc_ru.recipe @@ -0,0 +1,49 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1286819935(BasicNewsRecipe): + title = u'RBC.ru' + __author__ = 'A. Chewi' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + conversion_options = {'linearize_tables' : True} + remove_attributes = ['style'] + language = 'ru' + timefmt = ' [%a, %d %b, %Y]' + + keep_only_tags = [dict(name='h2', attrs={}), + dict(name='div', attrs={'class': 'box _ga1_on_'}), + dict(name='h1', attrs={'class': 'news_section'}), + dict(name='div', attrs={'class': 'news_body dotted_border_bottom'}), + dict(name='table', attrs={'class': 'newsBody'}), + dict(name='h2', attrs={'class': 'black'})] + + feeds = [(u'Главные новости', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/mainnews.rss'), + (u'Политика', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/politics.rss'), + (u'Экономика', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/economics.rss'), + (u'Общество', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/society.rss'), + (u'Происшествия', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/incidents.rss'), + (u'Финансовые новости Quote.rbc.ru', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/quote.ru/mainnews.rss')] + + + remove_tags = [dict(name='div', attrs={'class': "video-frame"}), + dict(name='div', attrs={'class': "photo-container videoContainer videoSWFLinks videoPreviewSlideContainer notes"}), + dict(name='div', attrs={'class': "notes"}), + dict(name='div', attrs={'class': "publinks"}), + dict(name='a', attrs={'class': "print"}), + dict(name='div', attrs={'class': "photo-report_new notes newslider"}), + dict(name='div', attrs={'class': "videoContainer"}), + dict(name='div', attrs={'class': "videoPreviewSlideContainer"}), + dict(name='a', attrs={'class': "videoPreviewContainer"}), + dict(name='a', attrs={'class': "red"}),] + + def preprocess_html(self, soup): + for alink in soup.findAll('a'): + if alink.string is not None: + tstr = alink.string + alink.replaceWith(tstr) + return soup + + def print_version(self, url): + return url + '?print=true'