RBC.ru by Chewi

This commit is contained in:
Kovid Goyal 2011-03-02 09:35:36 -07:00
parent 95da860ee2
commit 36fba3beac

View File

@ -0,0 +1,49 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1286819935(BasicNewsRecipe):
title = u'RBC.ru'
__author__ = 'A. Chewi'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
conversion_options = {'linearize_tables' : True}
remove_attributes = ['style']
language = 'ru'
timefmt = ' [%a, %d %b, %Y]'
keep_only_tags = [dict(name='h2', attrs={}),
dict(name='div', attrs={'class': 'box _ga1_on_'}),
dict(name='h1', attrs={'class': 'news_section'}),
dict(name='div', attrs={'class': 'news_body dotted_border_bottom'}),
dict(name='table', attrs={'class': 'newsBody'}),
dict(name='h2', attrs={'class': 'black'})]
feeds = [(u'Главные новости', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/mainnews.rss'),
(u'Политика', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/politics.rss'),
(u'Экономика', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/economics.rss'),
(u'Общество', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/society.rss'),
(u'Происшествия', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/rbc.ru/incidents.rss'),
(u'Финансовые новости Quote.rbc.ru', u'http://static.feed.rbc.ru/rbc/internal/rss.rbc.ru/quote.ru/mainnews.rss')]
remove_tags = [dict(name='div', attrs={'class': "video-frame"}),
dict(name='div', attrs={'class': "photo-container videoContainer videoSWFLinks videoPreviewSlideContainer notes"}),
dict(name='div', attrs={'class': "notes"}),
dict(name='div', attrs={'class': "publinks"}),
dict(name='a', attrs={'class': "print"}),
dict(name='div', attrs={'class': "photo-report_new notes newslider"}),
dict(name='div', attrs={'class': "videoContainer"}),
dict(name='div', attrs={'class': "videoPreviewSlideContainer"}),
dict(name='a', attrs={'class': "videoPreviewContainer"}),
dict(name='a', attrs={'class': "red"}),]
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup
def print_version(self, url):
return url + '?print=true'