__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic '
'''
rt.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class RT_eng(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the rt.com RSS feeds.

    Articles are downloaded through their print pages (see
    ``print_version``) and cleaned up in ``preprocess_html``.
    """

    # --- Metadata -----------------------------------------------------------
    title = 'RT in English'
    __author__ = 'Darko Miletic'
    description = (
        'RT is the first Russian 24/7 English-language news channel '
        'which brings the Russian view on global news.'
    )
    publisher = 'Autonomous Nonprofit Organization "TV-Novosti"'
    category = 'news, politics, economy, finances, Russia, world'
    language = 'en_RU'
    publication_type = 'newsportal'
    masthead_url = 'http://rt.com/s/css/img/printlogo.gif'

    # --- Download behaviour ---------------------------------------------------
    oldest_article = 2
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True

    # Replacement styling for the generated book. The literal is split across
    # lines with implicit concatenation only; the resulting string is unchanged.
    extra_css = (
        ' body{font-family: Arial,Helvetica,sans-serif}'
        ' h1{font-family: Georgia,"Times New Roman",Times,serif}'
        ' .grey{color: gray}'
        ' .fs12{font-size: small} '
    )

    # Reuse the recipe metadata for the conversion step.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Page clean-up --------------------------------------------------------
    # Keep only the <div class="all"> container of each page.
    keep_only_tags = [dict(name='div', attrs={'class': 'all'})]
    # Strip embedded media, <link>/<meta> tags and the "crumbs oh" element.
    remove_tags = [
        dict(name=['object', 'link', 'embed', 'iframe', 'meta']),
        dict(attrs={'class': 'crumbs oh'}),
    ]
    remove_attributes = ['clear']

    # --- Sources --------------------------------------------------------------
    feeds = [
        (u'Politics', u'http://rt.com/politics/rss/'),
        (u'USA', u'http://rt.com/usa/news/rss/'),
        (u'Business', u'http://rt.com/business/news/rss/'),
        (u'Sport', u'http://rt.com/sport/rss/'),
        (u'Art&Culture', u'http://rt.com/art-and-culture/news/rss/'),
    ]

    def print_version(self, url):
        """Return the URL of the print page for the article at ``url``.

        The print page is addressed by appending ``print/`` to the article
        URL. A missing trailing slash is added first so that ``print/``
        becomes its own path segment instead of fusing with the last one.
        """
        if not url.endswith('/'):
            url += '/'
        return url + 'print/'

    def preprocess_html(self, soup):
        """Strip inline styles and flatten hyperlinks to plain text.

        ``soup`` is modified in place and also returned.
        """
        # Drop inline styling so that only ``extra_css`` applies.
        for styled in soup.findAll(style=True):
            del styled['style']

        # Replace every <a> element with its text content.
        for anchor in soup.findAll('a'):
            text = anchor.string
            if text is None:
                # No single string child: fall back to the recipe's helper
                # to get the text of the whole element.
                text = self.tag_to_string(anchor)
            anchor.replaceWith(text)
        return soup