#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic '
'''
politico.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


def _has_class(token):
    """Return a matcher for ``class`` attribute values containing *token*.

    The returned callable takes the attribute value and yields a truthy
    result only when *token* is one of its whitespace-separated words.
    A missing or empty value (``None`` / ``''``) is returned unchanged,
    which is falsy, so such elements never match.
    """
    def matcher(value):
        return value and token in value.split()
    return matcher


class Politico(BasicNewsRecipe):
    """News recipe that downloads the politico.com RSS feeds listed below."""

    title = 'Politico'
    __author__ = 'Darko Miletic and Sujata Raman'
    description = 'Political news from USA'
    publisher = 'Capitol News Company, LLC'
    category = 'news, politics, USA'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'UTF-8'
    language = 'en'

    # Metadata handed to the LRF and EPUB conversion back ends; both are
    # assembled from the description/category/publisher values above.
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
        '--ignore-tables',
    ]

    html2epub_options = (
        'publisher="' + publisher + '"\ncomments="'
        + description + '"\ntags="' + category + '"\nlinearize_tables=True'
    )

    # Only the <article> element of each page is kept.
    keep_only_tags = [
        dict(name=['article']),
    ]

    # Elements dropped from the kept content: embedded media / sidebars by
    # tag name, then page furniture identified by a CSS class word.
    remove_tags = [
        dict(name=['notags', 'embed', 'aside', 'object', 'link', 'img',
                   'figure']),
        dict(attrs={'class': _has_class('story-tools')}),
        dict(attrs={'class': _has_class('story-continued')}),
        dict(attrs={'class': _has_class('story-supplement')}),
        dict(attrs={'class': _has_class('story-share')}),
        dict(attrs={'class': _has_class('suggested')}),
    ]

    # Adjacent literals are concatenated so the stylesheet text stays
    # exactly as it was, one rule per source line.
    extra_css = (
        ' body{font-family:Arial,Sans-serif;}'
        ' element.style{color:#FF0000;font-family:Arial,Sans-serif;}'
        ' .author{color:#808080;font-size:x-small;}'
        ' a{ color:#003399;}'
        ' .byline{color:#696969 ; font-size:x-small;}'
        ' .story{color:#000000;}'
        ' td{color:#000000;} '
    )

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Top Stories', u'http://www.politico.com/rss/politicopicks.xml'),
        (u'Congress', u'http://www.politico.com/rss/congress.xml'),
        (u'Ideas', u'http://www.politico.com/rss/ideas.xml'),
        (u'Life', u'http://www.politico.com/rss/life.xml'),
        (u'Lobbyists', u'http://www.politico.com/rss/lobbyists.xml'),
        (u'Pitboss', u'http://www.politico.com/rss/pitboss.xml'),
        (u'Politics', u'http://www.politico.com/rss/politics.xml'),
        (u'Roger Simon', u'http://www.politico.com/rss/rogersimon.xml'),
        (u'Suite Talk', u'http://www.politico.com/rss/suitetalk.xml'),
        (u'Playbook', u'http://www.politico.com/rss/playbook.xml'),
    ]

    def preprocess_html(self, soup):
        """Insert the head markup and strip every inline ``style`` attribute.

        :param soup: parsed document tree for one downloaded page.
        :return: the same ``soup`` object, modified in place.
        """
        # NOTE(review): this literal is empty in this copy of the recipe;
        # presumably a tag string was lost at some point -- confirm against
        # the upstream recipe before relying on it.
        mtag = ''
        head = soup.head
        # A document without a <head> would otherwise raise AttributeError.
        if head is not None:
            head.insert(0, mtag)
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def postprocess_html(self, soup, first):
        """Rename every ``table``, ``tr`` and ``td`` element to ``div``.

        :param soup: parsed document tree for one downloaded page.
        :param first: not used by this implementation.
        :return: the same ``soup`` object, modified in place.
        """
        for tag in soup.findAll(name=['table', 'tr', 'td']):
            tag.name = 'div'
        return soup