#!/usr/bin/env python2 # -*- coding: cp1252 -*- __license__ = 'GPL v3' __copyright__ = '2009, Darko Miletic ' ''' politico.com ''' import re, traceback from calibre.web.feeds.news import BasicNewsRecipe class Politico(BasicNewsRecipe): title = 'Politico' __author__ = 'Darko Miletic and Sujata Raman' description = 'Political news from USA' publisher = 'Capitol News Company, LLC' category = 'news, politics, USA' oldest_article = 7 max_articles_per_feed = 100 use_embedded_content = False no_stylesheets = True remove_javascript = True encoding = 'UTF-8' language = 'en' html2lrf_options = [ '--comment', description , '--category', category , '--publisher', publisher , '--ignore-tables' ] html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' remove_tags = [ dict(name=['notags','embed','object','link','img']), ] extra_css = ''' body{font-family:Arial,Sans-serif;} element.style{color:#FF0000;font-family:Arial,Sans-serif;} .author{color:#808080;font-size:x-small;} a{ color:#003399;} .byline{color:#696969 ; font-size:x-small;} .story{color:#000000;} td{color:#000000;} ''' feeds = [ (u'Top Stories' , u'http://www.politico.com/rss/politicopicks.xml' ) ,(u'Congress' , u'http://www.politico.com/rss/congress.xml' ) ,(u'Ideas' , u'http://www.politico.com/rss/ideas.xml' ) ,(u'Life' , u'http://www.politico.com/rss/life.xml' ) ,(u'Lobbyists' , u'http://www.politico.com/rss/lobbyists.xml' ) ,(u'Pitboss' , u'http://www.politico.com/rss/pitboss.xml' ) ,(u'Politics' , u'http://www.politico.com/rss/politics.xml' ) ,(u'Roger Simon' , u'http://www.politico.com/rss/rogersimon.xml' ) ,(u'Suite Talk' , u'http://www.politico.com/rss/suitetalk.xml' ) ,(u'Playbook' , u'http://www.politico.com/rss/playbook.xml' ) #(u'The Huddle' , u'http://www.politico.com/rss/huddle.xml' ) ] def preprocess_html(self, soup): mtag = '' soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] return soup url_pat = re.compile(r'