calibre/recipes/financial_times.recipe
2011-04-24 08:59:36 -06:00

66 lines
2.3 KiB
Python

#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
'''
ft.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class FinancialTimes(BasicNewsRecipe):
title = u'Financial Times'
__author__ = 'Darko Miletic and Sujata Raman'
description = ('Financial world news. Available after 5AM '
'GMT, daily.')
oldest_article = 2
language = 'en'
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
simultaneous_downloads= 1
delay = 1
LOGIN = 'https://registration.ft.com/registration/barrier/login'
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open(self.LOGIN)
br.select_form(name='loginForm')
br['username'] = self.username
br['password'] = self.password
br.submit()
return br
keep_only_tags = [ dict(name='div', attrs={'id':'cont'}) ]
remove_tags_after = dict(name='p', attrs={'class':'copyright'})
remove_tags = [
dict(name='div', attrs={'id':'floating-con'})
]
extra_css = '''
body{font-family:Arial,Helvetica,sans-serif;}
h2(font-size:large;}
.ft-story-header(font-size:xx-small;}
.ft-story-body(font-size:small;}
a{color:#003399;}
.container{font-size:x-small;}
h3{font-size:x-small;color:#003399;}
'''
feeds = [
(u'UK' , u'http://www.ft.com/rss/home/uk' )
,(u'US' , u'http://www.ft.com/rss/home/us' )
,(u'Europe' , u'http://www.ft.com/rss/home/europe' )
,(u'Asia' , u'http://www.ft.com/rss/home/asia' )
,(u'Middle East', u'http://www.ft.com/rss/home/middleeast')
]
def preprocess_html(self, soup):
content_type = soup.find('meta', {'http-equiv':'Content-Type'})
if content_type:
content_type['content'] = 'text/html; charset=utf-8'
return soup