__license__ = 'GPL v3'
__copyright__ = '2010-2011, Darko Miletic '
'''
ft.com
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class FinancialTimes(BasicNewsRecipe):
    """Recipe that builds its feeds from the ft.com UK edition index page.

    The index at ``INDEX`` is parsed by ``parse_index``; when a username and
    password are configured, ``get_browser`` submits them to ``LOGIN`` first.
    """

    title = u'Financial Times - UK printed edition'
    __author__ = 'Darko Miletic'
    description = 'Financial world news'
    oldest_article = 2
    language = 'en_GB'
    max_articles_per_feed = 250
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    encoding = 'utf8'
    simultaneous_downloads = 1
    delay = 1

    LOGIN = 'https://registration.ft.com/registration/barrier/login'
    INDEX = 'http://www.ft.com/uk-edition'
    PREFIX = 'http://www.ft.com'

    keep_only_tags = [dict(name='div', attrs={'id': 'cont'})]
    remove_tags_after = dict(name='p', attrs={'class': 'copyright'})
    remove_tags = [
        dict(name='div', attrs={'id': 'floating-con'}),
        dict(name=['meta', 'iframe', 'base', 'object', 'embed', 'link']),
    ]
    remove_attributes = ['width', 'height', 'lang']

    # Adjacent literals are concatenated at compile time; the resulting
    # string is exactly the original single-line stylesheet.
    extra_css = (
        ' body{font-family:Arial,Helvetica,sans-serif;}'
        ' h2{font-size:large;}'
        ' .ft-story-header{font-size:xx-small;}'
        ' .ft-story-body{font-size:small;}'
        ' a{color:#003399;}'
        ' .container{font-size:x-small;}'
        ' h3{font-size:x-small;color:#003399;}'
        ' .copyright{font-size: x-small} '
    )

    def get_browser(self):
        """Return a browser, logged in when credentials are configured.

        The login form named ``loginForm`` at ``LOGIN`` is filled in and
        submitted only if both ``self.username`` and ``self.password`` are
        set; otherwise the plain browser is returned.
        """
        # The base implementation is an instance method, so ``self`` has to
        # be passed explicitly when calling it through the class.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(name='loginForm')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def get_artlinks(self, elem):
        """Build one article entry for every ``<a href=...>`` inside ``elem``.

        ``elem`` is a parsed tag (or ``None`` when the caller found no list
        for a section, in which case an empty list is returned).

        Returns a list of dicts with the keys ``title``, ``date``, ``url``
        and ``description``. ``description`` is always empty and ``date`` is
        the current time formatted with ``self.timefmt``.
        """
        if elem is None:
            return []

        # The date is identical for every entry, so format it only once.
        date = strftime(self.timefmt)
        articles = []
        for item in elem.findAll('a', href=True):
            href = item['href']
            # Site-relative links need the host prepended; links that are
            # already absolute must be left alone or the URL is corrupted.
            if href.startswith(('http://', 'https://')):
                url = href
            else:
                url = self.PREFIX + href
            articles.append({
                'title': self.tag_to_string(item),
                'date': date,
                'url': url,
                'description': '',
            })
        return articles

    def parse_index(self):
        """Return ``[(section_title, articles), ...]`` read from ``INDEX``.

        Sections are the ``<h3 class="section">`` headings inside the
        ``<div class="wide">`` container, preceded by the
        ``<h4 class="section-no-arrow">`` heading when one exists. Articles
        for a section come from the ``<ul>`` under the heading's parent.
        An empty list is returned when the container or the ``<h3>``
        headings are missing.
        """
        feeds = []
        soup = self.index_to_soup(self.INDEX)
        wide = soup.find('div', attrs={'class': 'wide'})
        if not wide:
            return feeds

        sections = wide.findAll('h3', attrs={'class': 'section'})
        if not sections:
            return feeds

        # The heading without an arrow, when present, goes first.
        lead = wide.find('h4', attrs={'class': 'section-no-arrow'})
        if lead:
            sections.insert(0, lead)

        for item in sections:
            ftitle = self.tag_to_string(item)
            self.report_progress(0, _('Fetching feed') + ' %s...' % (ftitle))
            # ``item.parent.ul`` is None when the section has no list;
            # get_artlinks then yields no articles instead of raising.
            feedarts = self.get_artlinks(item.parent.ul)
            feeds.append((ftitle, feedarts))
        return feeds

    def preprocess_html(self, soup):
        """Pass the article soup through ``adeify_images`` and return it."""
        return self.adeify_images(soup)