Update Financial Times UK

This commit is contained in:
Kovid Goyal 2013-04-04 21:36:02 +05:30
parent d8aebe1c28
commit 78320d754f

View File

@@ -8,6 +8,7 @@ import datetime
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from collections import OrderedDict
class FinancialTimes(BasicNewsRecipe): class FinancialTimes(BasicNewsRecipe):
title = 'Financial Times (UK)' title = 'Financial Times (UK)'
@@ -105,29 +106,28 @@ class FinancialTimes(BasicNewsRecipe):
return articles return articles
def parse_index(self): def parse_index(self):
feeds = [] feeds = OrderedDict()
soup = self.index_to_soup(self.INDEX) soup = self.index_to_soup(self.INDEX)
dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div')) #dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
self.timefmt = ' [%s]'%dates #self.timefmt = ' [%s]'%dates
wide = soup.find('div',attrs={'class':'wide'})
if not wide: for column in soup.findAll('div', attrs = {'class':'feedBoxes clearfix'}):
return feeds for section in column. findAll('div', attrs = {'class':'feedBox'}):
allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()}) section_title=self.tag_to_string(section.find('h4'))
if not allsections: for article in section.ul.findAll('li'):
return feeds articles = []
count = 0 title=self.tag_to_string(article.a)
for item in allsections: url=article.a['href']
count = count + 1 articles.append({'title':title, 'url':url, 'description':'', 'date':''})
if self.test and count > 2:
return feeds if articles:
fitem = item.h3 if section_title not in feeds:
if not fitem: feeds[section_title] = []
fitem = item.h4 feeds[section_title] += articles
ftitle = self.tag_to_string(fitem)
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
feedarts = self.get_artlinks(item.ul) ans = [(key, val) for key, val in feeds.iteritems()]
feeds.append((ftitle,feedarts)) return ans
return feeds
def preprocess_html(self, soup): def preprocess_html(self, soup):
items = ['promo-box','promo-title', items = ['promo-box','promo-title',
@@ -177,6 +177,3 @@ class FinancialTimes(BasicNewsRecipe):
tfile.close() tfile.close()
self.temp_files.append(tfile) self.temp_files.append(tfile)
return tfile.name return tfile.name
def cleanup(self):
self.browser.open('https://registration.ft.com/registration/login/logout?location=')