Fix #1169590 (Updated recipe for Financial Times, UK and US edition)

This commit is contained in:
Kovid Goyal 2013-04-20 09:35:49 +05:30
parent a90b9106ad
commit a9e3e679e2
2 changed files with 34 additions and 35 deletions

View File

@ -1,7 +1,7 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.ft.com/uk-edition www.ft.com/intl/uk-edition
''' '''
import datetime import datetime
@ -29,7 +29,7 @@ class FinancialTimes(BasicNewsRecipe):
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg' masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login' LOGIN = 'https://registration.ft.com/registration/barrier/login'
LOGIN2 = 'http://media.ft.com/h/subs3.html' LOGIN2 = 'http://media.ft.com/h/subs3.html'
INDEX = 'http://www.ft.com/uk-edition' INDEX = 'http://www.ft.com/intl/uk-edition'
PREFIX = 'http://www.ft.com' PREFIX = 'http://www.ft.com'
conversion_options = { conversion_options = {

View File

@ -1,20 +1,21 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
''' '''
http://www.ft.com/intl/us-edition www.ft.com/intl/international-edition
''' '''
import datetime import datetime
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from collections import OrderedDict
class FinancialTimes(BasicNewsRecipe): class FinancialTimes(BasicNewsRecipe):
title = 'Financial Times (US) printed edition' title = 'Financial Times (International) printed edition'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy." description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
publisher = 'The Financial Times Ltd.' publisher = 'The Financial Times Ltd.'
category = 'news, finances, politics, UK, World' category = 'news, finances, politics, World'
oldest_article = 2 oldest_article = 2
language = 'en' language = 'en'
max_articles_per_feed = 250 max_articles_per_feed = 250
@ -28,7 +29,7 @@ class FinancialTimes(BasicNewsRecipe):
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg' masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login' LOGIN = 'https://registration.ft.com/registration/barrier/login'
LOGIN2 = 'http://media.ft.com/h/subs3.html' LOGIN2 = 'http://media.ft.com/h/subs3.html'
INDEX = 'http://www.ft.com/intl/us-edition' INDEX = 'http://www.ft.com/intl/international-edition'
PREFIX = 'http://www.ft.com' PREFIX = 'http://www.ft.com'
conversion_options = { conversion_options = {
@ -105,29 +106,30 @@ class FinancialTimes(BasicNewsRecipe):
return articles return articles
def parse_index(self): def parse_index(self):
feeds = [] feeds = OrderedDict()
soup = self.index_to_soup(self.INDEX) soup = self.index_to_soup(self.INDEX)
dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div')) #dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
self.timefmt = ' [%s]'%dates #self.timefmt = ' [%s]'%dates
wide = soup.find('div',attrs={'class':'wide'}) section_title = 'Untitled'
if not wide:
return feeds for column in soup.findAll('div', attrs = {'class':'feedBoxes clearfix'}):
allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()}) for section in column. findAll('div', attrs = {'class':'feedBox'}):
if not allsections: sectiontitle=self.tag_to_string(section.find('h4'))
return feeds if '...' not in sectiontitle: section_title=sectiontitle
count = 0 for article in section.ul.findAll('li'):
for item in allsections: articles = []
count = count + 1 title=self.tag_to_string(article.a)
if self.test and count > 2: url=article.a['href']
return feeds articles.append({'title':title, 'url':url, 'description':'', 'date':''})
fitem = item.h3
if not fitem: if articles:
fitem = item.h4 if section_title not in feeds:
ftitle = self.tag_to_string(fitem) feeds[section_title] = []
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle)) feeds[section_title] += articles
feedarts = self.get_artlinks(item.ul)
feeds.append((ftitle,feedarts))
return feeds ans = [(key, val) for key, val in feeds.iteritems()]
return ans
def preprocess_html(self, soup): def preprocess_html(self, soup):
items = ['promo-box','promo-title', items = ['promo-box','promo-title',
@ -177,6 +179,3 @@ class FinancialTimes(BasicNewsRecipe):
tfile.close() tfile.close()
self.temp_files.append(tfile) self.temp_files.append(tfile)
return tfile.name return tfile.name
def cleanup(self):
self.browser.open('https://registration.ft.com/registration/login/logout?location=')