Fix some obvious bugs in the FT recipes

This commit is contained in:
Kovid Goyal 2016-05-20 12:50:15 +05:30
parent 3ca4670ce9
commit e226eaaf5f
3 changed files with 5 additions and 17 deletions

View File

@ -4,7 +4,6 @@ __copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
www.ft.com
'''
import datetime
from calibre.web.feeds.news import BasicNewsRecipe
class FinancialTimes_rss(BasicNewsRecipe):
@ -22,7 +21,7 @@ class FinancialTimes_rss(BasicNewsRecipe):
encoding = 'utf8'
publication_type = 'newspaper'
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
LOGIN = 'https://registration.ft.com/registration/barrier/login'
LOGIN = 'https://accounts.ft.com/login'
INDEX = 'http://www.ft.com'
conversion_options = {
@ -38,8 +37,8 @@ class FinancialTimes_rss(BasicNewsRecipe):
br.open(self.INDEX)
if self.username is not None and self.password is not None:
br.open(self.LOGIN)
br.select_form(name='loginForm')
br['username'] = self.username
br.select_form(name='login')
br['email'] = self.username
br['password'] = self.password
br.submit()
return br
@ -97,10 +96,3 @@ class FinancialTimes_rss(BasicNewsRecipe):
if not item.has_key('alt'):
item['alt'] = 'image'
return soup
def get_cover_url(self):
# Build and return the URL of the front-page PDF named after the current date.
cdate = datetime.date.today()
# isoweekday() == 7 means Sunday; in that case use the previous day (Saturday) instead.
# NOTE(review): presumably because no edition is published on Sunday — confirm.
if cdate.isoweekday() == 7:
cdate -= datetime.timedelta(days=1)
# strftime fills the %d%m%y directives embedded in the URL (day, month, two-digit year).
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_USA.pdf')

View File

@ -40,9 +40,7 @@ class FinancialTimes(BasicNewsRecipe):
br.submit()
return br
keep_only_tags = [
dict(name='div' , attrs={'class':['master-row editorialSection']})
]
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
remove_tags = [
dict(name='style', attrs={'id':'antiClickjack'}),
dict(name='div', attrs={'id':'floating-con'}),

View File

@ -49,9 +49,7 @@ class FinancialTimes(BasicNewsRecipe):
br.submit()
return br
keep_only_tags = [
dict(name='div' , attrs={'class':['master-row editorialSection']})
]
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
remove_tags = [
dict(name='div', attrs={'id':'floating-con'})
,dict(name=['meta','iframe','base','object','embed','link'])