mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #831695 (Updated recipe for Financial times UK edition)
This commit is contained in:
parent
161644a752
commit
ac30f8edd4
@ -5,6 +5,7 @@ www.ft.com/uk-edition
|
||||
'''
|
||||
|
||||
import datetime
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -22,6 +23,8 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
needs_subscription = True
|
||||
encoding = 'utf8'
|
||||
publication_type = 'newspaper'
|
||||
articles_are_obfuscated = True
|
||||
temp_files = []
|
||||
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
|
||||
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
||||
LOGIN2 = 'http://media.ft.com/h/subs3.html'
|
||||
@ -47,7 +50,12 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']})
|
||||
,dict(name='div', attrs={'class':'standfirst'})
|
||||
,dict(name='div', attrs={'id' :'storyContent'})
|
||||
,dict(name='div', attrs={'class':['ft-story-body','index-detail']})
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'floating-con'})
|
||||
,dict(name=['meta','iframe','base','object','embed','link'])
|
||||
@ -69,18 +77,23 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
|
||||
def get_artlinks(self, elem):
|
||||
articles = []
|
||||
count = 0
|
||||
for item in elem.findAll('a',href=True):
|
||||
count = count + 1
|
||||
if self.test and count > 2:
|
||||
return articles
|
||||
rawlink = item['href']
|
||||
if rawlink.startswith('http://'):
|
||||
url = rawlink
|
||||
else:
|
||||
url = self.PREFIX + rawlink
|
||||
urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
|
||||
title = self.tag_to_string(item)
|
||||
date = strftime(self.timefmt)
|
||||
articles.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'url' :urlverified
|
||||
,'description':''
|
||||
})
|
||||
return articles
|
||||
@ -97,7 +110,11 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
st = wide.find('h4',attrs={'class':'section-no-arrow'})
|
||||
if st:
|
||||
strest.insert(0,st)
|
||||
count = 0
|
||||
for item in strest:
|
||||
count = count + 1
|
||||
if self.test and count > 2:
|
||||
return feeds
|
||||
ftitle = self.tag_to_string(item)
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
||||
feedarts = self.get_artlinks(item.parent.ul)
|
||||
@ -136,4 +153,19 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
if cdate.isoweekday() == 7:
|
||||
cdate -= datetime.timedelta(days=1)
|
||||
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
|
||||
|
||||
def get_obfuscated_article(self, url):
|
||||
count = 0
|
||||
while (count < 10):
|
||||
try:
|
||||
response = self.browser.open(url)
|
||||
html = response.read()
|
||||
count = 10
|
||||
except:
|
||||
print "Retrying download..."
|
||||
count += 1
|
||||
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||
self.temp_files[-1].write(html)
|
||||
self.temp_files[-1].close()
|
||||
return self.temp_files[-1].name
|
||||
|
Loading…
x
Reference in New Issue
Block a user