mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #831695 (Updated recipe for Financial times UK edition)
This commit is contained in:
parent
161644a752
commit
ac30f8edd4
@ -5,6 +5,7 @@ www.ft.com/uk-edition
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
@ -22,6 +23,8 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
|
articles_are_obfuscated = True
|
||||||
|
temp_files = []
|
||||||
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
|
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
|
||||||
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
||||||
LOGIN2 = 'http://media.ft.com/h/subs3.html'
|
LOGIN2 = 'http://media.ft.com/h/subs3.html'
|
||||||
@ -47,7 +50,12 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
br.submit()
|
br.submit()
|
||||||
return br
|
return br
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})]
|
keep_only_tags = [
|
||||||
|
dict(name='div', attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']})
|
||||||
|
,dict(name='div', attrs={'class':'standfirst'})
|
||||||
|
,dict(name='div', attrs={'id' :'storyContent'})
|
||||||
|
,dict(name='div', attrs={'class':['ft-story-body','index-detail']})
|
||||||
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id':'floating-con'})
|
dict(name='div', attrs={'id':'floating-con'})
|
||||||
,dict(name=['meta','iframe','base','object','embed','link'])
|
,dict(name=['meta','iframe','base','object','embed','link'])
|
||||||
@ -69,18 +77,23 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_artlinks(self, elem):
    """Build the article list for one section from the links under *elem*.

    Every ``<a href=...>`` found below *elem* becomes one article dict with
    the keys ``title``, ``date``, ``url`` and ``description``.  Relative
    links are made absolute by prepending ``self.PREFIX``; each URL is then
    opened once so that the stored ``url`` is the address the server
    finally redirects to.

    When ``self.test`` is true only the first two links are processed.

    :param elem: parsed element whose descendant links are collected; may
        be ``None`` (e.g. a section without a link list), in which case an
        empty list is returned.
    :return: list of article dicts, in document order.
    """
    articles = []
    if elem is None:
        # Nothing to scan -- avoid an AttributeError on elem.findAll.
        return articles
    for count, item in enumerate(elem.findAll('a', href=True), 1):
        # Test runs only need a small sample of each section.
        if self.test and count > 2:
            break
        rawlink = item['href']
        # Absolute links (either scheme) are used as-is; only genuinely
        # relative links get the site prefix.
        if rawlink.startswith(('http://', 'https://')):
            url = rawlink
        else:
            url = self.PREFIX + rawlink
        # Resolve redirects up front so the stored URL is the final one.
        urlverified = self.browser.open_novisit(url).geturl()
        title = self.tag_to_string(item)
        date = strftime(self.timefmt)
        articles.append({
            'title': title,
            'date': date,
            'url': urlverified,
            'description': '',
        })
    return articles
|
||||||
@ -97,7 +110,11 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
st = wide.find('h4',attrs={'class':'section-no-arrow'})
|
st = wide.find('h4',attrs={'class':'section-no-arrow'})
|
||||||
if st:
|
if st:
|
||||||
strest.insert(0,st)
|
strest.insert(0,st)
|
||||||
|
count = 0
|
||||||
for item in strest:
|
for item in strest:
|
||||||
|
count = count + 1
|
||||||
|
if self.test and count > 2:
|
||||||
|
return feeds
|
||||||
ftitle = self.tag_to_string(item)
|
ftitle = self.tag_to_string(item)
|
||||||
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
||||||
feedarts = self.get_artlinks(item.parent.ul)
|
feedarts = self.get_artlinks(item.parent.ul)
|
||||||
@ -137,3 +154,18 @@ class FinancialTimes(BasicNewsRecipe):
|
|||||||
cdate -= datetime.timedelta(days=1)
|
cdate -= datetime.timedelta(days=1)
|
||||||
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
|
return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf')
|
||||||
|
|
||||||
|
def get_obfuscated_article(self, url):
    """Download *url* and return the path of a temp file holding its content.

    The page is fetched with ``self.browser.open``; up to ten attempts are
    made.  On success the raw response body is written to a
    ``PersistentTemporaryFile`` (suffix ``_fa.html``), which is also
    appended to ``self.temp_files``.

    :param url: address of the article to download.
    :return: file-system path (``.name``) of the temporary file.
    :raises Exception: whatever the final download attempt raised, if all
        attempts fail.  No temporary file is created in that case.
    """
    max_attempts = 10
    html = None
    for attempt in range(max_attempts):
        try:
            html = self.browser.open(url).read()
            break
        except Exception:
            # Out of retries: surface the real download error instead of
            # failing later with an unbound ``html``.
            if attempt == max_attempts - 1:
                raise
            print("Retrying download...")

    tmp = PersistentTemporaryFile('_fa.html')
    self.temp_files.append(tmp)
    try:
        tmp.write(html)
    finally:
        # Always release the handle, even if the write fails.
        tmp.close()
    return tmp.name
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user