From 792901d690a206f470143a27e077bf87b8a3a5c4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 27 Oct 2012 19:24:57 +0530 Subject: [PATCH] Update Financial Times (UK) --- recipes/financial_times_uk.recipe | 77 ++++++++++++++++--------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/recipes/financial_times_uk.recipe b/recipes/financial_times_uk.recipe index 4e5b522ae9..e2b69f4987 100644 --- a/recipes/financial_times_uk.recipe +++ b/recipes/financial_times_uk.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2010-2012, Darko Miletic ' +__copyright__ = '2010-2011, Darko Miletic ' ''' www.ft.com/uk-edition ''' @@ -42,24 +42,18 @@ class FinancialTimes(BasicNewsRecipe): def get_browser(self): br = BasicNewsRecipe.get_browser() br.open(self.INDEX) - if self.username is not None and self.password is not None: - br.open(self.LOGIN2) - br.select_form(name='loginForm') - br['username'] = self.username - br['password'] = self.password - br.submit() + br.open(self.LOGIN) + br.select_form(name='loginForm') + br['username'] = self.username + br['password'] = self.password + br.submit() return br keep_only_tags = [ - dict(name='div' , attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']}) - ,dict(name='div' , attrs={'class':'standfirst'}) - ,dict(name='div' , attrs={'id' :'storyContent'}) - ,dict(name='div' , attrs={'class':['ft-story-body','index-detail']}) - ,dict(name='div' , attrs={'class':['ft-story-body','index-detail']}) - ,dict(name='h2' , attrs={'class':'entry-title'} ) - ,dict(name='span', attrs={'class':lambda x: x and 'posted-on' in x.split()} ) - ,dict(name='span', attrs={'class':'author_byline'} ) - ,dict(name='div' , attrs={'class':'entry-content'} ) + dict(name='div', attrs={'class':['fullstory fullstoryHeader', 'ft-story-header']}) + ,dict(name='div', attrs={'class':'standfirst'}) + ,dict(name='div', attrs={'id' :'storyContent'}) + ,dict(name='div', attrs={'class':['ft-story-body','index-detail']}) ] remove_tags = [ dict(name='div', attrs={'id':'floating-con'}) @@ -88,17 +82,21 @@ class FinancialTimes(BasicNewsRecipe): if self.test and count > 2: return articles rawlink = item['href'] - url = rawlink - if not rawlink.startswith('http://'): - url = self.PREFIX + rawlink - urlverified = self.browser.open_novisit(url).geturl() # resolve redirect. + if rawlink.startswith('http://'): + url = rawlink + else: + url = self.PREFIX + rawlink + try: + urlverified = self.browser.open_novisit(url).geturl() # resolve redirect. + except: + continue title = self.tag_to_string(item) date = strftime(self.timefmt) articles.append({ - 'title' :title - ,'date' :date - ,'url' :urlverified - ,'description':'' + 'title' :title + ,'date' :date + ,'url' :urlverified + ,'description':'' }) return articles @@ -110,20 +108,21 @@ class FinancialTimes(BasicNewsRecipe): wide = soup.find('div',attrs={'class':'wide'}) if not wide: return feeds - allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()}) - if not allsections: + strest = wide.findAll('h3', attrs={'class':'section'}) + if not strest: return feeds + st = wide.findAll('h4',attrs={'class':'section-no-arrow'}) + if st: + st.extend(strest) count = 0 - for item in allsections: + for item in st: count = count + 1 if self.test and count > 2: return feeds - fitem = item.h3 - if not fitem: - fitem = item.h4 - ftitle = self.tag_to_string(fitem) + ftitle = self.tag_to_string(item) self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle)) - feedarts = self.get_artlinks(item.ul) + if item.parent.ul is not None: + feedarts = self.get_artlinks(item.parent.ul) feeds.append((ftitle,feedarts)) return feeds @@ -157,7 +156,7 @@ class FinancialTimes(BasicNewsRecipe): def get_cover_url(self): cdate = datetime.date.today() if cdate.isoweekday() == 7: - cdate -= datetime.timedelta(days=1) + cdate -= datetime.timedelta(days=1) return cdate.strftime('http://specials.ft.com/vtf_pdf/%d%m%y_FRONT1_LON.pdf') def get_obfuscated_article(self, url): @@ -170,8 +169,10 @@ class FinancialTimes(BasicNewsRecipe): except: print "Retrying download..." count += 1 - tfile = PersistentTemporaryFile('_fa.html') - tfile.write(html) - tfile.close() - self.temp_files.append(tfile) - return tfile.name + self.temp_files.append(PersistentTemporaryFile('_fa.html')) + self.temp_files[-1].write(html) + self.temp_files[-1].close() + return self.temp_files[-1].name + + def cleanup(self): + self.browser.open('https://registration.ft.com/registration/login/logout?location=')