From c6dbbf488fd63264fdeb0bed57f3cdfe5118b754 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 29 Jun 2015 11:02:36 +0530
Subject: [PATCH] ...

---
 recipes/financial_times_uk.recipe | 56 +++++++------------------------
 1 file changed, 12 insertions(+), 44 deletions(-)

diff --git a/recipes/financial_times_uk.recipe b/recipes/financial_times_uk.recipe
index df6d10d831..87e898929d 100644
--- a/recipes/financial_times_uk.recipe
+++ b/recipes/financial_times_uk.recipe
@@ -5,7 +5,6 @@
 www.ft.com/intl/uk-edition
 '''
 
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 from collections import OrderedDict
@@ -31,14 +30,6 @@ class FinancialTimes(BasicNewsRecipe):
     INDEX = 'http://www.ft.com/intl/uk-edition'
     PREFIX = 'http://www.ft.com'
 
-    conversion_options = {
-        'comment' : description
-        , 'tags' : category
-        , 'publisher' : publisher
-        , 'language' : language
-        , 'linearize_tables' : True
-    }
-
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
         br.open(self.INDEX)
@@ -54,11 +45,11 @@ class FinancialTimes(BasicNewsRecipe):
         dict(name='div' , attrs={'class':['master-row editorialSection']})
     ]
     remove_tags = [
-        dict(name='div', attrs={'id':'floating-con'})
-        ,dict(name=['meta','iframe','base','object','embed','link'])
-        ,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image','promobox']})
-        ,dict(name='div', attrs={'class':lambda x: x and 'insideArticleRelatedTopics' in x.split()})
-    ]
+        dict(name='div', attrs={'id':'floating-con'}),
+        dict(name=['meta','iframe','base','object','embed','link']),
+        dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image','promobox']}),
+        dict(name='div', attrs={'class':lambda x: x and 'insideArticleRelatedTopics' in x.split()})
+    ]
     remove_attributes = ['width','height','lang']
 
     extra_css = """
@@ -73,36 +64,11 @@ class FinancialTimes(BasicNewsRecipe):
         .byline,.ft-story-body,.ft-story-header{font-family: Arial,Helvetica,sans-serif}
     """
 
-    def get_artlinks(self, elem):
-        articles = []
-        count = 0
-        for item in elem.findAll('a',href=True):
-            count = count + 1
-            if self.test and count > 2:
-                return articles
-            rawlink = item['href']
-            url = rawlink
-            if not rawlink.startswith('http://'):
-                url = self.PREFIX + rawlink
-            try:
-                urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
-            except:
-                continue
-            title = self.tag_to_string(item)
-            date = strftime(self.timefmt)
-            articles.append({
-                'title' :title
-                ,'date' :date
-                ,'url' :urlverified
-                ,'description':''
-            })
-        return articles
-
     def parse_index(self):
         feeds = OrderedDict()
         soup = self.index_to_soup(self.INDEX)
-        #dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
-        #self.timefmt = ' [%s]'%dates
+        # dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
+        # self.timefmt = ' [%s]'%dates
         section_title = 'Untitled'
 
         for column in soup.findAll('div', attrs={'class':'feedBoxes clearfix'}):
@@ -110,11 +76,13 @@ class FinancialTimes(BasicNewsRecipe):
                 sectiontitle=self.tag_to_string(section.find('h4'))
                 if '...' not in sectiontitle:
                     section_title=sectiontitle
+                    self.log('Found section:', sectiontitle)
                 for article in section.ul.findAll('li'):
                     articles = []
                     title=self.tag_to_string(article.a)
                     url=article.a['href']
                     articles.append({'title':title, 'url':url, 'description':'', 'date':''})
+                    self.log('\tFound article:', title)
 
                     if articles:
                         if section_title not in feeds:
@@ -134,6 +102,9 @@ class FinancialTimes(BasicNewsRecipe):
             it.attrs = []
         for item in soup.findAll(style=True):
             del item['style']
+        for img in soup.findAll('img', src=True):
+            if 'track/track.js' in img['src']:
+                img.extract()
         for item in soup.findAll('a'):
             limg = item.find('img')
             if item.string is not None:
@@ -146,9 +117,6 @@ class FinancialTimes(BasicNewsRecipe):
             else:
                 str = self.tag_to_string(item)
                 item.replaceWith(str)
-        for item in soup.findAll('img'):
-            if not item.has_key('alt'):
-                item['alt'] = 'image'
         return soup
 
     def get_obfuscated_article(self, url):