From e25426ad5a4f67cf180e1de91bac766274055b5a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 21 Aug 2017 00:31:35 +0530
Subject: [PATCH] Update Financial Times

Fixes #1711934 [Financial times download uk/us is broken](https://bugs.launchpad.net/calibre/+bug/1711934)
---
 recipes/financial_times_uk.recipe | 52 +++++++++++--------------------
 recipes/financial_times_us.recipe | 46 +++++++++++----------------
 2 files changed, 36 insertions(+), 62 deletions(-)

diff --git a/recipes/financial_times_uk.recipe b/recipes/financial_times_uk.recipe
index 490624bfd2..b3cae18735 100644
--- a/recipes/financial_times_uk.recipe
+++ b/recipes/financial_times_uk.recipe
@@ -5,11 +5,10 @@
 __license__ = 'GPL v3'
 __copyright__ = '2010-2017, Darko Miletic <darko.miletic at gmail.com>'
 '''
-www.ft.com/uk-edition
+www.ft.com/todaysnewspaper/uk
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
-from collections import OrderedDict
 from urllib import unquote
 
 
@@ -34,22 +33,26 @@ class FinancialTimes(BasicNewsRecipe):
     encoding = 'utf8'
     publication_type = 'newspaper'
     handle_gzip = True
+    compress_news_images = True
+    scale_news_images_to_device = True
+    ignore_duplicate_articles = {'url'}
     LOGIN = 'https://accounts.ft.com/login?location=https%3A%2F%2Fwww.ft.com%2F'
     LOGOUT = 'https://myaccount.ft.com/logout'
-    INDEX = 'http://www.ft.com/uk-edition'
-    PREFIX = 'http://www.ft.com'
+    INDEX = 'https://www.ft.com/todaysnewspaper/uk'
+    PREFIX = 'https://www.ft.com'
 
     keep_only_tags = [
-        classes(
-            'article__header--wrapper article__time-byline article__body'
-            'n-content-image barrier-grid__heading article__time-byline topper__headline topper__standfirst')
+        classes('topper__headline topper__standfirst n-content-image--full article__time-byline article__body')
     ]
 
     remove_tags = [
-        classes('n-content-related-box tour-tip')
+        classes('n-content-related-box tour-tip n-content-recommended n-content-video')
     ]
 
-    remove_attributes = ['width', 'height', 'lang', 'style']
+    extra_css = '''
+                body {font-family: Georgia,serif;}
+                img {display:block;}
+                '''
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
@@ -81,33 +84,14 @@ class FinancialTimes(BasicNewsRecipe):
         return cover
 
     def parse_index(self):
-        feeds = OrderedDict()
+        articles = []
         soup = self.index_to_soup(self.INDEX)
-        # dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
-        # self.timefmt = ' [%s]'%dates
-        section_title = 'Untitled'
 
-        for column in soup.findAll('div', attrs={'class': 'feedBoxes clearfix'}):
-            for section in column.findAll('div', attrs={'class': 'feedBox'}):
-                sectiontitle = self.tag_to_string(section.find('h4'))
-                if '...' not in sectiontitle:
-                    section_title = sectiontitle
-                    self.log('Found section:', sectiontitle)
-                for article in section.ul.findAll('li'):
-                    articles = []
-                    title = self.tag_to_string(article.a)
-                    url = article.a['href']
-                    articles.append(
-                        {'title': title, 'url': url, 'description': '', 'date': ''})
-                    self.log('\tFound article:', title)
-
-                    if articles:
-                        if section_title not in feeds:
-                            feeds[section_title] = []
-                        feeds[section_title] += articles
-
-        ans = [(key, val) for key, val in feeds.iteritems()]
-        return ans
+        for article in soup.findAll('a', href=True, attrs={'data-trackable':'main-link'}):
+            url = self.PREFIX + article['href']
+            title = self.tag_to_string(article)
+            articles.append({'title': title, 'url': url, 'description': '', 'date': ''})
+        return [("Articles", articles)]
 
     def preprocess_html(self, soup):
         for img in soup.findAll('img', srcset=True):
diff --git a/recipes/financial_times_us.recipe b/recipes/financial_times_us.recipe
index 6b6c8338a9..6642861e55 100644
--- a/recipes/financial_times_us.recipe
+++ b/recipes/financial_times_us.recipe
@@ -5,11 +5,10 @@
 __license__ = 'GPL v3'
 __copyright__ = '2010-2017, Darko Miletic <darko.miletic at gmail.com>'
 '''
-www.ft.com/international-edition
+www.ft.com/todaysnewspaper/international
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
-from collections import OrderedDict
 from urllib import unquote
 
 
@@ -34,20 +33,26 @@ class FinancialTimes(BasicNewsRecipe):
     encoding = 'utf8'
     publication_type = 'newspaper'
     handle_gzip = True
+    compress_news_images = True
+    scale_news_images_to_device = True
+    ignore_duplicate_articles = {'url'}
     LOGIN = 'https://accounts.ft.com/login?location=https%3A%2F%2Fwww.ft.com%2F'
     LOGOUT = 'https://myaccount.ft.com/logout'
-    INDEX = 'http://www.ft.com/international-edition'
-    PREFIX = 'http://www.ft.com'
+    INDEX = 'https://www.ft.com/todaysnewspaper/international'
+    PREFIX = 'https://www.ft.com'
 
     keep_only_tags = [
-        classes('article__header--wrapper article__time-byline article__body n-content-image barrier-grid__heading')
+        classes('topper__headline topper__standfirst n-content-image--full article__time-byline article__body')
     ]
 
     remove_tags = [
-        classes('n-content-related-box tour-tip')
+        classes('n-content-related-box tour-tip n-content-recommended n-content-video')
     ]
 
-    remove_attributes = ['width', 'height', 'lang', 'style']
+    extra_css = '''
+                body {font-family: Georgia,serif;}
+                img {display:block;}
+                '''
 
     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
@@ -63,29 +68,14 @@ class FinancialTimes(BasicNewsRecipe):
         return br
 
     def parse_index(self):
-        feeds = OrderedDict()
+        articles = []
         soup = self.index_to_soup(self.INDEX)
-        section_title = 'Untitled'
 
-        for column in soup.findAll('div', attrs={'class': 'feedBoxes clearfix'}):
-            for section in column.findAll('div', attrs={'class': 'feedBox'}):
-                sectiontitle = self.tag_to_string(section.find('h4'))
-                if '...' not in sectiontitle:
-                    section_title = sectiontitle
-                for article in section.ul.findAll('li'):
-                    articles = []
-                    title = self.tag_to_string(article.a)
-                    url = article.a['href']
-                    articles.append(
-                        {'title': title, 'url': url, 'description': '', 'date': ''})
-
-                    if articles:
-                        if section_title not in feeds:
-                            feeds[section_title] = []
-                        feeds[section_title] += articles
-
-        ans = [(key, val) for key, val in feeds.iteritems()]
-        return ans
+        for article in soup.findAll('a', href=True, attrs={'data-trackable':'main-link'}):
+            url = self.PREFIX + article['href']
+            title = self.tag_to_string(article)
+            articles.append({'title': title, 'url': url, 'description': '', 'date': ''})
+        return [("Articles", articles)]
 
     def preprocess_html(self, soup):
         for img in soup.findAll('img', srcset=True):