diff --git a/recipes/financial_times.recipe b/recipes/financial_times.recipe index c8ff6f7e50..a67fbedd5a 100644 --- a/recipes/financial_times.recipe +++ b/recipes/financial_times.recipe @@ -16,6 +16,7 @@ class ft(BasicNewsRecipe): remove_empty_feeds = True ignore_duplicate_articles = {'url'} remove_attributes = ['style', 'width', 'height'] + masthead_url = 'https://im.ft-static.com/m/img/masthead_main.jpg' def get_cover_url(self): soup = self.index_to_soup( @@ -35,7 +36,7 @@ class ft(BasicNewsRecipe): ('Climate', 'https://www.ft.com/climate-capital?format=rss'), ('Opinion', 'https://www.ft.com/opinion?format=rss'), ('Life & Arts', 'https://www.ft.com/life-arts?format=rss'), - ('how to spend it', 'https://www.ft.com/htsi?format=rss'), + ('How to spend it', 'https://www.ft.com/htsi?format=rss'), ] def preprocess_raw_html(self, raw, *a): @@ -50,17 +51,25 @@ class ft(BasicNewsRecipe): title = data['headline'] body = data['articleBody'] body = body.replace('\n\n', '
') - # remove embedded image links - body = re.sub(r'\[https://\S+?\]', '', body) author = '' if 'author' in data: try: author = data['author']['name'] except TypeError: author = ' and '.join(x['name'] for x in data['author']) - image = desc = '' + image = desc = title_image_url = '' if data.get('image'): - image = '
'.format(data['image']['url'])
+ title_image_url = data['image']['url']
+ image = '
'.format(title_image_url)
+
+ # embedded image links
+ def insert_image(m):
+ url = m.group()[1:-1]
+ if url == title_image_url:
+ return ''
+ return '
'.format(url)
+
+ body = re.sub(r'\[https://\S+?\]', insert_image, body)
if data.get('description'):
desc = '
' + body