From f4b6ca4b27c33da2539e5f7d7feb626545ae82e9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 1 Jun 2016 05:02:29 +0530 Subject: [PATCH] Update Financial Times Fixes #1587578 [Update to financial times recipes](https://bugs.launchpad.net/calibre/+bug/1587578) --- recipes/financial_times_uk.recipe | 7 ++++++- recipes/financial_times_us.recipe | 10 ++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/recipes/financial_times_uk.recipe b/recipes/financial_times_uk.recipe index 5f2409348d..54399fde4d 100644 --- a/recipes/financial_times_uk.recipe +++ b/recipes/financial_times_uk.recipe @@ -40,7 +40,12 @@ class FinancialTimes(BasicNewsRecipe): br.submit() return br - keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})] + keep_only_tags = [ + dict(name='p' , attrs={'class':lambda x: x and 'lastUpdated' in x.split()}), + dict(name='div', attrs={'class':lambda x: x and 'syndicationHeadline' in x.split()}), + dict(name='p' , attrs={'class':lambda x: x and 'byline' in x.split()}), + dict(name='div', attrs={'class':['fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']}) + ] remove_tags = [ dict(name='style', attrs={'id':'antiClickjack'}), dict(name='div', attrs={'id':'floating-con'}), diff --git a/recipes/financial_times_us.recipe b/recipes/financial_times_us.recipe index f2d365018d..8358537f22 100644 --- a/recipes/financial_times_us.recipe +++ b/recipes/financial_times_us.recipe @@ -49,9 +49,15 @@ class FinancialTimes(BasicNewsRecipe): br.submit() return br - keep_only_tags = [dict(name='div', attrs={'class':['fullstory fullstoryHeader','fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']})] + keep_only_tags = [ + dict(name='p' , attrs={'class':lambda x: x and 'lastUpdated' in x.split()}), + dict(name='div', attrs={'class':lambda x: x and 'syndicationHeadline' in x.split()}), + dict(name='p' , attrs={'class':lambda x: x and 'byline' in x.split()}), + dict(name='div', attrs={'class':['fullstory fullstoryBody','ft-story-header','ft-story-body','index-detail']}) + ] remove_tags = [ - dict(name='div', attrs={'id':'floating-con'}) + dict(name='style', attrs={'id':'antiClickjack'}) + ,dict(name='div', attrs={'id':'floating-con'}) ,dict(name=['meta','iframe','base','object','embed','link']) ,dict(attrs={'class':['storyTools','story-package','screen-copy','story-package separator','expandable-image','promobox']}) ,dict(name='div', attrs={'class':lambda x: x and 'insideArticleRelatedTopics' in x.split()})