From 4f13ef6aaa0e458cb543a29f31626f5a201267b8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 5 Dec 2009 09:51:38 -0700 Subject: [PATCH] Remove various bad tags from Economist downloads. --- resources/recipes/economist.recipe | 3 ++- resources/recipes/economist_free.recipe | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/resources/recipes/economist.recipe b/resources/recipes/economist.recipe index 75cb86863a..cadf2964ed 100644 --- a/resources/recipes/economist.recipe +++ b/resources/recipes/economist.recipe @@ -22,7 +22,8 @@ class Economist(BasicNewsRecipe): oldest_article = 7.0 cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg' - remove_tags = [dict(name=['script', 'noscript', 'title'])] + remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']), + dict(attrs={'class':['dblClkTrk']})] remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body') needs_subscription = True diff --git a/resources/recipes/economist_free.recipe b/resources/recipes/economist_free.recipe index effda489c9..f7c1b3816f 100644 --- a/resources/recipes/economist_free.recipe +++ b/resources/recipes/economist_free.recipe @@ -16,7 +16,8 @@ class Economist(BasicNewsRecipe): oldest_article = 6.5 cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg' - remove_tags = [dict(name=['script', 'noscript', 'title'])] + remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']), + dict(attrs={'class':['dblClkTrk']})] remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body') def parse_index(self):