Remove various bad tags from Economist downloads.

This commit is contained in:
Kovid Goyal 2009-12-05 09:51:38 -07:00
parent f58c59ca0a
commit 4f13ef6aaa
2 changed files with 4 additions and 2 deletions

View File

@@ -22,7 +22,8 @@ class Economist(BasicNewsRecipe):
oldest_article = 7.0
cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg'
-remove_tags = [dict(name=['script', 'noscript', 'title'])]
+remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
+dict(attrs={'class':['dblClkTrk']})]
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
needs_subscription = True

View File

@@ -16,7 +16,8 @@ class Economist(BasicNewsRecipe):
oldest_article = 6.5
cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg'
-remove_tags = [dict(name=['script', 'noscript', 'title'])]
+remove_tags = [dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent']),
+dict(attrs={'class':['dblClkTrk']})]
remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
def parse_index(self):