From babe84e4e54836dca658b896bb07975ad64a61cd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 15 Aug 2009 12:15:24 -0600 Subject: [PATCH] Recipe subsystem: Add a remove_attributes property to make it easy to specify that some attributes should be removed from all tags --- src/calibre/web/feeds/news.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index ee3d48ead7..80f8563771 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -210,6 +210,12 @@ class BasicNewsRecipe(Recipe): #: tags before the first element with `id="content"`. remove_tags_before = None + #: List of attributes to remove from all tags + #: For example:: + #: + #: remove_attributes = ['style', 'font'] + remove_attributes = [] + #: Keep only the specified tags and their children. #: For the format for specifying a tag see :attr:`BasicNewsRecipe.remove_tags`. #: If this list is not empty, then the `<body>` tag will be emptied and re-filled with @@ -562,6 +568,9 @@ class BasicNewsRecipe(Recipe): script.extract() for script in list(soup.findAll('noscript')): script.extract() + for attr in self.remove_attributes: + for x in soup.findAll(attrs={attr:True}): + del x[attr] return self.postprocess_html(soup, first_fetch)