mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Recipe subsystem: Add a remove_attributes property to make it easy to specify that some attributes should be removed from all tags
This commit is contained in:
parent
d49f21d47e
commit
babe84e4e5
@ -210,6 +210,12 @@ class BasicNewsRecipe(Recipe):
|
|||||||
#: tags before the first element with `id="content"`.
|
#: tags before the first element with `id="content"`.
|
||||||
remove_tags_before = None
|
remove_tags_before = None
|
||||||
|
|
||||||
|
#: List of attributes to remove from all tags
|
||||||
|
#: For example::
|
||||||
|
#:
|
||||||
|
#: remove_attributes = ['style', 'font']
|
||||||
|
remove_attributes = []
|
||||||
|
|
||||||
#: Keep only the specified tags and their children.
|
#: Keep only the specified tags and their children.
|
||||||
#: For the format for specifying a tag see :attr:`BasicNewsRecipe.remove_tags`.
|
#: For the format for specifying a tag see :attr:`BasicNewsRecipe.remove_tags`.
|
||||||
#: If this list is not empty, then the `<body>` tag will be emptied and re-filled with
|
#: If this list is not empty, then the `<body>` tag will be emptied and re-filled with
|
||||||
@ -562,6 +568,9 @@ class BasicNewsRecipe(Recipe):
|
|||||||
script.extract()
|
script.extract()
|
||||||
for script in list(soup.findAll('noscript')):
|
for script in list(soup.findAll('noscript')):
|
||||||
script.extract()
|
script.extract()
|
||||||
|
for attr in self.remove_attributes:
|
||||||
|
for x in soup.findAll(attrs={attr:True}):
|
||||||
|
del x[attr]
|
||||||
return self.postprocess_html(soup, first_fetch)
|
return self.postprocess_html(soup, first_fetch)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user