News fetching: Fix a regression that broke removal of attributes/javascript/srcset because of bugs in the new BeautifulSoup

This commit is contained in:
Kovid Goyal 2019-04-23 12:15:47 +05:30
parent 7270eaa91f
commit bc661a812d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -972,6 +972,9 @@ class BasicNewsRecipe(Recipe):
extra_css=self.get_extra_css() or '')
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(0, elem)
# This is needed because otherwise inserting elements into
# the soup breaks find()
soup = BeautifulSoup(soup.decode_contents())
if self.remove_javascript:
for script in list(soup.findAll('script')):
script.extract()