From bc661a812d8bf10397c98fb092bed34bc28d9761 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 23 Apr 2019 12:15:47 +0530
Subject: [PATCH] News fetching: Fix a regression that broke removal of attributes/javascript/srcset because of bugs in the new BeautifulSoup

---
 src/calibre/web/feeds/news.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index dc7d9ce883..cfaa91245a 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -972,6 +972,9 @@ class BasicNewsRecipe(Recipe):
                                              extra_css=self.get_extra_css() or '')
                 elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                 body.insert(0, elem)
+                # This is needed because otherwise inserting elements into
+                # the soup breaks find()
+                soup = BeautifulSoup(soup.decode_contents())
         if self.remove_javascript:
             for script in list(soup.findAll('script')):
                 script.extract()