From 636cb5e6546bdfa5a5887f98785d184399c8565c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 9 Jul 2017 08:57:16 +0530 Subject: [PATCH] Replace html5lib in news download subsystem --- src/calibre/web/feeds/news.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 29f966ec5e..8c750e43c1 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -686,14 +686,14 @@ class BasicNewsRecipe(Recipe): else: _raw = _raw.decode(self.encoding, 'replace') if as_tree: - import html5lib + from html5parser import parse from calibre.ebooks.chardet import strip_encoding_declarations, xml_to_unicode from calibre.utils.cleantext import clean_xml_chars if isinstance(_raw, unicode): _raw = strip_encoding_declarations(_raw) else: _raw = xml_to_unicode(_raw, strip_encoding_pats=True, resolve_entities=True)[0] - return html5lib.parse(clean_xml_chars(_raw), treebuilder='lxml', namespaceHTMLElements=False) + return parse(clean_xml_chars(_raw)) massage = list(BeautifulSoup.MARKUP_MASSAGE) enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding