From 37bd4825dc8dcab97f3c40e733fbb10a8b5cf7a7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 15 Jul 2017 10:32:48 +0530 Subject: [PATCH] Fix a few failing tests from the transition to html5-parser --- src/calibre/ebooks/oeb/parse_utils.py | 3 ++- src/calibre/ebooks/oeb/polish/tests/parsing.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index cf4dc02a33..5883241883 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -93,7 +93,8 @@ def node_depth(node): def html5_parse(data, max_nesting_depth=100): from html5_parser import parse - data = parse(data, maybe_xhtml=True, keep_doctype=False, sanitize_names=True) + from calibre.utils.cleantext import clean_xml_chars + data = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, sanitize_names=True) # Check that the asinine HTML 5 algorithm did not result in a tree with # insane nesting depths for x in data.iterdescendants(): diff --git a/src/calibre/ebooks/oeb/polish/tests/parsing.py b/src/calibre/ebooks/oeb/polish/tests/parsing.py index da496d9090..43c81550dc 100644 --- a/src/calibre/ebooks/oeb/polish/tests/parsing.py +++ b/src/calibre/ebooks/oeb/polish/tests/parsing.py @@ -63,14 +63,14 @@ def namespaces(test, parse_function): root = parse_function(markup) err = 'Incorrect parsing, parsed markup:\n' + etree.tostring(root) match_and_prefix(root, '//h:body[@id="test"]', None, err) - match_and_prefix(root, '//svg:svg', None if parse_function is parse else 'svg', err) - match_and_prefix(root, '//svg:image[@xl:href]', None if parse_function is parse else 'svg', err) + match_and_prefix(root, '//svg:svg', None, err) + match_and_prefix(root, '//svg:image[@xl:href]', None, err) markup = '' root = parse_function(markup) err = 'Namespaces not created, parsed markup:\n' + etree.tostring(root) - match_and_prefix(root, '//svg:svg', None if parse_function is parse
else 'svg', err) - match_and_prefix(root, '//svg:image[@xl:href]', None if parse_function is parse else 'svg', err) + match_and_prefix(root, '//svg:svg', None, err) + match_and_prefix(root, '//svg:image[@xl:href]', None, err) if parse_function is parse: image = XPath('//svg:image')(root)[0] ae(image.nsmap, {'xlink':XLINK_NS, None:SVG_NS})