Fix a few failing tests from the transition to html5-parser

This commit is contained in:
Kovid Goyal 2017-07-15 10:32:48 +05:30
parent 7aa05e70d3
commit 37bd4825dc
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 6 additions and 5 deletions

View File

@ -93,7 +93,8 @@ def node_depth(node):
def html5_parse(data, max_nesting_depth=100):
from html5_parser import parse
data = parse(data, maybe_xhtml=True, keep_doctype=False, sanitize_names=True)
from calibre.utils.cleantext import clean_xml_chars
data = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, sanitize_names=True)
# Check that the asinine HTML 5 algorithm did not result in a tree with
# insane nesting depths
for x in data.iterdescendants():

View File

@ -63,14 +63,14 @@ def namespaces(test, parse_function):
root = parse_function(markup)
err = 'Incorrect parsing, parsed markup:\n' + etree.tostring(root)
match_and_prefix(root, '//h:body[@id="test"]', None, err)
match_and_prefix(root, '//svg:svg', None if parse_function is parse else 'svg', err)
match_and_prefix(root, '//svg:image[@xl:href]', None if parse_function is parse else 'svg', err)
match_and_prefix(root, '//svg:svg', None, err)
match_and_prefix(root, '//svg:image[@xl:href]', None, err)
markup = '<html><body><svg><image xlink:href="xxx"></svg>'
root = parse_function(markup)
err = 'Namespaces not created, parsed markup:\n' + etree.tostring(root)
match_and_prefix(root, '//svg:svg', None if parse_function is parse else 'svg', err)
match_and_prefix(root, '//svg:image[@xl:href]', None if parse_function is parse else 'svg', err)
match_and_prefix(root, '//svg:svg', None, err)
match_and_prefix(root, '//svg:image[@xl:href]', None, err)
if parse_function is parse:
image = XPath('//svg:image')(root)[0]
ae(image.nsmap, {'xlink':XLINK_NS, None:SVG_NS})