mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Ensure that svg, mathml tags and xlink attrs have the standard namespace prefixes
This commit is contained in:
parent
280add23ca
commit
d93467b3a3
@ -23,8 +23,9 @@ from calibre.utils.cleantext import clean_xml_chars
|
||||
|
||||
infoset_filter = InfosetFilter()
|
||||
to_xml_name = infoset_filter.toXmlName
|
||||
known_namespaces = {namespaces[k]:k for k in ('mathml', 'svg')}
|
||||
known_namespaces = {namespaces[k]:k for k in ('mathml', 'svg', 'xlink')}
|
||||
html_ns = namespaces['html']
|
||||
xlink_ns = namespaces['xlink']
|
||||
|
||||
class NamespacedHTMLPresent(ValueError):
|
||||
|
||||
@ -213,6 +214,11 @@ def process_attribs(attrs, nsmap):
|
||||
else:
|
||||
nsmap[name] = v
|
||||
else:
|
||||
if k[2] == xlink_ns and 'xlink' not in nsmap:
|
||||
for prefix, ns in tuple(nsmap.iteritems()):
|
||||
if ns == xlink_ns:
|
||||
del nsmap[prefix]
|
||||
nsmap['xlink'] = xlink_ns
|
||||
attribs['{%s}%s' % (k[2], k[1])] = v
|
||||
else:
|
||||
if ':' in k:
|
||||
@ -283,13 +289,14 @@ class TreeBuilder(BaseTreeBuilder):
|
||||
attribs = {to_xml_name(k):v for k, v in attribs.iteritems()}
|
||||
elem = self.lxml_context.makeelement('{%s}%s' % (namespace, to_xml_name(name)), attrib=attribs, nsmap=nsmap)
|
||||
|
||||
# Ensure that svg and mathml elements get nice namespace prefixes if
|
||||
# the input document is HTML 5 with no namespace information
|
||||
if elem.prefix is not None and elem.prefix.startswith('ns') and namespace not in set(nsmap.itervalues()) and namespace in known_namespaces:
|
||||
prefix = known_namespaces[namespace]
|
||||
if prefix not in nsmap:
|
||||
nsmap[prefix] = namespace
|
||||
# Ensure that svg and mathml elements get no namespace prefixes
|
||||
if elem.prefix is not None and namespace in known_namespaces:
|
||||
for k, v in tuple(nsmap.iteritems()):
|
||||
if v == namespace:
|
||||
del nsmap[k]
|
||||
nsmap[None] = namespace
|
||||
elem = self.lxml_context.makeelement(elem.tag, attrib=elem.attrib, nsmap=nsmap)
|
||||
|
||||
# Keep a reference to elem so that lxml does not delete and re-create
|
||||
# it, losing the name related attributes
|
||||
self.proxy_cache.append(elem)
|
||||
@ -422,7 +429,7 @@ def parse(raw, decoder=None, log=None, discard_namespaces=False):
|
||||
if __name__ == '__main__':
|
||||
from lxml import etree
|
||||
# root = parse('\n<html><head><title>a\n</title><p> \n<b>b', discard_namespaces=False)
|
||||
root = parse('\n<html><p><svg><image /><b></svg> \n<b>xxx', discard_namespaces=True)
|
||||
root = parse('\n<html><p><svg viewbox="0 0 0 0"><image xlink:href="xxx"/><b></svg> \n<b>xxx', discard_namespaces=False)
|
||||
print (etree.tostring(root, encoding='utf-8'))
|
||||
print()
|
||||
|
||||
|
@ -48,8 +48,8 @@ def namespaces(test, parse_function):
|
||||
root = parse_function(markup)
|
||||
err = 'Incorrect parsing, parsed markup:\n' + etree.tostring(root)
|
||||
match_and_prefix(root, '//h:body[@id="test"]', None, err)
|
||||
match_and_prefix(root, '//svg:svg', 'svg', err)
|
||||
match_and_prefix(root, '//svg:image[@xl:href]', 'svg', err)
|
||||
match_and_prefix(root, '//svg:svg', None if parse_function is parse else 'svg', err)
|
||||
match_and_prefix(root, '//svg:image[@xl:href]', None if parse_function is parse else 'svg', err)
|
||||
|
||||
markup = '''
|
||||
<html xmlns="{xhtml}"><head><body id="test">
|
||||
@ -64,8 +64,11 @@ def namespaces(test, parse_function):
|
||||
markup = '<html><body><svg><image xlink:href="xxx"></svg>'
|
||||
root = parse_function(markup)
|
||||
err = 'Namespaces not created, parsed markup:\n' + etree.tostring(root)
|
||||
match_and_prefix(root, '//svg:svg', 'svg', err)
|
||||
match_and_prefix(root, '//svg:image[@xl:href]', 'svg', err)
|
||||
match_and_prefix(root, '//svg:svg', None if parse_function is parse else 'svg', err)
|
||||
match_and_prefix(root, '//svg:image[@xl:href]', None if parse_function is parse else 'svg', err)
|
||||
if parse_function is parse:
|
||||
image = XPath('//svg:image')(root)[0]
|
||||
ae(image.nsmap, {'xlink':XLINK_NS, None:SVG_NS})
|
||||
|
||||
root = parse_function('<html id="a"><p><html xmlns:x="y" lang="en"><p>')
|
||||
err = 'Multiple HTML tags not handled, parsed markup:\n' + etree.tostring(root)
|
||||
|
Loading…
x
Reference in New Issue
Block a user