mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Handle attributes from multiple body tags
This commit is contained in:
parent
3e986bccf3
commit
6310c2feac
@ -367,6 +367,18 @@ class TreeBuilder(BaseTreeBuilder):
|
|||||||
for child in html:
|
for child in html:
|
||||||
newroot.append(copy.copy(child))
|
newroot.append(copy.copy(child))
|
||||||
|
|
||||||
|
def apply_body_attributes(self, attrs):
    """Merge the attributes of an additional <body> start tag into the
    body element that is already open.

    Only attributes the existing body does not yet carry are added;
    values already present are never overwritten.

    :param attrs: attribute mapping taken from the extra <body> token.
        An empty (or None) mapping is a no-op.
    """
    # Nothing to merge: skip the nsmap copy and element lookup entirely,
    # matching the guard in NoNamespaceTreeBuilder.apply_body_attributes.
    if not attrs:
        return
    body = self.openElements[1]
    # process_attribs is handed a copy of the namespace map; that copy is
    # discarded when this method returns.
    # We ignore xmlns attributes on non-first <body> tags
    nsmap = body.nsmap.copy()
    attribs = process_attribs(attrs, nsmap)
    for k, v in attribs.iteritems():
        if k not in body.attrib:
            try:
                body.set(k, v)
            except ValueError:
                # set() rejected the name as given; retry with the name
                # passed through to_xml_name().
                body.set(to_xml_name(k), v)
|
||||||
|
|
||||||
def insertComment(self, token, parent=None):
|
def insertComment(self, token, parent=None):
|
||||||
if parent is None:
|
if parent is None:
|
||||||
parent = self.openElements[-1]
|
parent = self.openElements[-1]
|
||||||
@ -425,6 +437,18 @@ class NoNamespaceTreeBuilder(TreeBuilder):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
html.set(to_xml_name(k), v)
|
html.set(to_xml_name(k), v)
|
||||||
|
|
||||||
|
def apply_body_attributes(self, attrs):
    """Copy attributes from a repeated <body> start tag onto the body
    element that is already open, without namespace processing.

    Attributes the body already has are left untouched. Does nothing
    when *attrs* is empty.
    """
    if attrs:
        body_elem = self.openElements[1]
        extra = process_namespace_free_attribs(attrs)
        for name, value in extra.iteritems():
            if name in body_elem.attrib:
                # First occurrence wins; never overwrite.
                continue
            try:
                body_elem.set(name, value)
            except ValueError:
                # Name rejected as given; retry with the to_xml_name() form.
                body_elem.set(to_xml_name(name), value)
|
||||||
|
|
||||||
# Input Stream {{{
|
# Input Stream {{{
|
||||||
_regex_cache = {}
|
_regex_cache = {}
|
||||||
|
|
||||||
|
@ -126,7 +126,16 @@ def entities(test, parse_function):
|
|||||||
err = 'Entities not handled, parsed markup:\n' + etree.tostring(root)
|
err = 'Entities not handled, parsed markup:\n' + etree.tostring(root)
|
||||||
test.assertEqual('\xa0\'', root.xpath('//*[local-name()="p"]')[0].text, err)
|
test.assertEqual('\xa0\'', root.xpath('//*[local-name()="p"]')[0].text, err)
|
||||||
|
|
||||||
basic_checks = (nonvoid_cdata_elements, namespaces, space_characters, case_insensitive_element_names, entities)
|
def multiple_html_and_body(test, parse_function):
    """Check that repeated <html> and <body> start tags are folded into a
    single element each, with the attributes of every occurrence merged.

    :param test: test case object providing assertEqual()
    :param parse_function: callable that parses a markup string and
        returns the root of the resulting tree
    """
    root = parse_function('<html id="1"><body id="2"><p><html lang="en"><body lang="de"></p>')
    err = 'multiple html and body not handled, parsed markup:\n' + etree.tostring(root)
    # Each expression must match exactly one element: a single html and a
    # single body, each carrying both the id (first tag) and lang (second tag).
    expressions = (
        '//h:html',
        '//h:body',
        '//h:html[@id and @lang]',
        '//h:body[@id and @lang]',
    )
    for expression in expressions:
        test.assertEqual(len(XPath(expression)(root)), 1, err)
|
||||||
|
|
||||||
|
basic_checks = (nonvoid_cdata_elements, namespaces, space_characters, case_insensitive_element_names, entities, multiple_html_and_body)
|
||||||
|
|
||||||
class ParsingTests(BaseTest):
|
class ParsingTests(BaseTest):
|
||||||
|
|
||||||
|
@ -1019,9 +1019,7 @@ def getPhases(debug):
|
|||||||
assert self.parser.innerHTML
|
assert self.parser.innerHTML
|
||||||
else:
|
else:
|
||||||
self.parser.framesetOK = False
|
self.parser.framesetOK = False
|
||||||
for attr, value in token["data"].items():
|
self.tree.apply_body_attributes(token['data'])
|
||||||
if attr not in self.tree.openElements[1].attributes:
|
|
||||||
self.tree.openElements[1].attributes[attr] = value
|
|
||||||
|
|
||||||
def startTagFrameset(self, token):
|
def startTagFrameset(self, token):
|
||||||
self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
|
self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
|
||||||
|
@ -274,6 +274,11 @@ class TreeBuilder(object):
|
|||||||
if attr not in self.openElements[0].attributes:
|
if attr not in self.openElements[0].attributes:
|
||||||
self.openElements[0].attributes[attr] = value
|
self.openElements[0].attributes[attr] = value
|
||||||
|
|
||||||
|
def apply_body_attributes(self, attrs):
    """Merge the attributes of an additional <body> start tag into the
    body element that is already open.

    The element is taken from this builder's own open-element stack
    (``self.openElements[1]``). Attributes it already has keep their
    existing values; only missing ones are added.

    :param attrs: mapping of attribute name to value from the extra
        <body> token. An empty (or None) mapping is a no-op.
    """
    if not attrs:
        return
    # This method lives on the tree builder itself, so the stack is
    # self.openElements -- there is no self.tree here (that attribute
    # belongs to the parser phases that call into the builder).
    body = self.openElements[1]
    for attr, value in attrs.items():
        if attr not in body.attributes:
            body.attributes[attr] = value
|
||||||
|
|
||||||
def _getInsertFromTable(self):
|
def _getInsertFromTable(self):
|
||||||
return self._insertFromTable
|
return self._insertFromTable
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user