Handle attributes from multiple body tags

This commit is contained in:
Kovid Goyal 2013-10-26 13:41:16 +05:30
parent 3e986bccf3
commit 6310c2feac
4 changed files with 40 additions and 4 deletions

View File

@ -367,6 +367,18 @@ class TreeBuilder(BaseTreeBuilder):
for child in html:
newroot.append(copy.copy(child))
def apply_body_attributes(self, attrs):
    """Merge the attributes of an extra <body> start tag into the body element.

    The target is ``self.openElements[1]``. ``attrs`` is converted with
    ``process_attribs`` (given a copy of the element's nsmap) and every
    resulting attribute that the element does not already carry is set on
    it. Attributes already present are left untouched. If ``set()`` raises
    ``ValueError`` for a name, the name is passed through ``to_xml_name``
    and set again.
    """
    body_elem = self.openElements[1]
    namespaces = body_elem.nsmap.copy()
    converted = process_attribs(attrs, namespaces)
    for name, value in converted.iteritems():
        if name in body_elem.attrib:
            # First occurrence wins; never overwrite an existing attribute.
            continue
        try:
            body_elem.set(name, value)
        except ValueError:
            body_elem.set(to_xml_name(name), value)
    # We ignore xmlns attributes on non-first <body> tags
def insertComment(self, token, parent=None):
if parent is None:
parent = self.openElements[-1]
@ -425,6 +437,18 @@ class NoNamespaceTreeBuilder(TreeBuilder):
except ValueError:
html.set(to_xml_name(k), v)
def apply_body_attributes(self, attrs):
    """Merge the attributes of an extra <body> start tag into the body element.

    Does nothing when ``attrs`` is empty. Otherwise ``attrs`` is converted
    with ``process_namespace_free_attribs`` and every resulting attribute
    not already present on ``self.openElements[1]`` is set on it. If
    ``set()`` raises ``ValueError`` for a name, the name is passed through
    ``to_xml_name`` and set again.
    """
    if attrs:
        body_elem = self.openElements[1]
        converted = process_namespace_free_attribs(attrs)
        for name, value in converted.iteritems():
            if name in body_elem.attrib:
                # First occurrence wins; never overwrite an existing attribute.
                continue
            try:
                body_elem.set(name, value)
            except ValueError:
                body_elem.set(to_xml_name(name), value)
# Input Stream {{{
# Module-level dict, empty at import time.
# NOTE(review): presumably caches compiled regular expressions for the input
# stream code that follows; its users are outside this view -- confirm.
_regex_cache = {}

View File

@ -126,7 +126,16 @@ def entities(test, parse_function):
err = 'Entities not handled, parsed markup:\n' + etree.tostring(root)
test.assertEqual('\xa0\'', root.xpath('//*[local-name()="p"]')[0].text, err)
basic_checks = (nonvoid_cdata_elements, namespaces, space_characters, case_insensitive_element_names, entities)
def multiple_html_and_body(test, parse_function):
    """Check that repeated <html>/<body> start tags are merged, not duplicated.

    Parses markup containing two <html> and two <body> start tags and asserts
    that the result has exactly one html element and one body element, and
    that each of them carries both the ``id`` attribute from the first tag
    and the ``lang`` attribute from the second.

    test: object providing ``assertEqual``.
    parse_function: callable taking the markup string and returning the
        parsed root.
    """
    markup = '<html id="1"><body id="2"><p><html lang="en"><body lang="de"></p>'
    root = parse_function(markup)
    err = 'multiple html and body not handled, parsed markup:\n' + etree.tostring(root)
    # Each expression must match exactly one element in the parsed tree.
    expressions = (
        '//h:html',
        '//h:body',
        '//h:html[@id and @lang]',
        '//h:body[@id and @lang]',
    )
    for expr in expressions:
        matches = XPath(expr)(root)
        test.assertEqual(len(matches), 1, err)
# Tuple of check functions; the ones visible here take (test, parse_function).
# NOTE(review): presumably iterated by the test classes below so every parser
# under test runs the same checks -- confirm against the callers.
basic_checks = (nonvoid_cdata_elements, namespaces, space_characters, case_insensitive_element_names, entities, multiple_html_and_body)
class ParsingTests(BaseTest):

View File

@ -1019,9 +1019,7 @@ def getPhases(debug):
assert self.parser.innerHTML
else:
self.parser.framesetOK = False
for attr, value in token["data"].items():
if attr not in self.tree.openElements[1].attributes:
self.tree.openElements[1].attributes[attr] = value
self.tree.apply_body_attributes(token['data'])
def startTagFrameset(self, token):
self.parser.parseError("unexpected-start-tag", {"name": "frameset"})

View File

@ -274,6 +274,11 @@ class TreeBuilder(object):
if attr not in self.openElements[0].attributes:
self.openElements[0].attributes[attr] = value
def apply_body_attributes(self, attrs):
    """Merge the attributes of an extra <body> start tag into the body element.

    Every (name, value) pair in ``attrs`` whose name is not already in the
    ``attributes`` mapping of ``self.openElements[1]`` is added to it.
    Attributes already present are left untouched.

    attrs: mapping of attribute name to value taken from the start tag token.
    """
    # This method lives on the tree builder itself, so the open-element
    # stack is self.openElements. The old self.tree.openElements was copied
    # from the parser phase and raised AttributeError here.
    body_attributes = self.openElements[1].attributes
    for attr, value in attrs.items():
        if attr not in body_attributes:
            body_attributes[attr] = value
def _getInsertFromTable(self):
return self._insertFromTable