Fix #1720. Fix HTML comments which aren't valid XML comments.

This commit is contained in:
Marshall T. Vandegrift 2009-01-28 12:04:22 -05:00
parent e2a2701e23
commit 763ceacafb

View File

@@ -353,9 +353,13 @@ class Manifest(object):
try:
data = etree.fromstring(data)
except etree.XMLSyntaxError:
# TODO: Factor out HTML->XML coercion
self.oeb.logger.warn('Parsing file %r as HTML' % self.href)
data = html.fromstring(data)
data.attrib.pop('xmlns', None)
for elem in data.iter(tag=etree.Comment):
if elem.text:
elem.text = elem.text.strip('-')
data = etree.tostring(data, encoding=unicode)
data = etree.fromstring(data)
# Force into the XHTML namespace