Conversion pipeline: If the input HTML document uses uppercase tag and attribute names, convert them to lowercase

This commit is contained in:
Kovid Goyal 2011-03-10 10:48:40 -07:00
parent 04e8972d02
commit f2d9d40146

View File

@ -908,6 +908,19 @@ class Manifest(object):
pass
data = first_pass(data)
if data.tag == 'HTML':
    # The root tag came through in uppercase, so the document is assumed to
    # use uppercase names throughout. The check that follows compares the
    # root against lowercase 'html', so lower case all tag and attribute
    # names in the tree.
    data.tag = data.tag.lower()
    # iter() yields the root element as well as its descendants, so
    # attributes on the <HTML> element itself are normalised too.
    for x in data.iter():
        tag = x.tag
        if callable(tag):
            # Comments, processing instructions and entities carry a
            # factory function rather than a string in .tag; they have no
            # tag or attribute names to rewrite.
            continue
        x.tag = tag.lower()
        # Snapshot the attributes first: the mapping is mutated in the loop.
        for key, val in list(x.attrib.items()):
            # Delete and re-add every attribute (even ones that are already
            # lowercase) so the original attribute order is preserved.
            del x.attrib[key]
            x.attrib[key.lower()] = val
# Handle weird (non-HTML/fragment) files
if barename(data.tag) != 'html':
self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href)