From f2d9d401465ffc40e3c877ef0bbc3630016a7fb4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 10 Mar 2011 10:48:40 -0700 Subject: [PATCH] Conversion pipeline: If the input HTML document uses uppercase tag and attribute names, convert them to lowercase --- src/calibre/ebooks/oeb/base.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 7e99916fc3..7f3f40184c 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -908,6 +908,19 @@ class Manifest(object): pass data = first_pass(data) + if data.tag == 'HTML': + # Lower case all tag and attribute names + data.tag = data.tag.lower() + for x in data.iterdescendants(): + try: + x.tag = x.tag.lower() + for key, val in list(x.attrib.iteritems()): + del x.attrib[key] + key = key.lower() + x.attrib[key] = val + except: + pass + # Handle weird (non-HTML/fragment) files if barename(data.tag) != 'html': self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href)