From 3219cb3aa57d1fba8ed1e67c8e84a21ddde839db Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 8 Aug 2009 14:47:14 -0600 Subject: [PATCH] Fix #3106 (Mobi book will not convert to ePub) --- src/calibre/ebooks/oeb/base.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 993edea279..6ef95f62d7 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -795,14 +795,18 @@ class Manifest(object): def first_pass(data): try: data = etree.fromstring(data) - except etree.XMLSyntaxError: + except etree.XMLSyntaxError, err: repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0)) data = ENTITY_RE.sub(repl, data) try: data = etree.fromstring(data) - except etree.XMLSyntaxError: + except etree.XMLSyntaxError, err: self.oeb.logger.warn('Parsing file %r as HTML' % self.href) - data = html.fromstring(data) + if err.args and err.args[0].startswith('Excessive depth'): + from lxml.html import soupparser + data = soupparser.fromstring(data) + else: + data = html.fromstring(data) data.attrib.pop('xmlns', None) for elem in data.iter(tag=etree.Comment): if elem.text: