From 915fa2f8e0c8440d877abe3e88b0955eb1b1b49f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 31 Dec 2011 15:38:23 +0530 Subject: [PATCH] ... --- src/calibre/ebooks/oeb/parse_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index a10d352d6d..3b50301a5b 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -80,7 +80,9 @@ def node_depth(node): def html5_parse(data, max_nesting_depth=100): import html5lib + # html5lib bug: http://code.google.com/p/html5lib/issues/detail?id=195 data = re.sub(r'<\s*title\s*/\s*>', '', data) + data = html5lib.parse(data, treebuilder='lxml').getroot() # Check that the asinine HTML 5 algorithm did not result in a tree with