This commit is contained in:
Kovid Goyal 2011-12-31 15:38:23 +05:30
parent b005840838
commit 915fa2f8e0

View File

@@ -80,7 +80,9 @@ def node_depth(node):
def html5_parse(data, max_nesting_depth=100):
import html5lib
# html5lib bug: http://code.google.com/p/html5lib/issues/detail?id=195
data = re.sub(r'<\s*title\s*/\s*>', '<title></title>', data)
data = html5lib.parse(data, treebuilder='lxml').getroot()
# Check that the asinine HTML 5 algorithm did not result in a tree with