mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
...
This commit is contained in:
parent
b005840838
commit
915fa2f8e0
@ -80,7 +80,9 @@ def node_depth(node):
|
|||||||
|
|
||||||
def html5_parse(data, max_nesting_depth=100):
|
def html5_parse(data, max_nesting_depth=100):
|
||||||
import html5lib
|
import html5lib
|
||||||
|
# html5lib bug: http://code.google.com/p/html5lib/issues/detail?id=195
|
||||||
data = re.sub(r'<\s*title\s*/\s*>', '<title></title>', data)
|
data = re.sub(r'<\s*title\s*/\s*>', '<title></title>', data)
|
||||||
|
|
||||||
data = html5lib.parse(data, treebuilder='lxml').getroot()
|
data = html5lib.parse(data, treebuilder='lxml').getroot()
|
||||||
|
|
||||||
# Check that the asinine HTML 5 algorithm did not result in a tree with
|
# Check that the asinine HTML 5 algorithm did not result in a tree with
|
||||||
|
Loading…
x
Reference in New Issue
Block a user