This commit is contained in:
Kovid Goyal 2011-12-31 16:30:22 +05:30
parent 915fa2f8e0
commit b6d14d49c1
2 changed files with 3 additions and 2 deletions

View File

@ -81,7 +81,7 @@ def node_depth(node):
def html5_parse(data, max_nesting_depth=100):
import html5lib
# html5lib bug: http://code.google.com/p/html5lib/issues/detail?id=195
data = re.sub(r'<\s*title\s*/\s*>', '<title></title>', data)
data = re.sub(r'<\s*title\s*[^>]*/\s*>', '<title></title>', data)
data = html5lib.parse(data, treebuilder='lxml').getroot()

View File

@ -47,7 +47,8 @@ class PMLInput(InputFormatPlugin):
self.log.debug('Converting PML to HTML...')
hizer = PML_HTMLizer()
html = hizer.parse_pml(pml_stream.read().decode(ienc), html_path)
html_stream.write('<html><head><title /></head><body>%s</body></html>' % html.encode('utf-8', 'replace'))
html = '<html><head><title></title></head><body>%s</body></html>'%html
html_stream.write(html.encode('utf-8', 'replace'))
if pclose:
pml_stream.close()