Fix regression in converting HTML files that have non-ASCII characters inside their <style> tags. Apparently, Word generates such files. Fixes #8494 (trouble converting htm files (for john))

This commit is contained in:
Kovid Goyal 2011-01-22 11:28:47 -07:00
parent e42664da72
commit 57883c120e

View File

@@ -221,7 +221,10 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
el.text):
stylesheet = parseString(el.text)
replaceUrls(stylesheet, link_repl_func)
el.text = '\n'+stylesheet.cssText + '\n'
repl = stylesheet.cssText
if isbytestring(repl):
repl = repl.decode('utf-8')
el.text = '\n'+ repl + '\n'
if 'style' in el.attrib:
text = el.attrib['style']
@@ -234,8 +237,11 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
set_property(item)
elif v.CSS_PRIMITIVE_VALUE == v.cssValueType:
set_property(v)
el.attrib['style'] = stext.cssText.replace('\n', ' ').replace('\r',
repl = stext.cssText.replace('\n', ' ').replace('\r',
' ')
if isbytestring(repl):
repl = repl.decode('utf-8')
el.attrib['style'] = repl