Fix regression in converting HTML files that have non-ASCII characters inside their <style> tags. Apparently Word generates such files. Fixes #8494 (trouble converting htm files (for john))

2025-07-09 03:04:10 -04:00 · 2011-01-22 11:28:47 -07:00 · 2011-01-22 11:28:47 -07:00 · 57883c120e
commit 57883c120e
parent e42664da72
1 changed files with 8 additions and 2 deletions
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -221,7 +221,10 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
                        el.text):
            stylesheet = parseString(el.text)
            replaceUrls(stylesheet, link_repl_func)
-            el.text = '\n'+stylesheet.cssText + '\n'
+            repl = stylesheet.cssText
+            if isbytestring(repl):
+                repl = repl.decode('utf-8')
+            el.text = '\n'+ repl + '\n'

        if 'style' in el.attrib:
            text = el.attrib['style']
@ -234,8 +237,11 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
                            set_property(item)
                    elif v.CSS_PRIMITIVE_VALUE == v.cssValueType:
                        set_property(v)
-                el.attrib['style'] = stext.cssText.replace('\n', ' ').replace('\r',
+                repl = stext.cssText.replace('\n', ' ').replace('\r',
                        ' ')
+                if isbytestring(repl):
+                    repl = repl.decode('utf-8')
+                el.attrib['style'] = repl