py3: More fixes to RTF input

Embedded WMF image processing now works
2025-08-30 23:00:21 -04:00 · 2019-05-20 15:50:36 +05:30 · 2019-05-20 15:50:36 +05:30 · cb5ac309fa
commit cb5ac309fa
parent 037e28a442
2 changed files with 15 additions and 11 deletions
--- a/src/calibre/ebooks/conversion/plugins/rtf_input.py
+++ b/src/calibre/ebooks/conversion/plugins/rtf_input.py
@@ -1,4 +1,4 @@
-from __future__ import with_statement
+from __future__ import with_statement, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@@ -118,20 +118,21 @@ class RTFInput(InputFormatPlugin):
    def extract_images(self, picts):
        from calibre.utils.imghdr import what
        from binascii import unhexlify
        self.log('Extracting images...')
        with open(picts, 'rb') as f:
            raw = f.read()
-        picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
+        picts = filter(len, re.findall(br'\{\\pict([^}]+)\}', raw))
-        hex = re.compile(r'[^a-fA-F0-9]')
+        hex_pat = re.compile(br'[^a-fA-F0-9]')
-        encs = [hex.sub('', pict) for pict in picts]
+        encs = [hex_pat.sub(b'', pict) for pict in picts]
        count = 0
        imap = {}
        for enc in encs:
            if len(enc) % 2 == 1:
                enc = enc[:-1]
-            data = enc.decode('hex')
+            data = unhexlify(enc)
            fmt = what(None, data)
            if fmt is None:
                fmt = 'wmf'
@@ -158,7 +159,7 @@ class RTFInput(InputFormatPlugin):
            return name
        try:
            return self.rasterize_wmf(name)
-        except:
+        except Exception:
            self.log.exception('Failed to convert WMF image %r'%name)
        return self.replace_wmf(name)
@@ -168,7 +169,7 @@ class RTFInput(InputFormatPlugin):
            return '__REMOVE_ME__'
        from calibre.ebooks.covers import message_image
        if self.default_img is None:
-            self.default_img = message_image('Conversion of WMF images is not supported.',
+            self.default_img = message_image('Conversion of WMF images is not supported.'
            ' Use Microsoft Word or OpenOffice to save this RTF file'
            ' as HTML and convert that in calibre.')
        name = name.replace('.wmf', '.jpg')
@@ -287,15 +288,15 @@ class RTFInput(InputFormatPlugin):
        result = transform(doc)
        html = u'index.xhtml'
        with open(html, 'wb') as f:
-            res = transform.tostring(result)
+            res = as_bytes(transform.tostring(result))
            # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            # clean multiple \n
-            res = re.sub('\n+', '\n', res)
+            res = re.sub(b'\n+', b'\n', res)
            # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
            # res = re.sub('\s*<body>', '<body>', res)
            # res = re.sub('(?<=\n)\n{2}',
            # u'<p>\u00a0</p>\n'.encode('utf-8'), res)
-            f.write(as_bytes(res))
+            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
--- a/src/calibre/utils/wmf/parse.py
+++ b/src/calibre/utils/wmf/parse.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python2
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@@ -145,7 +146,7 @@ class WMF(object):
            size, func = struct.unpack_from('<IH', data, offset)
            size *= 2  # Convert to bytes
            offset += hsize
-            params = ''
+            params = b''
            delta = size - hsize
            if delta > 0:
                params = data[offset:offset+delta]
@@ -158,6 +159,8 @@ class WMF(object):
            self.records.append((func, params))
        for rec in self.records:
            if not hasattr(rec[0], 'split'):
                continue
            f = getattr(self, rec[0], None)
            if callable(f):
                f(rec[1])