diff --git a/src/calibre/ebooks/conversion/plugins/rtf_input.py b/src/calibre/ebooks/conversion/plugins/rtf_input.py index b8b4d50604..6093c5a6c3 100644 --- a/src/calibre/ebooks/conversion/plugins/rtf_input.py +++ b/src/calibre/ebooks/conversion/plugins/rtf_input.py @@ -1,4 +1,4 @@ -from __future__ import with_statement +from __future__ import with_statement, unicode_literals __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' @@ -118,20 +118,21 @@ class RTFInput(InputFormatPlugin): def extract_images(self, picts): from calibre.utils.imghdr import what + from binascii import unhexlify self.log('Extracting images...') with open(picts, 'rb') as f: raw = f.read() - picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw)) - hex = re.compile(r'[^a-fA-F0-9]') - encs = [hex.sub('', pict) for pict in picts] + picts = filter(len, re.findall(br'\{\\pict([^}]+)\}', raw)) + hex_pat = re.compile(br'[^a-fA-F0-9]') + encs = [hex_pat.sub(b'', pict) for pict in picts] count = 0 imap = {} for enc in encs: if len(enc) % 2 == 1: enc = enc[:-1] - data = enc.decode('hex') + data = unhexlify(enc) fmt = what(None, data) if fmt is None: fmt = 'wmf' @@ -158,7 +159,7 @@ class RTFInput(InputFormatPlugin): return name try: return self.rasterize_wmf(name) - except: + except Exception: self.log.exception('Failed to convert WMF image %r'%name) return self.replace_wmf(name) @@ -168,7 +169,7 @@ class RTFInput(InputFormatPlugin): return '__REMOVE_ME__' from calibre.ebooks.covers import message_image if self.default_img is None: - self.default_img = message_image('Conversion of WMF images is not supported.', + self.default_img = message_image('Conversion of WMF images is not supported.' ' Use Microsoft Word or OpenOffice to save this RTF file' ' as HTML and convert that in calibre.') name = name.replace('.wmf', '.jpg') @@ -287,15 +288,15 @@ class RTFInput(InputFormatPlugin): result = transform(doc) html = u'index.xhtml' with open(html, 'wb') as f: - res = transform.tostring(result) + res = as_bytes(transform.tostring(result)) # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] # clean multiple \n - res = re.sub('\n+', '\n', res) + res = re.sub(b'\n+', b'\n', res) # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines # res = re.sub('\s*', '', res) # res = re.sub('(?<=\n)\n{2}', # u'

\u00a0

\n'.encode('utf-8'), res) - f.write(as_bytes(res)) + f.write(res) self.write_inline_css(inline_class, border_styles) stream.seek(0) mi = get_metadata(stream, 'rtf') diff --git a/src/calibre/utils/wmf/parse.py b/src/calibre/utils/wmf/parse.py index 4573e71044..196970ed6a 100644 --- a/src/calibre/utils/wmf/parse.py +++ b/src/calibre/utils/wmf/parse.py @@ -1,5 +1,6 @@ #!/usr/bin/env python2 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' @@ -145,7 +146,7 @@ class WMF(object): size, func = struct.unpack_from(' 0: params = data[offset:offset+delta] @@ -158,6 +159,8 @@ class WMF(object): self.records.append((func, params)) for rec in self.records: + if not hasattr(rec[0], 'split'): + continue f = getattr(self, rec[0], None) if callable(f): f(rec[1])