mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: More fixes to RTF input
Embedded WMF image processing now works
This commit is contained in:
parent
037e28a442
commit
cb5ac309fa
@ -1,4 +1,4 @@
|
|||||||
from __future__ import with_statement
|
from __future__ import with_statement, unicode_literals
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
@ -118,20 +118,21 @@ class RTFInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def extract_images(self, picts):
|
def extract_images(self, picts):
|
||||||
from calibre.utils.imghdr import what
|
from calibre.utils.imghdr import what
|
||||||
|
from binascii import unhexlify
|
||||||
self.log('Extracting images...')
|
self.log('Extracting images...')
|
||||||
|
|
||||||
with open(picts, 'rb') as f:
|
with open(picts, 'rb') as f:
|
||||||
raw = f.read()
|
raw = f.read()
|
||||||
picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
|
picts = filter(len, re.findall(br'\{\\pict([^}]+)\}', raw))
|
||||||
hex = re.compile(r'[^a-fA-F0-9]')
|
hex_pat = re.compile(br'[^a-fA-F0-9]')
|
||||||
encs = [hex.sub('', pict) for pict in picts]
|
encs = [hex_pat.sub(b'', pict) for pict in picts]
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
imap = {}
|
imap = {}
|
||||||
for enc in encs:
|
for enc in encs:
|
||||||
if len(enc) % 2 == 1:
|
if len(enc) % 2 == 1:
|
||||||
enc = enc[:-1]
|
enc = enc[:-1]
|
||||||
data = enc.decode('hex')
|
data = unhexlify(enc)
|
||||||
fmt = what(None, data)
|
fmt = what(None, data)
|
||||||
if fmt is None:
|
if fmt is None:
|
||||||
fmt = 'wmf'
|
fmt = 'wmf'
|
||||||
@ -158,7 +159,7 @@ class RTFInput(InputFormatPlugin):
|
|||||||
return name
|
return name
|
||||||
try:
|
try:
|
||||||
return self.rasterize_wmf(name)
|
return self.rasterize_wmf(name)
|
||||||
except:
|
except Exception:
|
||||||
self.log.exception('Failed to convert WMF image %r'%name)
|
self.log.exception('Failed to convert WMF image %r'%name)
|
||||||
return self.replace_wmf(name)
|
return self.replace_wmf(name)
|
||||||
|
|
||||||
@ -168,7 +169,7 @@ class RTFInput(InputFormatPlugin):
|
|||||||
return '__REMOVE_ME__'
|
return '__REMOVE_ME__'
|
||||||
from calibre.ebooks.covers import message_image
|
from calibre.ebooks.covers import message_image
|
||||||
if self.default_img is None:
|
if self.default_img is None:
|
||||||
self.default_img = message_image('Conversion of WMF images is not supported.',
|
self.default_img = message_image('Conversion of WMF images is not supported.'
|
||||||
' Use Microsoft Word or OpenOffice to save this RTF file'
|
' Use Microsoft Word or OpenOffice to save this RTF file'
|
||||||
' as HTML and convert that in calibre.')
|
' as HTML and convert that in calibre.')
|
||||||
name = name.replace('.wmf', '.jpg')
|
name = name.replace('.wmf', '.jpg')
|
||||||
@ -287,15 +288,15 @@ class RTFInput(InputFormatPlugin):
|
|||||||
result = transform(doc)
|
result = transform(doc)
|
||||||
html = u'index.xhtml'
|
html = u'index.xhtml'
|
||||||
with open(html, 'wb') as f:
|
with open(html, 'wb') as f:
|
||||||
res = transform.tostring(result)
|
res = as_bytes(transform.tostring(result))
|
||||||
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
# res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
|
||||||
# clean multiple \n
|
# clean multiple \n
|
||||||
res = re.sub('\n+', '\n', res)
|
res = re.sub(b'\n+', b'\n', res)
|
||||||
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
|
# Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
|
||||||
# res = re.sub('\s*<body>', '<body>', res)
|
# res = re.sub('\s*<body>', '<body>', res)
|
||||||
# res = re.sub('(?<=\n)\n{2}',
|
# res = re.sub('(?<=\n)\n{2}',
|
||||||
# u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
# u'<p>\u00a0</p>\n'.encode('utf-8'), res)
|
||||||
f.write(as_bytes(res))
|
f.write(res)
|
||||||
self.write_inline_css(inline_class, border_styles)
|
self.write_inline_css(inline_class, border_styles)
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
mi = get_metadata(stream, 'rtf')
|
mi = get_metadata(stream, 'rtf')
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
@ -145,7 +146,7 @@ class WMF(object):
|
|||||||
size, func = struct.unpack_from('<IH', data, offset)
|
size, func = struct.unpack_from('<IH', data, offset)
|
||||||
size *= 2 # Convert to bytes
|
size *= 2 # Convert to bytes
|
||||||
offset += hsize
|
offset += hsize
|
||||||
params = ''
|
params = b''
|
||||||
delta = size - hsize
|
delta = size - hsize
|
||||||
if delta > 0:
|
if delta > 0:
|
||||||
params = data[offset:offset+delta]
|
params = data[offset:offset+delta]
|
||||||
@ -158,6 +159,8 @@ class WMF(object):
|
|||||||
self.records.append((func, params))
|
self.records.append((func, params))
|
||||||
|
|
||||||
for rec in self.records:
|
for rec in self.records:
|
||||||
|
if not hasattr(rec[0], 'split'):
|
||||||
|
continue
|
||||||
f = getattr(self, rec[0], None)
|
f = getattr(self, rec[0], None)
|
||||||
if callable(f):
|
if callable(f):
|
||||||
f(rec[1])
|
f(rec[1])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user