mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
DOCX Input: Add support for EMF images
DOCX Input: Add support for embedded EMF images that are just wrappers around an actual raster image. See #1224849 (DOCX to EPUB conversion failed with divide by zero)
This commit is contained in:
parent
c6c17ef976
commit
beea0a48b5
@ -91,12 +91,13 @@ def get_hpos(anchor, page_width):
|
||||
|
||||
class Images(object):
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, log):
|
||||
self.rid_map = {}
|
||||
self.used = {}
|
||||
self.names = set()
|
||||
self.all_images = set()
|
||||
self.links = []
|
||||
self.log = log
|
||||
|
||||
def __call__(self, relationships_by_id):
|
||||
self.rid_map = relationships_by_id
|
||||
@ -109,6 +110,17 @@ class Images(object):
|
||||
raw = self.docx.read(fname)
|
||||
base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image'
|
||||
ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
|
||||
if ext == 'emf':
|
||||
# For an example, see: https://bugs.launchpad.net/bugs/1224849
|
||||
self.log('Found an EMF image: %s, trying to extract embedded raster image' % base)
|
||||
from calibre.utils.wmf.emf import emf_unwrap
|
||||
try:
|
||||
raw = emf_unwrap(raw)
|
||||
except Exception as e:
|
||||
self.log.exception('Failed to extract embedded raster image from EMF')
|
||||
else:
|
||||
ext = 'png'
|
||||
|
||||
base = base.rpartition('.')[0]
|
||||
if not base:
|
||||
base = 'image'
|
||||
|
@ -57,7 +57,7 @@ class Convert(object):
|
||||
self.tables = Tables()
|
||||
self.fields = Fields()
|
||||
self.styles = Styles(self.tables)
|
||||
self.images = Images()
|
||||
self.images = Images(self.log)
|
||||
self.object_map = OrderedDict()
|
||||
self.html = HTML(
|
||||
HEAD(
|
||||
|
@ -5,6 +5,8 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import struct
|
||||
|
||||
class Unavailable(Exception):
|
||||
pass
|
||||
|
||||
@ -12,3 +14,66 @@ class NoRaster(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DIBHeader(object):

    '''
    Parse the header at the start of a device independent bitmap (DIB).

    See http://en.wikipedia.org/wiki/BMP_file_format

    Two header layouts are understood: the 40 byte header (width, height,
    compression, palette size, ...) and the older 12 byte header (width,
    height, planes and bits per pixel only). Any other header size raises
    a ValueError.

    The unpacked fields are stored as attributes. Two derived attributes are
    also set: ``bitmasks_size`` (bytes of bitfield masks following the
    header) and ``color_table_size`` (bytes of palette following the masks).
    '''

    _INFO_FIELDS = (
        'header_size', 'width', 'height', 'color_planes',
        'bits_per_pixel', 'compression', 'image_size',
        'hres', 'vres', 'ncols', 'nimpcols'
    )
    _CORE_FIELDS = (
        'header_size', 'width', 'height', 'color_planes',
        'bits_per_pixel'
    )

    def __init__(self, raw):
        # The first four bytes of every DIB header give the header size,
        # which also identifies the header layout.
        hsize = struct.unpack(b'<I', raw[:4])[0]
        if hsize == 40:
            fmt, fields, entry_size = b'<IiiHHIIIIII', self._INFO_FIELDS, 4
        elif hsize == 12:
            # Palette entries for the 12 byte header are 3 bytes wide
            fmt, fields, entry_size = b'<IHHHH', self._CORE_FIELDS, 3
        else:
            raise ValueError('Unsupported DIB header type of size: %d'%hsize)
        for attr, val in zip(fields, struct.unpack(fmt, raw[:hsize])):
            setattr(self, attr, val)

        # Compression type 3 means three 4 byte channel masks follow the
        # header
        self.bitmasks_size = 12 if getattr(self, 'compression', 0) == 3 else 0
        self.color_table_size = 0
        if self.bits_per_pixel != 24:
            # See http://support.microsoft.com/kb/q81498/
            # for all the gory Micro and soft details
            ncols = getattr(self, 'ncols', 0)
            if not ncols and 0 < self.bits_per_pixel <= 8:
                # A palette size of zero (or a header with no palette size
                # field at all) means the full 2**bpp entry palette is
                # present for indexed images.
                ncols = 1 << self.bits_per_pixel
            self.color_table_size = ncols * entry_size
|
||||
|
||||
|
||||
def create_bmp_from_dib(raw):
    '''
    Turn a raw DIB into a complete BMP file by prepending a BMP file header.

    :param raw: The DIB data: DIB header, optional bitmasks and color table,
                followed by the pixel data.
    :return: Bytes of a BMP file (14 byte file header + ``raw``).

    Raises whatever DIBHeader raises for an unsupported or truncated header.
    '''
    file_header_size = 14
    dh = DIBHeader(raw)
    # The pixel array offset stored in the file header is measured from the
    # start of the file, so it has to include the file header itself.
    pixel_array_offset = (file_header_size + dh.header_size +
                          dh.bitmasks_size + dh.color_table_size)
    # Layout: magic, total file size, 4 reserved (zero) bytes, pixel offset
    file_header = struct.pack(
        b'<2sI4xI', b'BM', len(raw) + file_header_size, pixel_array_offset)
    return file_header + raw
|
||||
|
||||
def to_png(bmp):
    '''
    Convert BMP data to PNG data, returning the PNG as a bytestring.

    Qt is tried first and ImageMagick is only used when Qt cannot load the
    image.
    '''
    # ImageMagick does not convert some bmp files correctly, while Qt does,
    # so try Qt first. See for instance:
    # https://bugs.launchpad.net/calibre/+bug/934167
    # ImageMagick bug report:
    # http://www.imagemagick.org/discourse-server/viewtopic.php?f=3&t=20350
    from PyQt4.Qt import QImage, QByteArray, QBuffer
    qimg = QImage()
    if not qimg.loadFromData(bmp):
        # Qt could not decode the data, fall back to ImageMagick
        from calibre.utils.magick import Image
        fallback = Image()
        fallback.load(bmp)
        return fallback.export('png')

    png_bytes = QByteArray()
    out = QBuffer(png_bytes)
    out.open(QBuffer.WriteOnly)
    qimg.save(out, 'png')
    return bytes(png_bytes.data())
|
||||
|
||||
|
||||
|
93
src/calibre/utils/wmf/emf.py
Normal file
93
src/calibre/utils/wmf/emf.py
Normal file
@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import sys
|
||||
from struct import unpack_from
|
||||
from collections import namedtuple
|
||||
|
||||
from calibre.utils.wmf import create_bmp_from_dib, to_png
|
||||
|
||||
# Record types {{{
# Numeric EMF record type ids, keyed by record name. The EMF parser looks
# for a handle_<name> method for each of these (with any EMR_ prefix
# stripped and the name lower-cased).
RECORD_TYPES = {
    'EMR_BITBLT' : 0x4C,
    'EMR_STRETCHBLT' : 0x4D,
    'EMR_MASKBLT' : 0x4E,
    'EMR_PLGBLT' : 0x4F,
    'EMR_SETDIBITSTODEVICE' : 0x50,
    'EMR_STRETCHDIBITS' : 0x51,
    'EMR_ALPHABLEND' : 0x72,
    'EMR_TRANSPARENTBLT' : 0x74,
    'EOF' : 0xe,
    'HEADER' : 0x1,
}
# Reverse lookup: record type id -> record name. items() is used instead of
# iteritems() so this works on both Python 2 and Python 3.
RECORD_RMAP = {v:k for k, v in RECORD_TYPES.items()}

# The eighteen 32 bit fields that follow the record type and size in a
# stretchdibits record, in the order they are unpacked.
StretchDiBits = namedtuple(
    'StretchDiBits', 'left top right bottom x_dest y_dest x_src y_src cx_src'
    ' cy_src bmp_hdr_offset bmp_header_size bmp_bits_offset'
    ' bmp_bits_size usage op dest_width dest_height')
# }}}
|
||||
|
||||
class EMF(object):

    '''
    Minimal EMF reader that collects the bitmaps embedded in the metafile.

    The data is walked record by record until the EOF record is seen or the
    data runs out. Only stretchdibits records are decoded, each one is
    converted to a BMP and appended to ``self.bitmaps``. All other records
    are ignored.

    :param raw: The EMF file contents as a bytestring.
    :param verbose: If non-zero, print the name of every ignored record.

    After construction ``has_raster_image`` is True iff at least one bitmap
    was found.
    '''

    def __init__(self, raw, verbose=0):
        self.pos = 0
        self.found_eof = False
        self.verbose = verbose
        # Map record type id -> handler method. Record names without a
        # matching handle_<name> method are routed to handle_unknown.
        self.func_map = {
            v: getattr(self, 'handle_%s' % (k.replace('EMR_', '').lower()),
                       self.handle_unknown)
            for k, v in RECORD_TYPES.items()}
        self.bitmaps = []
        while self.pos < len(raw) and not self.found_eof:
            self.read_record(raw)
        self.has_raster_image = bool(self.bitmaps)

    def handle_unknown(self, rtype, size, raw):
        # Records we do not decode are skipped, optionally reporting them
        if self.verbose:
            print ('Ignoring unknown record:', RECORD_RMAP.get(rtype, hex(rtype).upper()))

    def handle_header(self, rtype, size, raw):
        # Nothing in the header record is needed to extract bitmaps
        pass

    def handle_stretchdibits(self, rtype, size, raw):
        # ``raw`` is the complete record, the fixed fields start after the
        # 8 byte record type + size prefix. The bitmap header and pixel
        # offsets are relative to the start of the record.
        data = StretchDiBits(*unpack_from(b'<18I', raw, 8))
        hdr = raw[data.bmp_hdr_offset:data.bmp_hdr_offset + data.bmp_header_size]
        bits = raw[data.bmp_bits_offset:data.bmp_bits_offset + data.bmp_bits_size]
        bmp = create_bmp_from_dib(hdr + bits)
        self.bitmaps.append(bmp)

    def handle_eof(self, rtype, size, raw):
        self.found_eof = True

    def read_record(self, raw):
        '''
        Read the record at ``self.pos``, advance past it and dispatch it to
        its handler.

        Raises ValueError if the record declares a size smaller than its own
        8 byte type + size prefix, as such a record could never be stepped
        over (a size of zero would otherwise loop forever).
        '''
        rtype, size = unpack_from(b'<II', raw, self.pos)
        if size < 8:
            raise ValueError(
                'Invalid EMF record of size %d at offset %d' % (size, self.pos))
        record = raw[self.pos:self.pos+size]
        self.pos += size
        self.func_map.get(rtype, self.handle_unknown)(rtype, size, record)

    def to_png(self):
        '''
        Return the largest (by byte length) collected bitmap as PNG data.
        Raises IndexError if no bitmaps were found.
        '''
        # A stable sort keeps the last of several equally large bitmaps
        bmp = sorted(self.bitmaps, key=len)[-1]
        return to_png(bmp)
|
||||
|
||||
def emf_unwrap(raw, verbose=0):
    '''
    Extract the largest raster image embedded in the EMF data ``raw`` and
    return it as PNG data.

    Raises ValueError if the EMF contains no raster image.
    '''
    parsed = EMF(raw, verbose=verbose)
    if parsed.has_raster_image:
        return parsed.to_png()
    raise ValueError('No raster image found in the EMF')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Debugging aid: parse the EMF file named by the last command line
    # argument and dump the first embedded bitmap, plus the PNG conversion
    # of the largest one, into /t
    with open(sys.argv[-1], 'rb') as f:
        raw = f.read()
    emf = EMF(raw, verbose=4)
    # Use context managers so the output files are flushed and closed
    with open('/t/test.bmp', 'wb') as f:
        f.write(emf.bitmaps[0])
    with open('/t/test.png', 'wb') as f:
        f.write(emf.to_png())
|
||||
|
@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, struct
|
||||
|
||||
|
||||
from calibre.utils.wmf import create_bmp_from_dib, to_png
|
||||
|
||||
class WMFHeader(object):
|
||||
|
||||
@ -34,39 +34,6 @@ class WMFHeader(object):
|
||||
|
||||
self.records_start_at = header_size * 2
|
||||
|
||||
class DIBHeader(object):
|
||||
|
||||
'''
|
||||
See http://en.wikipedia.org/wiki/BMP_file_format
|
||||
'''
|
||||
|
||||
def __init__(self, raw):
|
||||
hsize = struct.unpack('<I', raw[:4])[0]
|
||||
if hsize == 40:
|
||||
parts = struct.unpack('<IiiHHIIIIII', raw[:hsize])
|
||||
for i, attr in enumerate((
|
||||
'header_size', 'width', 'height', 'color_planes',
|
||||
'bits_per_pixel', 'compression', 'image_size',
|
||||
'hres', 'vres', 'ncols', 'nimpcols'
|
||||
)):
|
||||
setattr(self, attr, parts[i])
|
||||
elif hsize == 12:
|
||||
parts = struct.unpack('<IHHHH', raw[:hsize])
|
||||
for i, attr in enumerate((
|
||||
'header_size', 'width', 'height', 'color_planes',
|
||||
'bits_per_pixel')):
|
||||
setattr(self, attr, parts[i])
|
||||
else:
|
||||
raise ValueError('Unsupported DIB header type of size: %d'%hsize)
|
||||
|
||||
self.bitmasks_size = 12 if getattr(self, 'compression', 0) == 3 else 0
|
||||
self.color_table_size = 0
|
||||
if self.bits_per_pixel != 24:
|
||||
# See http://support.microsoft.com/kb/q81498/
|
||||
# for all the gory Micro and soft details
|
||||
self.color_table_size = getattr(self, 'ncols', 0) * 4
|
||||
|
||||
|
||||
class WMF(object):
|
||||
|
||||
def __init__(self, log=None, verbose=0):
|
||||
@ -80,7 +47,7 @@ class WMF(object):
|
||||
self.window_extent = None
|
||||
self.bitmaps = []
|
||||
|
||||
self.function_map = { # {{{
|
||||
self.function_map = { # {{{
|
||||
30: 'SaveDC',
|
||||
53: 'RealizePalette',
|
||||
55: 'SetPalEntries',
|
||||
@ -160,7 +127,7 @@ class WMF(object):
|
||||
2851: 'StretchBlt',
|
||||
2881: 'DibStretchBlt',
|
||||
3907: 'StretchDIBits'
|
||||
} # }}}
|
||||
} # }}}
|
||||
|
||||
def __call__(self, stream_or_data):
|
||||
data = stream_or_data
|
||||
@ -174,7 +141,7 @@ class WMF(object):
|
||||
self.records = []
|
||||
while offset < len(data)-6:
|
||||
size, func = struct.unpack_from('<IH', data, offset)
|
||||
size *= 2 # Convert to bytes
|
||||
size *= 2 # Convert to bytes
|
||||
offset += hsize
|
||||
params = ''
|
||||
delta = size - hsize
|
||||
@ -197,7 +164,6 @@ class WMF(object):
|
||||
|
||||
self.has_raster_image = len(self.bitmaps) > 0
|
||||
|
||||
|
||||
def SetMapMode(self, params):
|
||||
if len(params) == 2:
|
||||
self.map_mode = struct.unpack('<H', params)[0]
|
||||
@ -231,40 +197,13 @@ class WMF(object):
|
||||
dest_width, y_dest, x_dest = struct.unpack_from('<IHHHHHHHH', raw, offset)
|
||||
offset += struct.calcsize(fmt)
|
||||
bmp_data = raw[offset:]
|
||||
bmp = self.create_bmp_from_dib(bmp_data)
|
||||
bmp = create_bmp_from_dib(bmp_data)
|
||||
self.bitmaps.append(bmp)
|
||||
|
||||
def create_bmp_from_dib(self, raw):
|
||||
size = len(raw) + 14
|
||||
dh = DIBHeader(raw)
|
||||
pixel_array_offset = dh.header_size + dh.bitmasks_size + \
|
||||
dh.color_table_size
|
||||
parts = ['BM', struct.pack('<I', size), '\0'*4, struct.pack('<I',
|
||||
pixel_array_offset)]
|
||||
return ''.join(parts) + raw
|
||||
|
||||
def to_png(self):
|
||||
bmps = list(sorted(self.bitmaps, key=lambda x: len(x)))
|
||||
bmp = bmps[-1]
|
||||
|
||||
# ImageMagick does not convert some bmp files correctly, while Qt does,
|
||||
# so try Qt first. See for instance:
|
||||
# https://bugs.launchpad.net/calibre/+bug/934167
|
||||
# ImageMagick bug report:
|
||||
# http://www.imagemagick.org/discourse-server/viewtopic.php?f=3&t=20350
|
||||
from PyQt4.Qt import QImage, QByteArray, QBuffer
|
||||
i = QImage()
|
||||
if i.loadFromData(bmp):
|
||||
ba = QByteArray()
|
||||
buf = QBuffer(ba)
|
||||
buf.open(QBuffer.WriteOnly)
|
||||
i.save(buf, 'png')
|
||||
return bytes(ba.data())
|
||||
|
||||
from calibre.utils.magick import Image
|
||||
img = Image()
|
||||
img.load(bmp)
|
||||
return img.export('png')
|
||||
return to_png(bmp)
|
||||
|
||||
def wmf_unwrap(wmf_data, verbose=0):
|
||||
'''
|
||||
|
Loading…
x
Reference in New Issue
Block a user