DOCX Input: Automatically resize large images used as bullets in lists, to compensate for the fact that HTML renderers do not do this.

This commit is contained in:
Kovid Goyal 2015-04-14 10:40:55 +05:30
parent 0c320ae75d
commit 93e8d21e7f
2 changed files with 42 additions and 11 deletions

View File

@ -10,10 +10,12 @@ import os
from lxml.html.builder import IMG, HR from lxml.html.builder import IMG, HR
from calibre import fit_image
from calibre.constants import iswindows from calibre.constants import iswindows
from calibre.ebooks.docx.names import barename from calibre.ebooks.docx.names import barename
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename
from calibre.utils.imghdr import what from calibre.utils.imghdr import what
from calibre.utils.magick import Image
class LinkedImageNotFound(ValueError): class LinkedImageNotFound(ValueError):
@ -105,6 +107,7 @@ class Images(object):
self.namespace = namespace self.namespace = namespace
self.rid_map = {} self.rid_map = {}
self.used = {} self.used = {}
self.resized = {}
self.names = set() self.names = set()
self.all_images = set() self.all_images = set()
self.links = [] self.links = []
@ -113,11 +116,7 @@ class Images(object):
def __call__(self, relationships_by_id): def __call__(self, relationships_by_id):
self.rid_map = relationships_by_id self.rid_map = relationships_by_id
def generate_filename(self, rid, base=None, rid_map=None): def read_image_data(self, fname, base=None):
rid_map = self.rid_map if rid_map is None else rid_map
fname = rid_map[rid]
if fname in self.used:
return self.used[fname]
if fname.startswith('file://'): if fname.startswith('file://'):
src = fname[len('file://'):] src = fname[len('file://'):]
if iswindows and src and src[0] == '/': if iswindows and src and src[0] == '/':
@ -128,23 +127,25 @@ class Images(object):
raw = rawsrc.read() raw = rawsrc.read()
else: else:
raw = self.docx.read(fname) raw = self.docx.read(fname)
base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image' base = base or ascii_filename(fname.rpartition('/')[-1]).replace(' ', '_') or 'image'
ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg' ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
if ext == 'emf': if ext == 'emf':
# For an example, see: https://bugs.launchpad.net/bugs/1224849 # For an example, see: https://bugs.launchpad.net/bugs/1224849
self.log('Found an EMF image: %s, trying to extract embedded raster image' % base) self.log('Found an EMF image: %s, trying to extract embedded raster image' % fname)
from calibre.utils.wmf.emf import emf_unwrap from calibre.utils.wmf.emf import emf_unwrap
try: try:
raw = emf_unwrap(raw) raw = emf_unwrap(raw)
except Exception as e: except Exception:
self.log.exception('Failed to extract embedded raster image from EMF') self.log.exception('Failed to extract embedded raster image from EMF')
else: else:
ext = 'png' ext = 'png'
base = base.rpartition('.')[0] base = base.rpartition('.')[0]
if not base: if not base:
base = 'image' base = 'image'
base += '.' + ext base += '.' + ext
return raw, base
def unique_name(self, base):
exists = frozenset(self.used.itervalues()) exists = frozenset(self.used.itervalues())
c = 1 c = 1
name = base name = base
@ -152,7 +153,37 @@ class Images(object):
n, e = base.rpartition('.')[0::2] n, e = base.rpartition('.')[0::2]
name = '%s-%d.%s' % (n, c, e) name = '%s-%d.%s' % (n, c, e)
c += 1 c += 1
self.used[fname] = name return name
def resize_image(self, raw, base, max_width, max_height):
    """Fit the image in ``raw`` into a ``max_width`` x ``max_height`` box.

    The image is loaded from ``raw`` and its current dimensions are passed
    to ``fit_image`` together with the requested bounds. When ``fit_image``
    reports that no resize is needed, ``raw`` and ``base`` are handed back
    untouched. Otherwise the image is resized to the dimensions that
    ``fit_image`` computed, re-exported in the format named by the
    extension of ``base``, and ``base`` gains a ``-<max_width>x<max_height>``
    suffix before its extension.

    Returns a ``(raw, base, resized)`` tuple, where ``resized`` is the flag
    reported by ``fit_image``.
    """
    image = Image()
    image.load(raw)
    current_width = image.size[0]
    current_height = image.size[1]
    resized, new_width, new_height = fit_image(
        current_width, current_height, max_width, max_height)
    if not resized:
        # Nothing to do: keep the original bytes and file name.
        return raw, base, resized
    image.size = (new_width, new_height)
    stem, ext = os.path.splitext(base)
    # The suffix records the requested bounding box, not the final size.
    base = stem + '-%dx%d%s' % (max_width, max_height, ext)
    # ext still carries its leading dot; strip it to get the export format.
    raw = image.export(ext[1:])
    return raw, base, resized
def generate_filename(self, rid, base=None, rid_map=None, max_width=None, max_height=None):
rid_map = self.rid_map if rid_map is None else rid_map
fname = rid_map[rid]
key = (fname, max_width, max_height)
ans = self.used.get(key)
if ans is not None:
return ans
raw, base = self.read_image_data(fname, base=base)
resized = False
if max_width is not None and max_height is not None:
raw, base, resized = self.resize_image(raw, base, max_width, max_height)
name = self.unique_name(base)
self.used[key] = name
if max_width is not None and max_height is not None and not resized:
okey = (fname, None, None)
if okey in self.used:
return self.used[okey]
self.used[okey] = name
with open(os.path.join(self.dest_dir, name), 'wb') as f: with open(os.path.join(self.dest_dir, name), 'wb') as f:
f.write(raw) f.write(raw)
self.all_images.add('images/' + name) self.all_images.add('images/' + name)

View File

@ -119,7 +119,7 @@ class Level(object):
rid = pic_map.get(self.pic_id, None) rid = pic_map.get(self.pic_id, None)
if rid: if rid:
try: try:
fname = images.generate_filename(rid, rid_map=rid_map) fname = images.generate_filename(rid, rid_map=rid_map, max_width=20, max_height=20)
except Exception: except Exception:
fname = None fname = None
else: else: