From 93e8d21e7f1e7d44984bde57e8bf3292c475773d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 Apr 2015 10:40:55 +0530 Subject: [PATCH] DOCX Input: Automatically resize large images used as bullets in lists, to compensate for the fact that HTML renderers do not do this. --- src/calibre/ebooks/docx/images.py | 51 ++++++++++++++++++++++------ src/calibre/ebooks/docx/numbering.py | 2 +- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/src/calibre/ebooks/docx/images.py b/src/calibre/ebooks/docx/images.py index fcee1e20f6..375739e727 100644 --- a/src/calibre/ebooks/docx/images.py +++ b/src/calibre/ebooks/docx/images.py @@ -10,10 +10,12 @@ import os from lxml.html.builder import IMG, HR +from calibre import fit_image from calibre.constants import iswindows from calibre.ebooks.docx.names import barename from calibre.utils.filenames import ascii_filename from calibre.utils.imghdr import what +from calibre.utils.magick import Image class LinkedImageNotFound(ValueError): @@ -105,6 +107,7 @@ class Images(object): self.namespace = namespace self.rid_map = {} self.used = {} + self.resized = {} self.names = set() self.all_images = set() self.links = [] @@ -113,11 +116,7 @@ class Images(object): def __call__(self, relationships_by_id): self.rid_map = relationships_by_id - def generate_filename(self, rid, base=None, rid_map=None): - rid_map = self.rid_map if rid_map is None else rid_map - fname = rid_map[rid] - if fname in self.used: - return self.used[fname] + def read_image_data(self, fname, base=None): if fname.startswith('file://'): src = fname[len('file://'):] if iswindows and src and src[0] == '/': @@ -128,23 +127,25 @@ class Images(object): raw = rawsrc.read() else: raw = self.docx.read(fname) - base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image' + base = base or ascii_filename(fname.rpartition('/')[-1]).replace(' ', '_') or 'image' ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg' if ext == 'emf': # For an example, see: https://bugs.launchpad.net/bugs/1224849 - self.log('Found an EMF image: %s, trying to extract embedded raster image' % base) + self.log('Found an EMF image: %s, trying to extract embedded raster image' % fname) from calibre.utils.wmf.emf import emf_unwrap try: raw = emf_unwrap(raw) - except Exception as e: + except Exception: self.log.exception('Failed to extract embedded raster image from EMF') else: ext = 'png' - base = base.rpartition('.')[0] if not base: base = 'image' base += '.' + ext + return raw, base + + def unique_name(self, base): exists = frozenset(self.used.itervalues()) c = 1 name = base @@ -152,7 +153,37 @@ class Images(object): n, e = base.rpartition('.')[0::2] name = '%s-%d.%s' % (n, c, e) c += 1 - self.used[fname] = name + return name + + def resize_image(self, raw, base, max_width, max_height): + img = Image() + img.load(raw) + resized, nwidth, nheight = fit_image(img.size[0], img.size[1], max_width, max_height) + if resized: + img.size = (nwidth, nheight) + base, ext = os.path.splitext(base) + base = base + '-%dx%d%s' % (max_width, max_height, ext) + raw = img.export(ext[1:]) + return raw, base, resized + + def generate_filename(self, rid, base=None, rid_map=None, max_width=None, max_height=None): + rid_map = self.rid_map if rid_map is None else rid_map + fname = rid_map[rid] + key = (fname, max_width, max_height) + ans = self.used.get(key) + if ans is not None: + return ans + raw, base = self.read_image_data(fname, base=base) + resized = False + if max_width is not None and max_height is not None: + raw, base, resized = self.resize_image(raw, base, max_width, max_height) + name = self.unique_name(base) + self.used[key] = name + if max_width is not None and max_height is not None and not resized: + okey = (fname, None, None) + if okey in self.used: + return self.used[okey] + self.used[okey] = name with open(os.path.join(self.dest_dir, name), 'wb') as f: f.write(raw) self.all_images.add('images/' + name) diff --git a/src/calibre/ebooks/docx/numbering.py b/src/calibre/ebooks/docx/numbering.py index a9e9654db9..846b2c1f5f 100644 --- a/src/calibre/ebooks/docx/numbering.py +++ b/src/calibre/ebooks/docx/numbering.py @@ -119,7 +119,7 @@ class Level(object): rid = pic_map.get(self.pic_id, None) if rid: try: - fname = images.generate_filename(rid, rid_map=rid_map) + fname = images.generate_filename(rid, rid_map=rid_map, max_width=20, max_height=20) except Exception: fname = None else: