Fix caching of images in docx to use filenames instead of possibly non-unique relationship ids.
This commit is contained in:
Kovid Goyal 2013-06-20 12:02:12 +05:30
parent 59d1054cbf
commit c8b9d624cd

View File

@ -101,10 +101,11 @@ class Images(object):
self.rid_map = relationships_by_id self.rid_map = relationships_by_id
def generate_filename(self, rid, base=None, rid_map=None): def generate_filename(self, rid, base=None, rid_map=None):
if rid in self.used:
return self.used[rid]
rid_map = self.rid_map if rid_map is None else rid_map rid_map = self.rid_map if rid_map is None else rid_map
raw = self.docx.read(rid_map[rid]) fname = rid_map[rid]
if fname in self.used:
return self.used[fname]
raw = self.docx.read(fname)
base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image' base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image'
ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg' ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
base = base.rpartition('.')[0] base = base.rpartition('.')[0]
@ -118,7 +119,7 @@ class Images(object):
n, e = base.rpartition('.')[0::2] n, e = base.rpartition('.')[0::2]
name = '%s-%d.%s' % (n, c, e) name = '%s-%d.%s' % (n, c, e)
c += 1 c += 1
self.used[rid] = name self.used[fname] = name
with open(os.path.join(self.dest_dir, name), 'wb') as f: with open(os.path.join(self.dest_dir, name), 'wb') as f:
f.write(raw) f.write(raw)
self.all_images.add('images/' + name) self.all_images.add('images/' + name)