DOCX Input: Fix images that have the # character in their filenames not being converted correctly. Fixes #1616482 [when converting to epub images do not appear](https://bugs.launchpad.net/calibre/+bug/1616482)

This commit is contained in:
Kovid Goyal 2016-08-25 21:22:25 +05:30
parent cae60d5a54
commit 97ca819606

View File

@@ -22,6 +22,9 @@ class LinkedImageNotFound(ValueError):
ValueError.__init__(self, fname) ValueError.__init__(self, fname)
self.fname = fname self.fname = fname
# Added by this commit: derive the name used for an image from x by passing
# it through ascii_filename() and then replacing every space and every '#'
# with an underscore. Per the commit message, images with '#' in their
# filenames were not being converted correctly.
# NOTE(review): presumably '#' breaks references to the image because it
# reads as a URL fragment separator -- confirm against the linked bug report.
def image_filename(x):
return ascii_filename(x).replace(' ', '_').replace('#', '_')
def emu_to_pt(x): def emu_to_pt(x):
return x / 12700 return x / 12700
@@ -126,7 +129,7 @@ class Images(object):
raw = rawsrc.read() raw = rawsrc.read()
else: else:
raw = self.docx.read(fname) raw = self.docx.read(fname)
base = base or ascii_filename(fname.rpartition('/')[-1]).replace(' ', '_') or 'image' base = base or image_filename(fname.rpartition('/')[-1]) or 'image'
ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg' ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
if ext == 'emf': if ext == 'emf':
# For an example, see: https://bugs.launchpad.net/bugs/1224849 # For an example, see: https://bugs.launchpad.net/bugs/1224849
@@ -201,7 +204,7 @@ class Images(object):
for pr in XPath('descendant::pic:cNvPr')(pic): for pr in XPath('descendant::pic:cNvPr')(pic):
name = pr.get('name', None) name = pr.get('name', None)
if name: if name:
name = ascii_filename(name).replace(' ', '_') name = image_filename(name)
alt = pr.get('descr', None) alt = pr.get('descr', None)
for a in XPath('descendant::a:blip[@r:embed or @r:link]')(pic): for a in XPath('descendant::a:blip[@r:embed or @r:link]')(pic):
rid = get(a, 'r:embed') rid = get(a, 'r:embed')