DOCX Input: Fix alt and title attributes for some images not being preserved

This commit is contained in:
Kovid Goyal 2017-07-04 13:26:55 +05:30
parent a0ac9cfddb
commit d1c4eeb92a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -54,14 +54,14 @@ def get_image_properties(parent, XPath, get):
ans['height'] = '%.3gpt' % height ans['height'] = '%.3gpt' % height
alt = None alt = None
title = None
for docPr in XPath('./wp:docPr')(parent): for docPr in XPath('./wp:docPr')(parent):
x = docPr.get('descr', None) alt = docPr.get('descr') or alt
if x: title = docPr.get('title') or title
alt = x
if docPr.get('hidden', None) in {'true', 'on', '1'}: if docPr.get('hidden', None) in {'true', 'on', '1'}:
ans['display'] = 'none' ans['display'] = 'none'
return ans, alt return ans, alt, title
def get_image_margins(elem): def get_image_margins(elem):
@ -197,7 +197,7 @@ class Images(object):
self.all_images.add('images/' + name) self.all_images.add('images/' + name)
return name return name
def pic_to_img(self, pic, alt, parent): def pic_to_img(self, pic, alt, parent, title):
XPath, get = self.namespace.XPath, self.namespace.get XPath, get = self.namespace.XPath, self.namespace.get
name = None name = None
link = None link = None
@ -214,7 +214,7 @@ class Images(object):
name = pr.get('name', None) name = pr.get('name', None)
if name: if name:
name = image_filename(name) name = image_filename(name)
alt = pr.get('descr', None) alt = pr.get('descr') or alt
for a in XPath('descendant::a:blip[@r:embed or @r:link]')(pic): for a in XPath('descendant::a:blip[@r:embed or @r:link]')(pic):
rid = get(a, 'r:embed') rid = get(a, 'r:embed')
if not rid: if not rid:
@ -227,6 +227,8 @@ class Images(object):
continue continue
img = IMG(src='images/%s' % src) img = IMG(src='images/%s' % src)
img.set('alt', alt or 'Image') img.set('alt', alt or 'Image')
if title:
img.set('title', title)
if link is not None: if link is not None:
self.links.append((img, link, self.rid_map)) self.links.append((img, link, self.rid_map))
return img return img
@ -235,9 +237,9 @@ class Images(object):
XPath, get = self.namespace.XPath, self.namespace.get XPath, get = self.namespace.XPath, self.namespace.get
# First process the inline pictures # First process the inline pictures
for inline in XPath('./wp:inline')(drawing): for inline in XPath('./wp:inline')(drawing):
style, alt = get_image_properties(inline, XPath, get) style, alt, title = get_image_properties(inline, XPath, get)
for pic in XPath('descendant::pic:pic')(inline): for pic in XPath('descendant::pic:pic')(inline):
ans = self.pic_to_img(pic, alt, inline) ans = self.pic_to_img(pic, alt, inline, title)
if ans is not None: if ans is not None:
if style: if style:
ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems())) ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems()))
@ -245,10 +247,10 @@ class Images(object):
# Now process the floats # Now process the floats
for anchor in XPath('./wp:anchor')(drawing): for anchor in XPath('./wp:anchor')(drawing):
style, alt = get_image_properties(anchor, XPath, get) style, alt, title = get_image_properties(anchor, XPath, get)
self.get_float_properties(anchor, style, page) self.get_float_properties(anchor, style, page)
for pic in XPath('descendant::pic:pic')(anchor): for pic in XPath('descendant::pic:pic')(anchor):
ans = self.pic_to_img(pic, alt, anchor) ans = self.pic_to_img(pic, alt, anchor, title)
if ans is not None: if ans is not None:
if style: if style:
ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems())) ans.set('style', '; '.join('%s: %s' % (k, v) for k, v in style.iteritems()))