DOCX Input: Support linked images

DOCX Input: Support linked (as opposed to embedded) images, if the
linked image is found on the local computer. Fixes #1243597 [MS Word linked pictures don't get converted](https://bugs.launchpad.net/calibre/+bug/1243597)
This commit is contained in:
Kovid Goyal 2013-10-24 12:01:11 +05:30
parent 78e8ba74a0
commit 54508172a4

View File

@ -10,10 +10,17 @@ import os
from lxml.html.builder import IMG, HR from lxml.html.builder import IMG, HR
from calibre.constants import iswindows
from calibre.ebooks.docx.names import XPath, get, barename from calibre.ebooks.docx.names import XPath, get, barename
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename
from calibre.utils.imghdr import what from calibre.utils.imghdr import what
class LinkedImageNotFound(ValueError):
    """Raised when a linked (as opposed to embedded) image file is missing.

    The path that was looked up is stored on ``fname`` so that the code
    catching the error can report which file could not be found.
    """

    def __init__(self, fname):
        # Pass the path through as the exception argument as well, so
        # ``args`` and ``str()`` carry it just like ``fname`` does.
        super(LinkedImageNotFound, self).__init__(fname)
        self.fname = fname
def emu_to_pt(x):
    """Convert a length given in EMU to points by dividing by 12700."""
    emu_per_pt = 12700
    return x / emu_per_pt
@ -107,7 +114,16 @@ class Images(object):
fname = rid_map[rid] fname = rid_map[rid]
if fname in self.used: if fname in self.used:
return self.used[fname] return self.used[fname]
raw = self.docx.read(fname) if fname.startswith('file://'):
src = fname[len('file://'):]
if iswindows and src and src[0] == '/':
src = src[1:]
if not src or not os.path.exists(src):
raise LinkedImageNotFound(src)
with open(src, 'rb') as rawsrc:
raw = rawsrc.read()
else:
raw = self.docx.read(fname)
base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image' base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image'
ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg' ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
if ext == 'emf': if ext == 'emf':
@ -155,10 +171,16 @@ class Images(object):
if name: if name:
name = ascii_filename(name).replace(' ', '_') name = ascii_filename(name).replace(' ', '_')
alt = pr.get('descr', None) alt = pr.get('descr', None)
for a in XPath('descendant::a:blip[@r:embed]')(pic): for a in XPath('descendant::a:blip[@r:embed or @r:link]')(pic):
rid = get(a, 'r:embed') rid = get(a, 'r:embed')
if rid in self.rid_map: if not rid:
src = self.generate_filename(rid, name) rid = get(a, 'r:link')
if rid and rid in self.rid_map:
try:
src = self.generate_filename(rid, name)
except LinkedImageNotFound as err:
self.log.warn('Linked image: %s not found, ignoring' % err.fname)
continue
img = IMG(src='images/%s' % src) img = IMG(src='images/%s' % src)
img.set('alt', alt or 'Image') img.set('alt', alt or 'Image')
if link is not None: if link is not None:
@ -211,7 +233,11 @@ class Images(object):
for imagedata in XPath('descendant::v:imagedata[@r:id]')(pict): for imagedata in XPath('descendant::v:imagedata[@r:id]')(pict):
rid = get(imagedata, 'r:id') rid = get(imagedata, 'r:id')
if rid in self.rid_map: if rid in self.rid_map:
src = self.generate_filename(rid) try:
src = self.generate_filename(rid)
except LinkedImageNotFound as err:
self.log.warn('Linked image: %s not found, ignoring' % err.fname)
continue
img = IMG(src='images/%s' % src, style="display:block") img = IMG(src='images/%s' % src, style="display:block")
alt = get(imagedata, 'o:title') alt = get(imagedata, 'o:title')
img.set('alt', alt or 'Image') img.set('alt', alt or 'Image')