mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
The plumbing for images in DOCX Output
This commit is contained in:
parent
2d768e9f4e
commit
8c827eefc6
@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import textwrap
|
||||
import textwrap, os
|
||||
from io import BytesIO
|
||||
|
||||
from lxml import etree
|
||||
@ -14,7 +14,7 @@ from lxml.builder import ElementMaker
|
||||
|
||||
from calibre import guess_type
|
||||
from calibre.constants import numeric_version, __appname__
|
||||
from calibre.ebooks.docx.names import namespaces, STYLES, WEB_SETTINGS
|
||||
from calibre.ebooks.docx.names import namespaces, STYLES, WEB_SETTINGS, IMAGES
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.ebooks.metadata.opf2 import OPF as ReadOPF
|
||||
from calibre.ebooks.oeb.base import OPF, OPF2_NS
|
||||
@ -51,7 +51,6 @@ class DocumentRelationships(object):
|
||||
|
||||
def __init__(self):
|
||||
self.rmap = {}
|
||||
self.counter = 0
|
||||
for typ, target in {
|
||||
STYLES: 'styles.xml',
|
||||
WEB_SETTINGS: 'webSettings.xml',
|
||||
@ -64,11 +63,13 @@ class DocumentRelationships(object):
|
||||
def add_relationship(self, target, rtype, target_mode=None):
    """Return the relationship id for ``(target, rtype, target_mode)``.

    An id already known to ``get_relationship_id()`` is reused. Otherwise a
    new ``rIdN`` id is allocated, stored in ``self.rmap`` under the
    ``(target, rtype, target_mode)`` key, and returned.
    """
    ans = self.get_relationship_id(target, rtype, target_mode)
    if ans is None:
        # The next sequence number is derived from the size of the map, so
        # no separate counter has to be kept in sync with it. This stays
        # unique as long as entries are never removed from rmap.
        ans = 'rId%d' % (len(self.rmap) + 1)
        self.rmap[(target, rtype, target_mode)] = ans
    return ans
|
||||
|
||||
def add_image(self, target):
    """Register *target* as an IMAGES-type relationship and return its id."""
    rid = self.add_relationship(target, IMAGES)
    return rid
|
||||
|
||||
def serialize(self):
|
||||
E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']})
|
||||
relationships = E.Relationships()
|
||||
@ -113,8 +114,13 @@ class DOCX(object):
|
||||
}.iteritems():
|
||||
added.add(ext)
|
||||
types.append(E.Default(Extension=ext, ContentType=mt))
|
||||
# TODO: Iterate over all resources and add mimetypes for any that are
|
||||
# not already added
|
||||
for fname in self.images:
|
||||
ext = fname.rpartition(os.extsep)[-1]
|
||||
if ext not in added:
|
||||
added.add(ext)
|
||||
mt = guess_type('a.' + ext)[0]
|
||||
if mt:
|
||||
types.append(E.Default(Extension=ext, ContentType=mt))
|
||||
return xml2str(types)
|
||||
|
||||
@property
|
||||
@ -176,6 +182,8 @@ class DOCX(object):
|
||||
zf.writestr('word/document.xml', xml2str(self.document))
|
||||
zf.writestr('word/styles.xml', xml2str(self.styles))
|
||||
zf.writestr('word/_rels/document.xml.rels', self.document_relationships.serialize())
|
||||
for fname, data_getter in self.images.iteritems():
|
||||
zf.writestr(fname, data_getter())
|
||||
|
||||
if __name__ == '__main__':
|
||||
d = DOCX(None, None)
|
||||
|
@ -13,6 +13,7 @@ from lxml.builder import ElementMaker
|
||||
|
||||
from calibre.ebooks.docx.names import namespaces
|
||||
from calibre.ebooks.docx.writer.styles import w, StylesManager
|
||||
from calibre.ebooks.docx.writer.images import ImagesManager
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
||||
from calibre.ebooks.oeb.base import XPath, barename
|
||||
from calibre.ebooks.pdf.render.common import PAPER_SIZES
|
||||
@ -151,18 +152,26 @@ class Convert(object):
|
||||
|
||||
def __call__(self):
    """Run the conversion of ``self.oeb``.

    Rasterizes SVG, creates the styles and images managers, processes every
    spine item, finalizes the styles and writes the output. Each step runs
    exactly once, and the images manager is cleaned up even if a step fails.
    """
    from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
    # Keep the rasterizer around: process_item() looks up its
    # stylizer_cache before building a new Stylizer.
    self.svg_rasterizer = SVGRasterizer()
    self.svg_rasterizer(self.oeb, self.opts)

    self.styles_manager = StylesManager()
    self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships)

    try:
        for item in self.oeb.spine:
            self.process_item(item)

        self.styles_manager.finalize(self.blocks)
        self.write()
    finally:
        # Always release whatever the images manager allocated, including
        # when processing or writing raised.
        self.images_manager.cleanup()
|
||||
|
||||
def process_item(self, item):
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.opts.output_profile)
|
||||
stylizer = self.svg_rasterizer.stylizer_cache.get(item)
|
||||
if stylizer is None:
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.opts.output_profile)
|
||||
self.abshref = self.images_manager.abshref = item.abshref
|
||||
|
||||
is_first_block = True
|
||||
for body in XPath('//h:body')(item.data):
|
||||
@ -177,21 +186,24 @@ class Convert(object):
|
||||
block_style = stylizer.style(html_block)
|
||||
if block_style.is_hidden:
|
||||
return
|
||||
if html_block.text:
|
||||
docx_block.add_text(html_block.text, block_style, ignore_leading_whitespace=True, is_parent_style=True)
|
||||
if html_block.tag.endswith('}img'):
|
||||
b = Block(self.styles_manager, html_block, stylizer.style(html_block))
|
||||
self.blocks.append(b)
|
||||
self.images_manager.add_image(html_block, b, stylizer)
|
||||
else:
|
||||
if html_block.text:
|
||||
docx_block.add_text(html_block.text, block_style, ignore_leading_whitespace=True, is_parent_style=True)
|
||||
|
||||
for child in html_block.iterchildren(etree.Element):
|
||||
tag = barename(child.tag)
|
||||
style = stylizer.style(child)
|
||||
display = style._get('display')
|
||||
if tag == 'img':
|
||||
pass # TODO: Handle images
|
||||
if display == 'block' and tag != 'br':
|
||||
b = Block(self.styles_manager, child, style)
|
||||
self.blocks.append(b)
|
||||
self.process_block(child, b, stylizer)
|
||||
else:
|
||||
self.process_inline(child, self.blocks[-1], stylizer)
|
||||
for child in html_block.iterchildren(etree.Element):
|
||||
tag = barename(child.tag)
|
||||
style = stylizer.style(child)
|
||||
display = style._get('display')
|
||||
if display == 'block' and tag != 'br':
|
||||
b = Block(self.styles_manager, child, style)
|
||||
self.blocks.append(b)
|
||||
self.process_block(child, b, stylizer)
|
||||
else:
|
||||
self.process_inline(child, self.blocks[-1], stylizer)
|
||||
|
||||
if ignore_tail is False and html_block.tail and html_block.tail.strip():
|
||||
b = docx_block
|
||||
@ -211,7 +223,7 @@ class Convert(object):
|
||||
if html_child.tail or html_child is not html_child.getparent()[-1]:
|
||||
docx_block.add_break(clear={'both':'all', 'left':'left', 'right':'right'}.get(style['clear'], 'none'))
|
||||
elif tag == 'img':
|
||||
return # TODO: Handle images
|
||||
self.images_manager.add_image(html_child, docx_block, stylizer)
|
||||
else:
|
||||
if html_child.text:
|
||||
docx_block.add_text(html_child.text, style, html_parent=html_child)
|
||||
@ -249,7 +261,7 @@ class Convert(object):
|
||||
E.docGrid(**{w('linePitch'):"360"}),
|
||||
))
|
||||
|
||||
dn = {k:v for k, v in namespaces.iteritems() if k in 'wr'}
|
||||
dn = {k:v for k, v in namespaces.iteritems() if k in tuple('wra') + ('wp',)}
|
||||
E = ElementMaker(namespace=dn['w'], nsmap=dn)
|
||||
self.docx.styles = E.styles(
|
||||
E.docDefaults(
|
||||
@ -268,4 +280,6 @@ class Convert(object):
|
||||
)
|
||||
)
|
||||
)
|
||||
self.docx.images = {}
|
||||
self.styles_manager.serialize(self.docx.styles)
|
||||
self.images_manager.serialize(self.docx.images)
|
||||
|
78
src/calibre/ebooks/docx/writer/images.py
Normal file
78
src/calibre/ebooks/docx/writer/images.py
Normal file
@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os
|
||||
import shutil, posixpath
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
|
||||
from calibre.ebooks.oeb.base import urlunquote
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.magick.draw import identify_data
|
||||
|
||||
# Record kept for every image added to the document: the relationship id, the
# file name relative to the word/ folder, the width, height and format
# reported by identify_data(), and the manifest item that supplies the bytes.
Image = namedtuple('Image', 'rid fname width height fmt item')
|
||||
|
||||
class ImagesManager(object):
    """Track the images referenced by the content being converted to DOCX.

    Every distinct image href is identified once, given a unique file name
    under ``media/`` and registered with the document relationships. The
    collected images are later exported through :meth:`serialize`.

    Note: :meth:`add_image` resolves ``src`` values through ``self.abshref``,
    which is not set here; the caller must assign it before adding images.
    """

    def __init__(self, oeb, document_relationships):
        """Store the book (*oeb*) and the relationships registry to use."""
        self.oeb, self.log = oeb, oeb.log
        self.images = {}  # absolute href -> Image record
        self.seen_filenames = set()  # lower-cased base names already handed out
        self.document_relationships = document_relationships
        self._tdir = None

    @property
    def tdir(self):
        """Temporary directory for image work, created on first access."""
        if self._tdir is None:
            self._tdir = PersistentTemporaryDirectory(suffix='_docx_output_images')
        return self._tdir

    def cleanup(self):
        """Delete the temporary directory if one was created."""
        if self._tdir is not None:
            shutil.rmtree(self._tdir)
            self._tdir = None

    def add_image(self, img, block, stylizer):
        """Register the image referenced by *img* and return its relationship id.

        *img* must provide ``get('src')``. *block* and *stylizer* are accepted
        but not used by this method. Returns ``None`` when there is no
        ``src``, when the href is not in the manifest, when the manifest item
        does not hold bytes, or when the image data cannot be identified.
        """
        src = img.get('src')
        if not src:
            return
        href = self.abshref(src)
        if href not in self.images:
            item = self.oeb.manifest.hrefs.get(href)
            if item is None or not isinstance(item.data, bytes):
                return
            try:
                width, height, fmt = identify_data(item.data)
            except Exception:
                # identify_data() signals undecodable data with a generic
                # exception. One broken image should not abort the whole
                # conversion, so report it and leave the image out.
                self.log.warning('Ignoring corrupted image: %s' % href)
                return
            image_fname = 'media/' + self.create_filename(href, fmt)
            image_rid = self.document_relationships.add_image(image_fname)
            self.images[href] = Image(image_rid, image_fname, width, height, fmt, item)
            # The bytes are not needed until serialization, where get_data()
            # reads item.data again.
            item.unload_data_from_memory()
        return self.images[href].rid

    def create_filename(self, href, fmt):
        """Return a unique ``<name>.<fmt>`` file name derived from *href*.

        The base name is the ASCII form of the href's unquoted basename
        without its extension, limited to 75 characters, with ``'image'`` as
        the fallback. Uniqueness is case-insensitive and is achieved by
        appending an increasing number to the base name.
        """
        fname = ascii_filename(urlunquote(posixpath.basename(href)))
        fname = posixpath.splitext(fname)[0]
        fname = fname[:75].rstrip('.') or 'image'
        num = 0
        base = fname
        while fname.lower() in self.seen_filenames:
            num += 1
            fname = base + str(num)
        self.seen_filenames.add(fname.lower())
        fname += os.extsep + fmt.lower()
        return fname

    def serialize(self, images_map):
        """Fill *images_map* with ``'word/<fname>' -> callable returning bytes``."""
        # values() exists on both Python 2 and Python 3 dicts, unlike the
        # Python-2-only itervalues().
        for img in self.images.values():
            images_map['word/' + img.fname] = partial(self.get_data, img.item)

    def get_data(self, item):
        """Return ``item.data`` and then unload it from memory again."""
        try:
            return item.data
        finally:
            # NOTE(review): the False argument presumably drops the data
            # without saving it elsewhere - confirm against the manifest API.
            item.unload_data_from_memory(False)
|
Loading…
x
Reference in New Issue
Block a user