mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Output: Add support for SVG images. Now the generated DOCX will contain both the rasterized version of the SVG image and the original SVG image, which is supported by modern versions of Word.
This commit is contained in:
parent
4c4cfb843c
commit
41f7a01e35
@ -46,6 +46,7 @@ TRANSITIONAL_NAMESPACES = {
|
||||
'xml': 'http://www.w3.org/XML/1998/namespace',
|
||||
# Drawing
|
||||
'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
|
||||
'a14': 'http://schemas.microsoft.com/office/drawing/2010/main',
|
||||
'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
|
||||
'mv': 'urn:schemas-microsoft-com:mac:vml',
|
||||
'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
|
||||
@ -76,6 +77,7 @@ STRICT_NAMESPACES = {
|
||||
for k, v in iteritems(TRANSITIONAL_NAMESPACES)
|
||||
}
|
||||
# URI set on the <a:ext> blip extension that wraps the <asvg:svgBlip> element
# pointing at the original SVG part of an image.
SVG_BLIP_URI = '{96DAC541-7B7A-43D3-8B79-37D633B846F1}'
|
||||
# URI set on the <a:ext> blip extension that wraps the <a14:useLocalDpi> element.
USE_LOCAL_DPI_URI = '{28A0092B-C50C-407E-A947-70E740481C1C}'
|
||||
# }}}
|
||||
|
||||
|
||||
|
@ -440,7 +440,7 @@ class Convert:
|
||||
|
||||
self.styles_manager = StylesManager(self.docx.namespace, self.log, self.mi.language)
|
||||
self.links_manager = LinksManager(self.docx.namespace, self.docx.document_relationships, self.log)
|
||||
self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships, self.opts)
|
||||
self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships, self.opts, self.svg_rasterizer)
|
||||
self.lists_manager = ListsManager(self.docx)
|
||||
self.fonts_manager = FontsManager(self.docx.namespace, self.oeb, self.opts)
|
||||
self.blocks = Blocks(self.docx.namespace, self.styles_manager, self.links_manager)
|
||||
@ -481,9 +481,7 @@ class Convert:
|
||||
|
||||
def process_item(self, item):
|
||||
self.current_item = item
|
||||
stylizer = self.svg_rasterizer.stylizer_cache.get(item)
|
||||
if stylizer is None:
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, profile=self.opts.output_profile, base_css=self.base_css)
|
||||
stylizer = self.svg_rasterizer.stylizer(item)
|
||||
self.abshref = self.images_manager.abshref = item.abshref
|
||||
|
||||
self.current_lang = lang_for_tag(item.data) or self.styles_manager.document_lang
|
||||
|
@ -12,11 +12,11 @@ from lxml import etree
|
||||
|
||||
from calibre import fit_image
|
||||
from calibre.ebooks.docx.images import pt_to_emu
|
||||
from calibre.ebooks.docx.names import USE_LOCAL_DPI_URI, SVG_BLIP_URI
|
||||
from calibre.ebooks.oeb.base import urlquote, urlunquote
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.imghdr import identify
|
||||
from calibre.utils.resources import get_image_path as I
|
||||
from polyglot.builtins import iteritems, itervalues
|
||||
|
||||
Image = namedtuple('Image', 'rid fname width height fmt item')
|
||||
|
||||
@ -39,13 +39,26 @@ def get_image_margins(style):
|
||||
|
||||
class ImagesManager:
|
||||
|
||||
def __init__(self, oeb, document_relationships, opts):
|
||||
def __init__(self, oeb, document_relationships, opts, svg_rasterizer):
|
||||
self.oeb, self.log = oeb, oeb.log
|
||||
self.svg_rasterizer = svg_rasterizer
|
||||
self.page_width, self.page_height = opts.output_profile.width_pts, opts.output_profile.height_pts
|
||||
self.images = {}
|
||||
self.seen_filenames = set()
|
||||
self.document_relationships = document_relationships
|
||||
self.count = 0
|
||||
self.svg_images = {}
|
||||
|
||||
def read_svg(self, href):
    """Return the ``Image`` record for the SVG at *href*, creating it on first use.

    The href is looked up in the manifest as given and, failing that, in its
    URL-quoted form. If no manifest item is found a warning is logged and
    ``None`` is returned (nothing is cached in that case). Otherwise a
    ``media/`` filename is allocated, an image relationship is registered and
    the record is cached in ``self.svg_images``. Width and height are stored
    as -1.
    """
    try:
        return self.svg_images[href]
    except KeyError:
        pass

    manifest_hrefs = self.oeb.manifest.hrefs
    item = manifest_hrefs.get(href) or manifest_hrefs.get(urlquote(href))
    if item is None:
        self.log.warning('Failed to find image:', href)
        return None

    fname = 'media/' + self.create_filename(href, 'svg')
    rid = self.document_relationships.add_image(fname)
    record = Image(rid, fname, -1, -1, 'svg', item)
    self.svg_images[href] = record
    return record
|
||||
|
||||
def read_image(self, href):
|
||||
if href not in self.images:
|
||||
@ -84,6 +97,12 @@ class ImagesManager:
|
||||
|
||||
def create_image_markup(self, html_img, stylizer, href, as_block=False):
|
||||
# TODO: img inside a link (clickable image)
|
||||
svg_rid = ''
|
||||
svghref = self.svg_rasterizer.svg_originals.get(href)
|
||||
if svghref:
|
||||
si = self.read_svg(svghref)
|
||||
if si:
|
||||
svg_rid = si.rid
|
||||
style = stylizer.style(html_img)
|
||||
floating = style['float']
|
||||
if floating not in {'left', 'right'}:
|
||||
@ -134,7 +153,7 @@ class ImagesManager:
|
||||
if fake_margins:
|
||||
# DOCX does not support setting margins for inline images, so we
|
||||
# fake it by using effect extents to simulate margins
|
||||
makeelement(parent, 'wp:effectExtent', **{k[-1].lower():v for k, v in iteritems(get_image_margins(style))})
|
||||
makeelement(parent, 'wp:effectExtent', **{k[-1].lower():v for k, v in get_image_margins(style).items()})
|
||||
else:
|
||||
makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
|
||||
if floating is not None:
|
||||
@ -143,10 +162,10 @@ class ImagesManager:
|
||||
makeelement(parent, 'wp:wrapTopAndBottom')
|
||||
else:
|
||||
makeelement(parent, 'wp:wrapSquare', wrapText='bothSides')
|
||||
self.create_docx_image_markup(parent, name, html_img.get('alt') or name, img.rid, width, height)
|
||||
self.create_docx_image_markup(parent, name, html_img.get('alt') or name, img.rid, width, height, svg_rid=svg_rid)
|
||||
return ans
|
||||
|
||||
def create_docx_image_markup(self, parent, name, alt, img_rid, width, height):
|
||||
def create_docx_image_markup(self, parent, name, alt, img_rid, width, height, svg_rid=''):
|
||||
makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces
|
||||
makeelement(parent, 'wp:docPr', id=str(self.count), name=name, descr=alt)
|
||||
makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1")
|
||||
@ -157,7 +176,11 @@ class ImagesManager:
|
||||
makeelement(nvPicPr, 'pic:cNvPr', id='0', name=name, descr=alt)
|
||||
makeelement(nvPicPr, 'pic:cNvPicPr')
|
||||
bf = makeelement(pic, 'pic:blipFill')
|
||||
makeelement(bf, 'a:blip', r_embed=img_rid)
|
||||
blip = makeelement(bf, 'a:blip', r_embed=img_rid)
|
||||
if svg_rid:
|
||||
ext_list = makeelement(blip, 'a:extLst')
|
||||
makeelement(makeelement(ext_list, 'a:ext', uri=USE_LOCAL_DPI_URI), 'a14:useLocalDpi', val='0')
|
||||
makeelement(makeelement(ext_list, 'a:ext', uri=SVG_BLIP_URI), 'asvg:svgBlip', r_embed=svg_rid)
|
||||
makeelement(makeelement(bf, 'a:stretch'), 'a:fillRect')
|
||||
spPr = makeelement(pic, 'pic:spPr')
|
||||
xfrm = makeelement(spPr, 'a:xfrm')
|
||||
@ -178,8 +201,10 @@ class ImagesManager:
|
||||
return fname
|
||||
|
||||
def serialize(self, images_map):
    """Register a lazy data getter in *images_map* for every collected image.

    Keys are the part names (``'word/' + fname``). Values are callables:
    raster images defer to ``self.get_data`` for their manifest item, SVG
    originals return the item's ``data_as_bytes_or_none``.

    :param images_map: mutable mapping that is filled in place
    """
    for img in self.images.values():
        images_map['word/' + img.fname] = partial(self.get_data, img.item)
    for img in self.svg_images.values():
        # Bind the item now via a default argument. A plain ``lambda: img...``
        # would look ``img`` up when called, i.e. after the loop has finished,
        # so every SVG part would receive the data of the last SVG image.
        images_map['word/' + img.fname] = lambda item=img.item: item.data_as_bytes_or_none
|
||||
|
||||
def get_data(self, item):
|
||||
try:
|
||||
|
@ -15,6 +15,7 @@ from collections import defaultdict
|
||||
from itertools import count
|
||||
from lxml import etree, html
|
||||
from operator import attrgetter
|
||||
from typing import Optional
|
||||
|
||||
from calibre import as_unicode, force_unicode, get_types_map, isbytestring
|
||||
from calibre.constants import __version__, filesystem_encoding
|
||||
@ -1017,6 +1018,12 @@ class Manifest:
|
||||
|
||||
# }}}
|
||||
|
||||
@property
def data_as_bytes_or_none(self) -> Optional[bytes]:
    """Return whatever the item's loader yields for its href, or ``None``.

    No loader means there is nothing to load, so ``None`` is returned.
    Otherwise the loader is called with ``html_input_href`` when the item
    has that attribute, falling back to ``href``.
    """
    loader = self._loader
    if loader is not None:
        return loader(getattr(self, 'html_input_href', self.href))
    return None
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
"""Provides MIME type sensitive access to the manifest
|
||||
@ -1033,10 +1040,7 @@ class Manifest:
|
||||
"""
|
||||
data = self._data
|
||||
if data is None:
|
||||
if self._loader is None:
|
||||
return None
|
||||
data = self._loader(getattr(self, 'html_input_href',
|
||||
self.href))
|
||||
data = self.data_as_bytes_or_none
|
||||
try:
|
||||
mt = self.media_type.lower()
|
||||
except Exception:
|
||||
|
@ -5,32 +5,44 @@ SVG rasterization transform.
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import os, re
|
||||
import os
|
||||
import re
|
||||
from base64 import standard_b64encode
|
||||
from functools import lru_cache
|
||||
|
||||
from lxml import etree
|
||||
from qt.core import (
|
||||
Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer)
|
||||
from calibre.ebooks.oeb.base import XHTML, XLINK
|
||||
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME
|
||||
from calibre.ebooks.oeb.base import xml2str, xpath
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
QBuffer, QByteArray, QColor, QImage, QIODevice, QPainter, QSvgRenderer, Qt,
|
||||
)
|
||||
|
||||
from calibre import guess_type
|
||||
from calibre.ebooks.oeb.base import (
|
||||
PNG_MIME, SVG_MIME, XHTML, XLINK, urlnormalize, xml2str, xpath,
|
||||
)
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.imghdr import what
|
||||
from polyglot.urllib import urldefrag
|
||||
|
||||
IMAGE_TAGS = {XHTML('img'), XHTML('object')}
|
||||
KEEP_ATTRS = {'class', 'style', 'width', 'height', 'align'}
|
||||
TEST_SVG = b'''
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" viewBox="0 0 18 18">
|
||||
<path d="M4.5 11H3v4h4v-1.5H4.5V11zM3 7h1.5V4.5H7V3H3v4zm10.5 6.5H11V15h4v-4h-1.5v2.5zM11 3v1.5h2.5V7H15V3h-4z"/>
|
||||
</svg>'''
|
||||
|
||||
def test_svg():  # {{{
    """Return a small SVG document (path plus embedded <image>) as encoded bytes.

    Used as the default input of rasterize_svg() when no data is supplied.
    """
    TEST_PNG_DATA_URI=''
    svg_source = f'''
<svg xmlns="http://www.w3.org/2000/svg" width="64" height="64" viewBox="0 0 64 64">
<path d="M4.5 11H3v4h4v-1.5H4.5V11zM3 7h1.5V4.5H7V3H3v4zm10.5 6.5H11V15h4v-4h-1.5v2.5zM11 3v1.5h2.5V7H15V3h-4z"/>
<image width="32" height="32" x="32" y="32" xlink:href="{TEST_PNG_DATA_URI}"/>
</svg>'''
    return svg_source.encode()
# }}}
|
||||
|
||||
|
||||
class Unavailable(Exception):
    """Marker exception type; adds no behaviour of its own to ``Exception``."""
|
||||
|
||||
|
||||
def rasterize_svg(data=TEST_SVG, sizes=(), width=0, height=0, print=None, fmt='PNG', as_qimage=False):
|
||||
def rasterize_svg(data=None, sizes=(), width=0, height=0, print=None, fmt='PNG', as_qimage=False):
|
||||
if data is None:
|
||||
data = test_svg()
|
||||
svg = QSvgRenderer(QByteArray(data))
|
||||
size = svg.defaultSize()
|
||||
if size.width() == 100 and size.height() == 100 and sizes:
|
||||
@ -54,10 +66,16 @@ def rasterize_svg(data=TEST_SVG, sizes=(), width=0, height=0, print=None, fmt='P
|
||||
return array.data()
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
def data_url(mime_type: str, data: bytes) -> str:
    """Build a base64 ``data:`` URL for *data* with the given MIME type.

    Results are memoised (up to 128 distinct argument pairs).
    """
    payload = standard_b64encode(data).decode('ascii')
    return f'data:{mime_type};base64,' + payload
|
||||
|
||||
|
||||
class SVGRasterizer:
|
||||
|
||||
def __init__(self, base_css=''):
|
||||
def __init__(self, base_css='', save_svg_originals=False):
|
||||
self.base_css = base_css
|
||||
self.save_svg_originals = save_svg_originals
|
||||
from calibre.gui2 import must_use_qt
|
||||
must_use_qt()
|
||||
|
||||
@ -71,20 +89,15 @@ class SVGRasterizer:
|
||||
|
||||
def __call__(self, oeb, context):
|
||||
oeb.logger.info('Rasterizing SVG images...')
|
||||
self.temp_files = []
|
||||
self.stylizer_cache = {}
|
||||
self.oeb = oeb
|
||||
self.opts = context
|
||||
self.profile = context.dest
|
||||
self.images = {}
|
||||
self.dataize_manifest()
|
||||
self.svg_originals = {}
|
||||
self.scan_for_linked_resources_in_manifest()
|
||||
self.rasterize_spine()
|
||||
self.rasterize_cover()
|
||||
for pt in self.temp_files:
|
||||
try:
|
||||
os.remove(pt)
|
||||
except:
|
||||
pass
|
||||
|
||||
def rasterize_svg(self, elem, width=0, height=0, format='PNG'):
|
||||
view_box = elem.get('viewBox', elem.get('viewbox', None))
|
||||
@ -110,38 +123,41 @@ class SVGRasterizer:
|
||||
|
||||
return rasterize_svg(xml2str(elem, with_tail=False), sizes=sizes, width=width, height=height, print=logger.info, fmt=format)
|
||||
|
||||
def scan_for_linked_resources_in_manifest(self):
    """Scan every SVG manifest item that has data for linked resources."""
    for item in self.oeb.manifest.values():
        # Skip non-SVG items first so that data is only loaded for SVGs.
        if item.media_type != SVG_MIME or item.data is None:
            continue
        self.scan_for_linked_resources_in_svg(item)
|
||||
|
||||
def scan_for_linked_resources_in_svg(self, item, svg=None):
    """Inline manifest images referenced via ``xlink:href`` as ``data:`` URLs.

    :param item: manifest item used to resolve relative hrefs (and, when
        *svg* is None, the source of the tree via ``item.data``)
    :param svg: optional tree to scan instead of ``item.data``
    :return: the scanned tree, modified in place

    References are left untouched when they have no path component, do not
    resolve to a manifest item, have an unrecognised image type, or map to a
    MIME type that is not ``image/*``.
    """
    root = item.data if svg is None else svg
    manifest_hrefs = self.oeb.manifest.hrefs
    href_attr = XLINK('href')
    for node in xpath(root, '//svg:*[@xl:href]'):
        # Drop any #fragment; only the path identifies the manifest item.
        target = urldefrag(urlnormalize(node.get(href_attr)))[0]
        if not target:
            continue
        resource = manifest_hrefs.get(item.abshref(target))
        if resource is None:
            continue
        raw = resource.bytes_representation
        image_type = what(None, raw)
        if not image_type:
            continue
        mime = guess_type('file.'+image_type)[0]
        if mime and mime.startswith('image/'):
            node.set(href_attr, data_url(mime, raw))

    return root
|
||||
|
||||
def stylizer(self, item):
    """Return the Stylizer for *item*, building and caching it on first request."""
    cache = self.stylizer_cache
    cached = cache.get(item, None)
    if cached is not None:
        return cached
    created = Stylizer(item.data, item.href, self.oeb, self.opts,
                       self.profile, base_css=self.base_css)
    cache[item] = created
    return created
|
||||
|
||||
def rasterize_spine(self):
|
||||
@ -172,13 +188,19 @@ class SVGRasterizer:
|
||||
height = style['height']
|
||||
width = (width / 72) * self.profile.dpi
|
||||
height = (height / 72) * self.profile.dpi
|
||||
elem = self.dataize_svg(item, elem)
|
||||
self.scan_for_linked_resources_in_svg(item, elem)
|
||||
data = self.rasterize_svg(elem, width, height)
|
||||
manifest = self.oeb.manifest
|
||||
href = os.path.splitext(item.href)[0] + '.png'
|
||||
id, href = manifest.generate(item.id, href)
|
||||
manifest.add(id, href, PNG_MIME, data=data)
|
||||
img = elem.makeelement(XHTML('img'), src=item.relhref(href))
|
||||
if self.save_svg_originals:
|
||||
svg_bytes = etree.tostring(elem, encoding='utf-8', xml_declaration=True, pretty_print=True, with_tail=False)
|
||||
svg_id, svg_href = manifest.generate(item.id, 'inline.svg')
|
||||
manifest.add(svg_id, svg_href, SVG_MIME, data=svg_bytes)
|
||||
self.svg_originals[href] = svg_href
|
||||
img.tail = elem.tail
|
||||
elem.getparent().replace(elem, img)
|
||||
for prop in ('width', 'height'):
|
||||
if prop in elem.attrib:
|
||||
@ -215,6 +237,7 @@ class SVGRasterizer:
|
||||
id, href = manifest.generate(svgitem.id, href)
|
||||
manifest.add(id, href, PNG_MIME, data=data)
|
||||
self.images[key] = href
|
||||
self.svg_originals[href] = svgitem.href
|
||||
elem.tag = XHTML('img')
|
||||
for attr in elem.attrib:
|
||||
if attr not in KEEP_ATTRS:
|
||||
@ -244,3 +267,7 @@ class SVGRasterizer:
|
||||
id, href = self.oeb.manifest.generate(cover.id, href)
|
||||
self.oeb.manifest.add(id, href, PNG_MIME, data=data)
|
||||
covers[0].value = id
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Developer smoke test: rasterize the built-in test SVG and write the
    # result to a fixed path. The context manager makes sure the file is
    # flushed and closed instead of relying on garbage collection.
    with open('/t/test-svg-rasterization.png', 'wb') as f:
        f.write(rasterize_svg())
|
||||
|
Loading…
x
Reference in New Issue
Block a user