diff --git a/src/calibre/ebooks/docx/names.py b/src/calibre/ebooks/docx/names.py index 6124eb3941..702177f2b4 100644 --- a/src/calibre/ebooks/docx/names.py +++ b/src/calibre/ebooks/docx/names.py @@ -46,6 +46,7 @@ TRANSITIONAL_NAMESPACES = { 'xml': 'http://www.w3.org/XML/1998/namespace', # Drawing 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main', + 'a14': 'http://schemas.microsoft.com/office/drawing/2010/main', 'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math', 'mv': 'urn:schemas-microsoft-com:mac:vml', 'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture', @@ -76,6 +77,7 @@ STRICT_NAMESPACES = { for k, v in iteritems(TRANSITIONAL_NAMESPACES) } SVG_BLIP_URI = '{96DAC541-7B7A-43D3-8B79-37D633B846F1}' +USE_LOCAL_DPI_URI = '{28A0092B-C50C-407E-A947-70E740481C1C}' # }}} diff --git a/src/calibre/ebooks/docx/writer/from_html.py b/src/calibre/ebooks/docx/writer/from_html.py index a7ed841b78..7bd5628d8d 100644 --- a/src/calibre/ebooks/docx/writer/from_html.py +++ b/src/calibre/ebooks/docx/writer/from_html.py @@ -440,7 +440,7 @@ class Convert: self.styles_manager = StylesManager(self.docx.namespace, self.log, self.mi.language) self.links_manager = LinksManager(self.docx.namespace, self.docx.document_relationships, self.log) - self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships, self.opts) + self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships, self.opts, self.svg_rasterizer) self.lists_manager = ListsManager(self.docx) self.fonts_manager = FontsManager(self.docx.namespace, self.oeb, self.opts) self.blocks = Blocks(self.docx.namespace, self.styles_manager, self.links_manager) @@ -481,9 +481,7 @@ class Convert: def process_item(self, item): self.current_item = item - stylizer = self.svg_rasterizer.stylizer_cache.get(item) - if stylizer is None: - stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, profile=self.opts.output_profile, base_css=self.base_css) + stylizer = 
self.svg_rasterizer.stylizer(item) self.abshref = self.images_manager.abshref = item.abshref self.current_lang = lang_for_tag(item.data) or self.styles_manager.document_lang diff --git a/src/calibre/ebooks/docx/writer/images.py b/src/calibre/ebooks/docx/writer/images.py index 18bdf6142a..d29a0d8a41 100644 --- a/src/calibre/ebooks/docx/writer/images.py +++ b/src/calibre/ebooks/docx/writer/images.py @@ -12,11 +12,11 @@ from lxml import etree from calibre import fit_image from calibre.ebooks.docx.images import pt_to_emu +from calibre.ebooks.docx.names import USE_LOCAL_DPI_URI, SVG_BLIP_URI from calibre.ebooks.oeb.base import urlquote, urlunquote from calibre.utils.filenames import ascii_filename from calibre.utils.imghdr import identify from calibre.utils.resources import get_image_path as I -from polyglot.builtins import iteritems, itervalues Image = namedtuple('Image', 'rid fname width height fmt item') @@ -39,13 +39,26 @@ def get_image_margins(style): class ImagesManager: - def __init__(self, oeb, document_relationships, opts): + def __init__(self, oeb, document_relationships, opts, svg_rasterizer): self.oeb, self.log = oeb, oeb.log + self.svg_rasterizer = svg_rasterizer self.page_width, self.page_height = opts.output_profile.width_pts, opts.output_profile.height_pts self.images = {} self.seen_filenames = set() self.document_relationships = document_relationships self.count = 0 + self.svg_images = {} + + def read_svg(self, href): + if href not in self.svg_images: + item = self.oeb.manifest.hrefs.get(href) or self.oeb.manifest.hrefs.get(urlquote(href)) + if item is None: + self.log.warning('Failed to find image:', href) + return + image_fname = 'media/' + self.create_filename(href, 'svg') + image_rid = self.document_relationships.add_image(image_fname) + self.svg_images[href] = Image(image_rid, image_fname, -1, -1, 'svg', item) + return self.svg_images[href] def read_image(self, href): if href not in self.images: @@ -84,6 +97,12 @@ class ImagesManager: def 
create_image_markup(self, html_img, stylizer, href, as_block=False): # TODO: img inside a link (clickable image) + svg_rid = '' + svghref = self.svg_rasterizer.svg_originals.get(href) + if svghref: + si = self.read_svg(svghref) + if si: + svg_rid = si.rid style = stylizer.style(html_img) floating = style['float'] if floating not in {'left', 'right'}: @@ -134,7 +153,7 @@ class ImagesManager: if fake_margins: # DOCX does not support setting margins for inline images, so we # fake it by using effect extents to simulate margins - makeelement(parent, 'wp:effectExtent', **{k[-1].lower():v for k, v in iteritems(get_image_margins(style))}) + makeelement(parent, 'wp:effectExtent', **{k[-1].lower():v for k, v in get_image_margins(style).items()}) else: makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0') if floating is not None: @@ -143,10 +162,10 @@ class ImagesManager: makeelement(parent, 'wp:wrapTopAndBottom') else: makeelement(parent, 'wp:wrapSquare', wrapText='bothSides') - self.create_docx_image_markup(parent, name, html_img.get('alt') or name, img.rid, width, height) + self.create_docx_image_markup(parent, name, html_img.get('alt') or name, img.rid, width, height, svg_rid=svg_rid) return ans - def create_docx_image_markup(self, parent, name, alt, img_rid, width, height): + def create_docx_image_markup(self, parent, name, alt, img_rid, width, height, svg_rid=''): makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces makeelement(parent, 'wp:docPr', id=str(self.count), name=name, descr=alt) makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1") @@ -157,7 +176,11 @@ class ImagesManager: makeelement(nvPicPr, 'pic:cNvPr', id='0', name=name, descr=alt) makeelement(nvPicPr, 'pic:cNvPicPr') bf = makeelement(pic, 'pic:blipFill') - makeelement(bf, 'a:blip', r_embed=img_rid) + blip = makeelement(bf, 'a:blip', r_embed=img_rid) + if svg_rid: + ext_list 
= makeelement(blip, 'a:extLst') + makeelement(makeelement(ext_list, 'a:ext', uri=USE_LOCAL_DPI_URI), 'a14:useLocalDpi', val='0') + makeelement(makeelement(ext_list, 'a:ext', uri=SVG_BLIP_URI), 'asvg:svgBlip', r_embed=svg_rid) makeelement(makeelement(bf, 'a:stretch'), 'a:fillRect') spPr = makeelement(pic, 'pic:spPr') xfrm = makeelement(spPr, 'a:xfrm') @@ -178,8 +201,10 @@ class ImagesManager: return fname def serialize(self, images_map): - for img in itervalues(self.images): + for img in self.images.values(): images_map['word/' + img.fname] = partial(self.get_data, img.item) + for img in self.svg_images.values(): + images_map['word/' + img.fname] = lambda img=img: img.item.data_as_bytes_or_none def get_data(self, item): try: diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index be1e855cca..74d17da6d3 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -15,6 +15,7 @@ from collections import defaultdict from itertools import count from lxml import etree, html from operator import attrgetter +from typing import Optional from calibre import as_unicode, force_unicode, get_types_map, isbytestring from calibre.constants import __version__, filesystem_encoding @@ -1017,6 +1018,12 @@ class Manifest: # }}} + @property + def data_as_bytes_or_none(self) -> Optional[bytes]: + if self._loader is None: + return None + return self._loader(getattr(self, 'html_input_href', self.href)) + @property def data(self): """Provides MIME type sensitive access to the manifest @@ -1033,10 +1040,7 @@ class Manifest: """ data = self._data if data is None: - if self._loader is None: - return None - data = self._loader(getattr(self, 'html_input_href', - self.href)) + data = self.data_as_bytes_or_none try: mt = self.media_type.lower() except Exception: diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py index 95396703da..ae5459adb1 100644 --- a/src/calibre/ebooks/oeb/transforms/rasterize.py
+++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -5,32 +5,44 @@ SVG rasterization transform. __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -import os, re +import os +import re +from base64 import standard_b64encode +from functools import lru_cache +from lxml import etree from qt.core import ( - Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer) -from calibre.ebooks.oeb.base import XHTML, XLINK -from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME -from calibre.ebooks.oeb.base import xml2str, xpath -from calibre.ebooks.oeb.base import urlnormalize + QBuffer, QByteArray, QColor, QImage, QIODevice, QPainter, QSvgRenderer, Qt, +) + +from calibre import guess_type +from calibre.ebooks.oeb.base import ( + PNG_MIME, SVG_MIME, XHTML, XLINK, urlnormalize, xml2str, xpath, +) from calibre.ebooks.oeb.stylizer import Stylizer -from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.imghdr import what from polyglot.urllib import urldefrag IMAGE_TAGS = {XHTML('img'), XHTML('object')} KEEP_ATTRS = {'class', 'style', 'width', 'height', 'align'} -TEST_SVG = b''' - - -''' + +def test_svg(): # {{{ + 
TEST_PNG_DATA_URI='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAWJQTFRFAAAAAAAAAAAAAAAAAAAAAQEAAgIBAwIBBgQCBwUCCAYDCggECwkEDgsFDwwFEA0GHRcKHxkLIBkLIxwMJR0NJx8OKCAOKCAPKSAPMScSPTAWQTQXQzUYSjsaSjsbSzsbUD8dUUAdVEMeWkggW0ggW0ghW0khXUohYk4ja1Umb1gocVoocVopclspc1spdV0qd18reF8riW0xjXEzl3g2mns3nn04nn45n345oIA6ooE7o4I7pII7pIM7pYQ7p4U8qYY8rYo+s45Bxp1Hx55Hy6FJy6JJzaRJz6RKz6RLz6VL0qdL1KpM1apM1qtN16xN2KxN2K1O2a1O2q1O265P3K9P3bBQ3rBP37FP37FQ37JQ4rNR47VR5LVR7LxV7bxV7r1V7r5W8L9W8MBW8b9V8b9W8cBW8cBX8sBW8sBX8sFW8sFX88BX88FW88FX88FY88JX88JY9MFX9MJX9MJY9MNYSw0rOAAAAAR0Uk5T2+rr8giKtGMAAAFDSURBVDjLhdNFUwNBEIbhJWkkuLu7u5PgHtwWl0CGnW34aJLl/3OgUlRlGfKepqafmstUW1Yw8E9By6IMWVn/z7OsQOpYNrE0H4lEwuFwZHmyLnUb+AUzIiLMItDgrWIfKH3mnz4RA6PX/8Im8xuEgVfxxG33g+rVi9OT46OdPQ0kDgv8gCg3FMrLphkNyCD9BYiIqEErraP5ZrDGDrw2MoIhsPACGUH5g2gVqzWDKQ/gETKCZmHwbo4ZbHhJ1q1kBMMJCKbJCCof35V+qjCDOUCrMTKCFkc8vU5GENpW8NwmMxhVccYsGUHVvWKOFhlBySJicV6u7+7s6Ozq6anxgT44Lwy4jlKK4br96WDl09GA/gA4zp7gLh2MM3MS+EgCGl+iD9JB4cDZzbV9ZV/atn1+frvfaPhuX4HMq0cZsjKt/zfXXmDab9zjGwAAAABJRU5ErkJggg==' + return f''' + + + + '''.encode() +# }}} class Unavailable(Exception): pass -def rasterize_svg(data=TEST_SVG, sizes=(), width=0, height=0, print=None, fmt='PNG', as_qimage=False): +def rasterize_svg(data=None, sizes=(), width=0, height=0, print=None, fmt='PNG', as_qimage=False): + if data is None: + data = test_svg() svg = QSvgRenderer(QByteArray(data)) size = svg.defaultSize() if size.width() == 100 and size.height() == 100 and sizes: @@ -54,10 +66,16 @@ def rasterize_svg(data=TEST_SVG, sizes=(), width=0, height=0, print=None, fmt='P return array.data() +@lru_cache(maxsize=128) +def data_url(mime_type: str, data: bytes) -> str: + return f'data:{mime_type};base64,' + standard_b64encode(data).decode('ascii') + + class SVGRasterizer: - def __init__(self, base_css=''): + def __init__(self, base_css='', save_svg_originals=False): self.base_css = base_css + 
self.save_svg_originals = save_svg_originals from calibre.gui2 import must_use_qt must_use_qt() @@ -71,20 +89,15 @@ class SVGRasterizer: def __call__(self, oeb, context): oeb.logger.info('Rasterizing SVG images...') - self.temp_files = [] self.stylizer_cache = {} self.oeb = oeb self.opts = context self.profile = context.dest self.images = {} - self.dataize_manifest() + self.svg_originals = {} + self.scan_for_linked_resources_in_manifest() self.rasterize_spine() self.rasterize_cover() - for pt in self.temp_files: - try: - os.remove(pt) - except: - pass def rasterize_svg(self, elem, width=0, height=0, format='PNG'): view_box = elem.get('viewBox', elem.get('viewbox', None)) @@ -110,38 +123,41 @@ class SVGRasterizer: return rasterize_svg(xml2str(elem, with_tail=False), sizes=sizes, width=width, height=height, print=logger.info, fmt=format) - def dataize_manifest(self): + def scan_for_linked_resources_in_manifest(self): for item in self.oeb.manifest.values(): if item.media_type == SVG_MIME and item.data is not None: - self.dataize_svg(item) + self.scan_for_linked_resources_in_svg(item) - def dataize_svg(self, item, svg=None): + def scan_for_linked_resources_in_svg(self, item, svg=None): if svg is None: svg = item.data hrefs = self.oeb.manifest.hrefs + ha = XLINK('href') for elem in xpath(svg, '//svg:*[@xl:href]'): - href = urlnormalize(elem.attrib[XLINK('href')]) + href = urlnormalize(elem.get(ha)) path = urldefrag(href)[0] if not path: continue abshref = item.abshref(path) - if abshref not in hrefs: + linkee = hrefs.get(abshref) + if linkee is None: continue - linkee = hrefs[abshref] data = linkee.bytes_representation - ext = what(None, data) or 'jpg' - with PersistentTemporaryFile(suffix='.'+ext) as pt: - pt.write(data) - self.temp_files.append(pt.name) - elem.attrib[XLINK('href')] = pt.name + ext = what(None, data) + if not ext: + continue + mt = guess_type('file.'+ext)[0] + if not mt or not mt.startswith('image/'): + continue + elem.set(ha, data_url(mt, data)) + 
return svg def stylizer(self, item): ans = self.stylizer_cache.get(item, None) if ans is None: - ans = Stylizer(item.data, item.href, self.oeb, self.opts, + ans = self.stylizer_cache[item] = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile, base_css=self.base_css) - self.stylizer_cache[item] = ans return ans def rasterize_spine(self): @@ -172,13 +188,19 @@ class SVGRasterizer: height = style['height'] width = (width / 72) * self.profile.dpi height = (height / 72) * self.profile.dpi - elem = self.dataize_svg(item, elem) + self.scan_for_linked_resources_in_svg(item, elem) data = self.rasterize_svg(elem, width, height) manifest = self.oeb.manifest href = os.path.splitext(item.href)[0] + '.png' id, href = manifest.generate(item.id, href) manifest.add(id, href, PNG_MIME, data=data) img = elem.makeelement(XHTML('img'), src=item.relhref(href)) + if self.save_svg_originals: + svg_bytes = etree.tostring(elem, encoding='utf-8', xml_declaration=True, pretty_print=True, with_tail=False) + svg_id, svg_href = manifest.generate(item.id, 'inline.svg') + manifest.add(svg_id, svg_href, SVG_MIME, data=svg_bytes) + self.svg_originals[href] = svg_href + img.tail = elem.tail elem.getparent().replace(elem, img) for prop in ('width', 'height'): if prop in elem.attrib: @@ -215,6 +237,7 @@ class SVGRasterizer: id, href = manifest.generate(svgitem.id, href) manifest.add(id, href, PNG_MIME, data=data) self.images[key] = href + self.svg_originals[href] = svgitem.href elem.tag = XHTML('img') for attr in elem.attrib: if attr not in KEEP_ATTRS: @@ -244,3 +267,8 @@ class SVGRasterizer: id, href = self.oeb.manifest.generate(cover.id, href) self.oeb.manifest.add(id, href, PNG_MIME, data=data) covers[0].value = id + + +if __name__ == '__main__': + with open('/t/test-svg-rasterization.png', 'wb') as f: + f.write(rasterize_svg())