From 9bb34dffa3fdcb1bc40b19998741281b31bcf3bf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 27 Dec 2014 10:19:39 +0530 Subject: [PATCH] Conversion: Convert images encoded as data URIs in the input document into normal images, as some ebook reader software cannot handle data URIs. Fixes #1405803 [Can't see images in *.mobi in Kindle after convertation](https://bugs.launchpad.net/calibre/+bug/1405803) --- src/calibre/ebooks/conversion/plumber.py | 2 + src/calibre/ebooks/oeb/transforms/data_url.py | 50 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 src/calibre/ebooks/oeb/transforms/data_url.py diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 227c0c87b1..5c217a227a 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -1067,6 +1067,8 @@ OptionRecommendation(name='search_replace', self.oeb.plumber_output_format = self.output_fmt or '' + from calibre.ebooks.oeb.transforms.data_url import DataURL + DataURL()(self.oeb, self.opts) from calibre.ebooks.oeb.transforms.guide import Clean Clean()(self.oeb, self.opts) pr(0.1) diff --git a/src/calibre/ebooks/oeb/transforms/data_url.py b/src/calibre/ebooks/oeb/transforms/data_url.py new file mode 100644 index 0000000000..f03eb723ca --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/data_url.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2014, Kovid Goyal ' + +import re +from calibre.ebooks.oeb.base import XPath, urlunquote + +class DataURL(object): + + def __call__(self, oeb, opts): + self.log = oeb.log + attr_path = XPath('//h:img[@src]') + for item in oeb.spine: + root = item.data + if not hasattr(root, 'xpath'): + continue + for img in attr_path(root): + raw = img.get('src') + if not raw.startswith('data:'): + continue + header, data = raw.partition(',')[0::2] + if not header.startswith('data:image/') or not data: + continue + if ';base64' in header: + data = re.sub(r'\s+', '', data) + from base64 import b64decode + try: + data = b64decode(data) + except Exception: + self.log.error('Found invalid base64 encoded data URI, ignoring it') + continue + else: + data = urlunquote(data) + from imghdr import what + fmt = what(None, data) + if not fmt: + self.log.warn('Image encoded as data URL has unknown format, ignoring') + continue + img.set('src', item.relhref(self.convert_image_data_uri(data, fmt, oeb))) + + def convert_image_data_uri(self, data, fmt, oeb): + self.log('Found image encoded as data URI converting it to normal image') + from calibre import guess_type + item_id, item_href = oeb.manifest.generate('data-url-image', 'data-url-image.' + fmt) + oeb.manifest.add(item_id, item_href, guess_type(item_href)[0], data=data) + return item_href