Conversion: Convert images encoded as data URIs in the input document into normal images, as some ebook reader software cannot handle data URIs. Fixes #1405803 [Can't see images in *.mobi in Kindle after convertation](https://bugs.launchpad.net/calibre/+bug/1405803)

This commit is contained in:
Kovid Goyal 2014-12-27 10:19:39 +05:30
parent b055c70cb5
commit 9bb34dffa3
2 changed files with 52 additions and 0 deletions

View File

@ -1067,6 +1067,8 @@ OptionRecommendation(name='search_replace',
self.oeb.plumber_output_format = self.output_fmt or ''
from calibre.ebooks.oeb.transforms.data_url import DataURL
DataURL()(self.oeb, self.opts)
from calibre.ebooks.oeb.transforms.guide import Clean
Clean()(self.oeb, self.opts)
pr(0.1)

View File

@ -0,0 +1,50 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from calibre.ebooks.oeb.base import XPath, urlunquote
class DataURL(object):
    """Replace images embedded as ``data:`` URIs with manifest images.

    Calling an instance walks every spine item of the book, finds
    ``<img>`` elements whose ``src`` is a ``data:image/...`` URI, stores
    the decoded payload as a new manifest item and points ``src`` at it.
    """

    def __call__(self, oeb, opts):
        """Rewrite all data-URI image sources found in the spine of *oeb*.

        *opts* is accepted for interface compatibility but is not read
        here.
        """
        self.log = oeb.log
        find_images = XPath('//h:img[@src]')
        for spine_item in oeb.spine:
            tree = spine_item.data
            if not hasattr(tree, 'xpath'):
                # Item data is not something we can run XPath on.
                continue
            for node in find_images(tree):
                decoded = self._decode_image(node.get('src'))
                if decoded is None:
                    continue
                payload, fmt = decoded
                node.set('src', spine_item.relhref(
                    self.convert_image_data_uri(payload, fmt, oeb)))

    def _decode_image(self, src):
        """Decode the image carried by the data URI *src*.

        Returns a ``(payload, fmt)`` tuple, where *fmt* is the format
        name reported by ``imghdr.what``, or ``None`` when *src* is not
        an image data URI, cannot be base64-decoded, or has a format
        that ``imghdr`` does not recognise.
        """
        if not src.startswith('data:'):
            return None
        # Everything before the first comma is the header (media type and
        # encoding flags); everything after it is the payload.
        header, _, payload = src.partition(',')
        if not (header.startswith('data:image/') and payload):
            return None
        if ';base64' in header:
            # Drop any whitespace embedded in the base64 text before decoding.
            payload = re.sub(r'\s+', '', payload)
            from base64 import b64decode
            try:
                payload = b64decode(payload)
            except Exception:
                self.log.error('Found invalid base64 encoded data URI, ignoring it')
                return None
        else:
            # NOTE(review): presumably percent-decodes the payload; the
            # return type of urlunquote is not visible here -- confirm it
            # is suitable for binary image data.
            payload = urlunquote(payload)
        from imghdr import what
        fmt = what(None, payload)
        if not fmt:
            self.log.warn('Image encoded as data URL has unknown format, ignoring')
            return None
        return payload, fmt

    def convert_image_data_uri(self, data, fmt, oeb):
        """Add *data* to the manifest of *oeb* as an image and return its href.

        The id and href are generated by the manifest from the base name
        ``data-url-image`` with *fmt* as the file extension; the media
        type is guessed from the generated href.
        """
        self.log('Found image encoded as data URI converting it to normal image')
        from calibre import guess_type
        new_id, new_href = oeb.manifest.generate(
            'data-url-image', 'data-url-image.' + fmt)
        media_type = guess_type(new_href)[0]
        oeb.manifest.add(new_id, new_href, media_type, data=data)
        return new_href