Edit book: When downloading external resources, also convert data URLs into files. Fixes #1774945 [[Enhancement] Image DataURI](https://bugs.launchpad.net/calibre/+bug/1774945)

This commit is contained in:
Kovid Goyal 2018-06-12 17:59:32 +05:30
parent da35f86fa9
commit c82ee257dd
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 40 additions and 9 deletions

View File

@ -2,20 +2,27 @@
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import (unicode_literals, division, absolute_import,
print_function)
import shutil, os, posixpath, cgi, mimetypes
from __future__ import absolute_import, division, print_function, unicode_literals
import cgi
import mimetypes
import os
import posixpath
import re
import shutil
from base64 import standard_b64decode
from collections import defaultdict
from contextlib import closing
from urlparse import urlparse
from multiprocessing.dummy import Pool
from functools import partial
from io import BytesIO
from multiprocessing.dummy import Pool
from tempfile import NamedTemporaryFile
from urllib2 import urlopen
from urlparse import urlparse
from calibre import as_unicode, sanitize_file_name2
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, barename, iterlinks
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.base import OEB_DOCS, iterlinks, barename, OEB_STYLES
from calibre.ptempfile import TemporaryDirectory
from calibre.web import get_download_filename_from_response
@ -25,7 +32,7 @@ def is_external(url):
purl = urlparse(url)
except Exception:
return False
return purl.scheme in ('http', 'https', 'file', 'ftp')
return purl.scheme in ('http', 'https', 'file', 'ftp', 'data')
def iterhtmllinks(container, name):
@ -104,6 +111,24 @@ def download_one(tdir, timeout, progress_report, url):
src = lopen(purl.path, 'rb')
filename = os.path.basename(src)
sz = (src.seek(0, os.SEEK_END), src.tell(), src.seek(0))[1]
elif purl.scheme == 'data':
prefix, payload = purl.path.split(',', 1)
parts = prefix.split(';')
if parts and parts[-1].lower() == 'base64':
payload = re.sub(r'\s+', '', payload)
payload = standard_b64decode(payload)
else:
payload = payload.encode('utf-8')
src = BytesIO(payload)
sz = len(payload)
ext = 'unknown'
for x in parts:
if '=' not in x and '/' in x:
exts = mimetypes.guess_all_extensions(x)
if exts:
ext = exts[0]
break
filename = 'data-uri.' + ext
else:
src = urlopen(url, timeout=timeout)
filename = get_filename(purl, src)

View File

@ -42,14 +42,20 @@ class ChooseResources(QWidget):
@property
def resources(self):
return {i.text():self.original_resources[i.text()] for i in self if i.checkState() == Qt.Checked}
return {i.data(Qt.UserRole):self.original_resources[i.data(Qt.UserRole)] for i in self if i.checkState() == Qt.Checked}
@resources.setter
def resources(self, resources):
self.items.clear()
self.original_resources = resources
dc = 0
for url in resources:
i = QListWidgetItem(url, self.items)
text = url
if text.startswith('data:'):
dc += 1
text = _('Data URL ({})').format(dc)
i = QListWidgetItem(text, self.items)
i.setData(Qt.UserRole, url)
i.setCheckState(Qt.Checked)
i.setFlags(Qt.ItemIsUserCheckable | Qt.ItemIsEnabled)