diff --git a/src/calibre/ebooks/oeb/polish/download.py b/src/calibre/ebooks/oeb/polish/download.py index c27c09a709..3c0c757ce2 100644 --- a/src/calibre/ebooks/oeb/polish/download.py +++ b/src/calibre/ebooks/oeb/polish/download.py @@ -2,20 +2,27 @@ # vim:fileencoding=utf-8 # License: GPLv3 Copyright: 2016, Kovid Goyal -from __future__ import (unicode_literals, division, absolute_import, - print_function) -import shutil, os, posixpath, cgi, mimetypes +from __future__ import absolute_import, division, print_function, unicode_literals + +import cgi +import mimetypes +import os +import posixpath +import re +import shutil +from base64 import standard_b64decode from collections import defaultdict from contextlib import closing -from urlparse import urlparse -from multiprocessing.dummy import Pool from functools import partial +from io import BytesIO +from multiprocessing.dummy import Pool from tempfile import NamedTemporaryFile from urllib2 import urlopen +from urlparse import urlparse from calibre import as_unicode, sanitize_file_name2 +from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, barename, iterlinks from calibre.ebooks.oeb.polish.utils import guess_type -from calibre.ebooks.oeb.base import OEB_DOCS, iterlinks, barename, OEB_STYLES from calibre.ptempfile import TemporaryDirectory from calibre.web import get_download_filename_from_response @@ -25,7 +32,7 @@ def is_external(url): purl = urlparse(url) except Exception: return False - return purl.scheme in ('http', 'https', 'file', 'ftp') + return purl.scheme in ('http', 'https', 'file', 'ftp', 'data') def iterhtmllinks(container, name): @@ -104,6 +111,24 @@ def download_one(tdir, timeout, progress_report, url): src = lopen(purl.path, 'rb') filename = os.path.basename(src) sz = (src.seek(0, os.SEEK_END), src.tell(), src.seek(0))[1] + elif purl.scheme == 'data': + prefix, payload = purl.path.split(',', 1) + parts = prefix.split(';') + if parts and parts[-1].lower() == 'base64': + payload = re.sub(r'\s+', '', payload) + payload = standard_b64decode(payload) + else: + payload = payload.encode('utf-8') + src = BytesIO(payload) + sz = len(payload) + ext = 'unknown' + for x in parts: + if '=' not in x and '/' in x: + exts = mimetypes.guess_all_extensions(x) + if exts: + ext = exts[0] + break + filename = 'data-uri.' + ext else: src = urlopen(url, timeout=timeout) filename = get_filename(purl, src) diff --git a/src/calibre/gui2/tweak_book/download.py b/src/calibre/gui2/tweak_book/download.py index 984228019d..abfd044ad7 100644 --- a/src/calibre/gui2/tweak_book/download.py +++ b/src/calibre/gui2/tweak_book/download.py @@ -42,14 +42,20 @@ class ChooseResources(QWidget): @property def resources(self): - return {i.text():self.original_resources[i.text()] for i in self if i.checkState() == Qt.Checked} + return {i.data(Qt.UserRole):self.original_resources[i.data(Qt.UserRole)] for i in self if i.checkState() == Qt.Checked} @resources.setter def resources(self, resources): self.items.clear() self.original_resources = resources + dc = 0 for url in resources: - i = QListWidgetItem(url, self.items) + text = url + if text.startswith('data:'): + dc += 1 + text = _('Data URL ({})').format(dc) + i = QListWidgetItem(text, self.items) + i.setData(Qt.UserRole, url) i.setCheckState(Qt.Checked) i.setFlags(Qt.ItemIsUserCheckable | Qt.ItemIsEnabled)