From 7dd371e5e37465475164a5475ca177abde84212d Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 22 Jan 2012 16:32:20 +0530
Subject: [PATCH] =?UTF-8?q?HTML=20Input:=20Fix=20handling=20of=20files=20w?=
 =?UTF-8?q?ith=20=C3=A4=20characters=20in=20their=20filenames.=20Fixes=20#?=
 =?UTF-8?q?919931=20(ebook-convert=20crash=20on=20converting=20from=20html?=
 =?UTF-8?q?-source)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/calibre/ebooks/html/input.py |  4 +++-
 src/calibre/ebooks/oeb/base.py   | 17 +++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
index d60baf8bce..d303dd66a5 100644
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@@ -475,7 +475,9 @@ class HTMLInput(InputFormatPlugin):
             # bhref refers to an already existing file. The read() method of
             # DirContainer will call unquote on it before trying to read the
             # file, therefore we quote it here.
-            item.html_input_href = quote(bhref)
+            if isinstance(bhref, unicode):
+                bhref = bhref.encode('utf-8')
+            item.html_input_href = quote(bhref).decode('utf-8')
             if guessed in self.OEB_STYLES:
                 item.override_css_fetch = partial(
                         self.css_import_handler, os.path.dirname(link))
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index bc01cc13cd..8924f1e913 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -425,15 +425,24 @@ class DirContainer(object):
                     self.opfname = path
                     return
 
+    def _unquote(self, path):
+        # urlunquote must run on a bytestring and will return a bytestring
+        # if it runs on a unicode object, it returns a double encoded unicode
+        # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
+        # and the latter is correct
+        if isinstance(path, unicode):
+            path = path.encode('utf-8')
+        return urlunquote(path).decode('utf-8')
+
     def read(self, path):
         if path is None:
             path = self.opfname
-        path = os.path.join(self.rootdir, path)
-        with open(urlunquote(path), 'rb') as f:
+        path = os.path.join(self.rootdir, self._unquote(path))
+        with open(path, 'rb') as f:
             return f.read()
 
     def write(self, path, data):
-        path = os.path.join(self.rootdir, urlunquote(path))
+        path = os.path.join(self.rootdir, self._unquote(path))
         dir = os.path.dirname(path)
         if not os.path.isdir(dir):
             os.makedirs(dir)
@@ -442,7 +451,7 @@ class DirContainer(object):
 
     def exists(self, path):
         try:
-            path = os.path.join(self.rootdir, urlunquote(path))
+            path = os.path.join(self.rootdir, self._unquote(path))
         except ValueError: #Happens if path contains quoted special chars
             return False
         return os.path.isfile(path)