From d4eed478b1c8d7fae2d8ce0ddc87f4ad8fc1391c Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Mon, 2 Feb 2009 18:52:59 -0500 Subject: [PATCH] Fix #1715. Improve handling of paths which contain URI reserved characters. --- src/calibre/ebooks/html.py | 9 +++++---- src/calibre/ebooks/oeb/base.py | 5 ++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py index de863cca75..0229fd6124 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -9,7 +9,7 @@ directory or zip file. All the action starts in :function:`create_dir`. ''' import sys, re, os, shutil, logging, tempfile, cStringIO, operator, functools -from urlparse import urlparse +from urlparse import urlparse, urlunparse from urllib import unquote from lxml import etree @@ -98,7 +98,8 @@ class Link(object): @classmethod def url_to_local_path(cls, url, base): - path = url.path + path = urlunparse(('', '', url.path, url.params, url.query, '')) + path = unquote(path) if os.path.isabs(path): return path return os.path.abspath(os.path.join(base, path)) @@ -111,11 +112,11 @@ class Link(object): ''' assert isinstance(url, unicode) and isinstance(base, unicode) self.url = url - self.parsed_url = urlparse(unquote(self.url)) + self.parsed_url = urlparse(self.url) self.is_local = self.parsed_url.scheme in ('', 'file') self.is_internal = self.is_local and not bool(self.parsed_url.path) self.path = None - self.fragment = self.parsed_url.fragment + self.fragment = unquote(self.parsed_url.fragment) if self.is_local and not self.is_internal: self.path = self.url_to_local_path(self.parsed_url, base) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 7ed56bee0b..94402ae882 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -154,6 +154,9 @@ def urlquote(href): def urlnormalize(href): parts = urlparse(href) + if not parts.scheme: + path, frag = urldefrag(href) + parts = ('', '', path, '', '', frag) parts = (part.replace('\\', '/') for part in parts) parts = (urlunquote(part) for part in parts) parts = (urlquote(part) for part in parts) @@ -1323,7 +1326,7 @@ class OEBBook(object): with TemporaryDirectory('_html_cover') as tdir: writer = DirWriter() writer.dump(self, tdir) - path = os.path.join(tdir, hcover.href) + path = os.path.join(tdir, urlunquote(hcover.href)) renderer = CoverRenderer(path) data = renderer.image_data id, href = self.manifest.generate('cover', 'cover.jpeg')