Fix #1715. Improve handling of paths which contain URI reserved characters.

This commit is contained in:
Marshall T. Vandegrift 2009-02-02 18:52:59 -05:00
parent f524968ce2
commit d4eed478b1
2 changed files with 9 additions and 5 deletions

View File

@@ -9,7 +9,7 @@ directory or zip file. All the action starts in :function:`create_dir`.
''' '''
import sys, re, os, shutil, logging, tempfile, cStringIO, operator, functools import sys, re, os, shutil, logging, tempfile, cStringIO, operator, functools
from urlparse import urlparse from urlparse import urlparse, urlunparse
from urllib import unquote from urllib import unquote
from lxml import etree from lxml import etree
@@ -98,7 +98,8 @@ class Link(object):
@classmethod @classmethod
def url_to_local_path(cls, url, base): def url_to_local_path(cls, url, base):
path = url.path path = urlunparse(('', '', url.path, url.params, url.query, ''))
path = unquote(path)
if os.path.isabs(path): if os.path.isabs(path):
return path return path
return os.path.abspath(os.path.join(base, path)) return os.path.abspath(os.path.join(base, path))
@@ -111,11 +112,11 @@ class Link(object):
''' '''
assert isinstance(url, unicode) and isinstance(base, unicode) assert isinstance(url, unicode) and isinstance(base, unicode)
self.url = url self.url = url
self.parsed_url = urlparse(unquote(self.url)) self.parsed_url = urlparse(self.url)
self.is_local = self.parsed_url.scheme in ('', 'file') self.is_local = self.parsed_url.scheme in ('', 'file')
self.is_internal = self.is_local and not bool(self.parsed_url.path) self.is_internal = self.is_local and not bool(self.parsed_url.path)
self.path = None self.path = None
self.fragment = self.parsed_url.fragment self.fragment = unquote(self.parsed_url.fragment)
if self.is_local and not self.is_internal: if self.is_local and not self.is_internal:
self.path = self.url_to_local_path(self.parsed_url, base) self.path = self.url_to_local_path(self.parsed_url, base)

View File

@@ -154,6 +154,9 @@ def urlquote(href):
def urlnormalize(href): def urlnormalize(href):
parts = urlparse(href) parts = urlparse(href)
if not parts.scheme:
path, frag = urldefrag(href)
parts = ('', '', path, '', '', frag)
parts = (part.replace('\\', '/') for part in parts) parts = (part.replace('\\', '/') for part in parts)
parts = (urlunquote(part) for part in parts) parts = (urlunquote(part) for part in parts)
parts = (urlquote(part) for part in parts) parts = (urlquote(part) for part in parts)
@@ -1323,7 +1326,7 @@ class OEBBook(object):
with TemporaryDirectory('_html_cover') as tdir: with TemporaryDirectory('_html_cover') as tdir:
writer = DirWriter() writer = DirWriter()
writer.dump(self, tdir) writer.dump(self, tdir)
path = os.path.join(tdir, hcover.href) path = os.path.join(tdir, urlunquote(hcover.href))
renderer = CoverRenderer(path) renderer = CoverRenderer(path)
data = renderer.image_data data = renderer.image_data
id, href = self.manifest.generate('cover', 'cover.jpeg') id, href = self.manifest.generate('cover', 'cover.jpeg')