Fix #1715. Improve handling of paths which contain URI reserved characters.

This commit is contained in:
Marshall T. Vandegrift 2009-02-02 18:52:59 -05:00
parent f524968ce2
commit d4eed478b1
2 changed files with 9 additions and 5 deletions

View File

@@ -9,7 +9,7 @@ directory or zip file. All the action starts in :function:`create_dir`.
'''
import sys, re, os, shutil, logging, tempfile, cStringIO, operator, functools
-from urlparse import urlparse
+from urlparse import urlparse, urlunparse
from urllib import unquote
from lxml import etree
@@ -98,7 +98,8 @@ class Link(object):
@classmethod
def url_to_local_path(cls, url, base):
-path = url.path
+path = urlunparse(('', '', url.path, url.params, url.query, ''))
+path = unquote(path)
if os.path.isabs(path):
return path
return os.path.abspath(os.path.join(base, path))
@@ -111,11 +112,11 @@ class Link(object):
'''
assert isinstance(url, unicode) and isinstance(base, unicode)
self.url = url
-self.parsed_url = urlparse(unquote(self.url))
+self.parsed_url = urlparse(self.url)
self.is_local = self.parsed_url.scheme in ('', 'file')
self.is_internal = self.is_local and not bool(self.parsed_url.path)
self.path = None
-self.fragment = self.parsed_url.fragment
+self.fragment = unquote(self.parsed_url.fragment)
if self.is_local and not self.is_internal:
self.path = self.url_to_local_path(self.parsed_url, base)

View File

@@ -154,6 +154,9 @@ def urlquote(href):
def urlnormalize(href):
parts = urlparse(href)
+if not parts.scheme:
+path, frag = urldefrag(href)
+parts = ('', '', path, '', '', frag)
parts = (part.replace('\\', '/') for part in parts)
parts = (urlunquote(part) for part in parts)
parts = (urlquote(part) for part in parts)
@@ -1323,7 +1326,7 @@ class OEBBook(object):
with TemporaryDirectory('_html_cover') as tdir:
writer = DirWriter()
writer.dump(self, tdir)
-path = os.path.join(tdir, hcover.href)
+path = os.path.join(tdir, urlunquote(hcover.href))
renderer = CoverRenderer(path)
data = renderer.image_data
id, href = self.manifest.generate('cover', 'cover.jpeg')