From 7ea679768ed07d0ea09b30e5e48d3dad23ae4805 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 23 Jun 2010 14:54:54 -0600
Subject: [PATCH] HTML Input: Handle absolute paths in resource links on windows correctly. Fixes #3031 (HTML input: Improper handling of local URLs)

---
Review notes (text in this area is ignored when the patch is applied):
 * Line structure restored; blank context lines and indentation were
   reconstructed from the hunk line counts and should be checked against
   the target file before applying.
 * resource_adder: decode() was given the unregistered error-handler name
   'error'; it now uses 'strict', so undecodable links raise
   UnicodeDecodeError as intended.
 * resource_adder: bare "except:" clauses narrowed to "except Exception:"
   so KeyboardInterrupt/SystemExit are no longer swallowed.
 * Link.url_to_local_path: on Windows any path with a leading '/' is
   treated as absolute after the slash is stripped; this matches the
   submitted behaviour and is left unchanged.

 src/calibre/ebooks/html/input.py | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
index 6108aa329d..229d71e574 100644
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@@ -20,7 +20,7 @@ from itertools import izip
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.customize.conversion import OptionRecommendation
-from calibre.constants import islinux, isfreebsd
+from calibre.constants import islinux, isfreebsd, iswindows
 from calibre import unicode_path
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
@@ -32,9 +32,14 @@ class Link(object):
 
     @classmethod
     def url_to_local_path(cls, url, base):
-        path = urlunparse(('', '', url.path, url.params, url.query, ''))
+        path = url.path
+        isabs = False
+        if iswindows and path.startswith('/'):
+            path = path[1:]  # e.g. '/C:/dir/x' -> 'C:/dir/x'
+            isabs = True
+        path = urlunparse(('', '', path, url.params, url.query, ''))
         path = unquote(path)
-        if os.path.isabs(path):
+        if isabs or os.path.isabs(path):
             return path
         return os.path.abspath(os.path.join(base, path))
 
@@ -411,10 +416,23 @@ class HTMLInput(InputFormatPlugin):
 
 
     def resource_adder(self, link_, base=None):
-        link = self.urlnormalize(link_)
-        link, frag = self.urldefrag(link)
-        link = unquote(link).replace('/', os.sep)
-        if not link.strip():
+        if not isinstance(link_, unicode):
+            try:
+                link_ = link_.decode('utf-8', 'strict')
+            except Exception:
+                self.log.warn('Failed to decode link %r. Ignoring'%link_)
+                return link_
+        try:
+            l = Link(link_, base if base else os.path.getcwdu())
+        except Exception:
+            self.log.exception('Failed to process link: %r'%link_)
+            return link_
+        if l.path is None:
+            # Not a local resource
+            return link_
+        link = l.path.replace('/', os.sep).strip()
+        frag = l.fragment
+        if not link:
             return link_
         try:
             if base and not os.path.isabs(link):