def relpath(target, base=os.curdir):
    """
    Return a relative path to the target from either the current dir or an optional base dir.
    Base can be a directory specified either as absolute or relative to current dir.

    @param target: Path to an existing file or directory.
    @param base: Existing directory the result is relative to. Defaults to
                 the current directory.
    @return: Path that leads from base to target. If both resolve to the
             same location, C{os.curdir} is returned.
    @raise OSError: If target does not exist, if base is not an existing
                    directory, or if target and base are on different drives.
    """

    if not os.path.exists(target):
        raise OSError('Target does not exist: '+target)

    if not os.path.isdir(base):
        raise OSError('Base is not a directory or does not exist: '+base)

    # splitdrive() yields an empty drive on platforms without drive letters,
    # so the drive check below can never fire there.
    base_drive, base_tail = os.path.splitdrive(os.path.abspath(base))
    target_drive, target_tail = os.path.splitdrive(os.path.abspath(target))

    # The target may be on a completely different drive from the base.
    # normcase() makes the comparison case-insensitive where the platform
    # is (e.g. 'c:' and 'C:' are the same drive).
    if os.path.normcase(base_drive) != os.path.normcase(target_drive):
        raise OSError('Target is on a different drive to base. Target: '
                      + target_drive.upper() + ', base: ' + base_drive.upper())

    # Drop empty components (leading separator, or the trailing one produced
    # by a bare root such as '/') so that every entry is a real directory level.
    base_list = [part for part in base_tail.split(os.sep) if part]
    target_list = [part for part in target_tail.split(os.sep) if part]

    # Starting from the filepath root, count how many leading components are
    # shared by base and target.
    common = 0
    for base_part, target_part in zip(base_list, target_list):
        if os.path.normcase(base_part) != os.path.normcase(target_part):
            break
        common += 1

    # Climb out of the part of base that is not shared, then descend into target.
    rel_list = [os.pardir] * (len(base_list) - common) + target_list[common:]
    if not rel_list:
        # target and base are the same location; os.path.join() needs at
        # least one argument, so answer with the current-directory marker.
        return os.curdir
    return os.path.join(*rel_list)