Fix #7250 ("Save to disk" cannot save non-ASCII file name correctly if not convert to ASCII)

This commit is contained in:
Kovid Goyal 2011-03-06 11:42:22 -07:00
parent 43fcdf8ca9
commit 56a89ca29f
3 changed files with 32 additions and 51 deletions

View File

@ -61,8 +61,9 @@ def osx_version():
if m:
return int(m.group(1)), int(m.group(2)), int(m.group(3))
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]')
_filename_sanitize_unicode = frozenset([u'\\', u'|', u'?', u'*', u'<',
u'"', u':', u'>', u'+', u'/'] + list(map(unichr, xrange(32))))
def sanitize_file_name(name, substitute='_', as_unicode=False):
'''
@ -85,6 +86,31 @@ def sanitize_file_name(name, substitute='_', as_unicode=False):
# Windows doesn't like path components that end with a period
if one.endswith('.'):
one = one[:-1]+'_'
# Names starting with a period are hidden on Unix
if one.startswith('.'):
one = '_' + one[1:]
return one
def sanitize_file_name_unicode(name, substitute='_'):
'''
Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
The set of invalid characters is the union of the invalid characters in Windows,
OS X and Linux. Also removes leading and trailing whitespace.
**WARNING:** This function also replaces path separators, so only pass file names
and not full paths to it.
'''
if not isinstance(name, unicode):
return sanitize_file_name(name, substitute=substitute, as_unicode=True)
chars = [substitute if c in _filename_sanitize_unicode else c for c in
name]
one = u''.join(chars)
one = re.sub(r'\s', ' ', one).strip()
one = re.sub(r'^\.+$', '_', one)
one = one.replace('..', substitute)
# Windows doesn't like path components that end with a period or space
if one and one[-1] in ('.', ' '):
one = one[:-1]+'_'
# Names starting with a period are hidden on Unix
if one.startswith('.'):
one = '_' + one[1:]
return one

View File

@ -12,13 +12,13 @@ from calibre.constants import DEBUG
from calibre.utils.config import Config, StringConfig, tweaks
from calibre.utils.formatter import TemplateFormatter
from calibre.utils.filenames import shorten_components_to, supports_long_names, \
ascii_filename, sanitize_file_name
ascii_filename
from calibre.ebooks.metadata.opf2 import metadata_to_opf
from calibre.ebooks.metadata.meta import set_metadata
from calibre.constants import preferred_encoding, filesystem_encoding
from calibre.constants import preferred_encoding
from calibre.ebooks.metadata import fmt_sidx
from calibre.ebooks.metadata import title_sort
from calibre import strftime, prints
from calibre import strftime, prints, sanitize_file_name_unicode
plugboard_any_device_value = 'any device'
plugboard_any_format_value = 'any format'
@ -197,12 +197,10 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
format_args[key] = ''
components = SafeFormat().safe_format(template, format_args,
'G_C-EXCEPTION!', mi)
components = [x.strip() for x in components.split('/') if x.strip()]
components = [x.strip() for x in components.split('/')]
components = [sanitize_func(x) for x in components if x]
if not components:
components = [str(id)]
components = [x.encode(filesystem_encoding, 'replace') if isinstance(x,
unicode) else x for x in components]
if to_lowercase:
components = [x.lower() for x in components]
if replace_whitespace:
@ -247,7 +245,7 @@ def do_save_book_to_disk(id_, mi, cover, plugboards,
return True, id_, mi.title
components = get_components(opts.template, mi, id_, opts.timefmt, length,
ascii_filename if opts.asciiize else sanitize_file_name,
ascii_filename if opts.asciiize else sanitize_file_name_unicode,
to_lowercase=opts.to_lowercase,
replace_whitespace=opts.replace_whitespace)
base_path = os.path.join(root, *components)
@ -329,8 +327,6 @@ def do_save_book_to_disk(id_, mi, cover, plugboards,
def _sanitize_args(root, opts):
if opts is None:
opts = config().parse()
if isinstance(root, unicode):
root = root.encode(filesystem_encoding)
root = os.path.abspath(root)
opts.template = preprocess_template(opts.template)

View File

@ -72,47 +72,6 @@ if not _run_once:
pass
################################################################################
# Improve builtin path functions to handle unicode sensibly
_abspath = os.path.abspath
def my_abspath(path, encoding=sys.getfilesystemencoding()):
'''
Work around for buggy os.path.abspath. This function accepts either byte strings,
in which it calls os.path.abspath, or unicode string, in which case it first converts
to byte strings using `encoding`, calls abspath and then decodes back to unicode.
'''
to_unicode = False
if encoding is None:
encoding = preferred_encoding
if isinstance(path, unicode):
path = path.encode(encoding)
to_unicode = True
res = _abspath(path)
if to_unicode:
res = res.decode(encoding)
return res
os.path.abspath = my_abspath
_join = os.path.join
def my_join(a, *p):
encoding=sys.getfilesystemencoding()
if not encoding:
encoding = preferred_encoding
p = [a] + list(p)
_unicode = False
for i in p:
if isinstance(i, unicode):
_unicode = True
break
p = [i.encode(encoding) if isinstance(i, unicode) else i for i in p]
res = _join(*p)
if _unicode:
res = res.decode(encoding)
return res
os.path.join = my_join
def local_open(name, mode='r', bufsize=-1):
'''