newdb: Shorten filenames correctly

Ensure that the total path length is <= PATH_LIMIT * 4 and that no
component is longer than PATH_LIMIT on unix.
This commit is contained in:
Kovid Goyal 2013-08-17 13:43:40 +05:30
parent 7cf205f0c7
commit ff8ecb1d56
2 changed files with 32 additions and 13 deletions

View File

@ -1056,25 +1056,31 @@ class DB(object):
''' '''
Construct the directory name for this book based on its metadata. Construct the directory name for this book based on its metadata.
''' '''
author = ascii_filename(author book_id = ' (%d)' % book_id
)[:self.PATH_LIMIT].decode('ascii', 'replace') l = self.PATH_LIMIT - (len(book_id) // 2) - 2
title = ascii_filename(title author = ascii_filename(author)[:l].decode('ascii', 'replace')
)[:self.PATH_LIMIT].decode('ascii', 'replace') title = ascii_filename(title)[:l].decode('ascii', 'replace')
while author[-1] in (' ', '.'): while author[-1] in (' ', '.'):
author = author[:-1] author = author[:-1]
if not author: if not author:
author = ascii_filename(_('Unknown')).decode( author = ascii_filename(_('Unknown')).decode(
'ascii', 'replace') 'ascii', 'replace')
return '%s/%s (%d)'%(author, title, book_id) return '%s/%s%s' % (author, title, book_id)
def construct_file_name(self, book_id, title, author): def construct_file_name(self, book_id, title, author, extlen):
''' '''
Construct the file name for this book based on its metadata. Construct the file name for this book based on its metadata.
''' '''
author = ascii_filename(author extlen = max(extlen, 14) # 14 accounts for ORIGINAL_EPUB
)[:self.PATH_LIMIT].decode('ascii', 'replace') # The PATH_LIMIT on windows already takes into account the doubling
title = ascii_filename(title # (it is used to enforce the total path length limit, individual path
)[:self.PATH_LIMIT].decode('ascii', 'replace') # components can be much longer than the total path length would allow on
# windows).
l = (self.PATH_LIMIT - (extlen // 2) - 2) if iswindows else ((self.PATH_LIMIT - extlen - 2) // 2)
if l < 5:
raise ValueError('Extension length too long: %d' % extlen)
author = ascii_filename(author)[:l].decode('ascii', 'replace')
title = ascii_filename(title)[:l].decode('ascii', 'replace')
name = title + ' - ' + author name = title + ' - ' + author
while name.endswith('.'): while name.endswith('.'):
name = name[:-1] name = name[:-1]
@ -1331,9 +1337,9 @@ class DB(object):
wam.close_handles() wam.close_handles()
def add_format(self, book_id, fmt, stream, title, author, path): def add_format(self, book_id, fmt, stream, title, author, path):
fname = self.construct_file_name(book_id, title, author)
path = os.path.join(self.library_path, path)
fmt = ('.' + fmt.lower()) if fmt else '' fmt = ('.' + fmt.lower()) if fmt else ''
fname = self.construct_file_name(book_id, title, author, len(fmt))
path = os.path.join(self.library_path, path)
dest = os.path.join(path, fname + fmt) dest = os.path.join(path, fname + fmt)
if not os.path.exists(path): if not os.path.exists(path):
os.makedirs(path) os.makedirs(path)
@ -1352,7 +1358,11 @@ class DB(object):
path = self.construct_path_name(book_id, title, author) path = self.construct_path_name(book_id, title, author)
current_path = path_field.for_book(book_id, default_value='') current_path = path_field.for_book(book_id, default_value='')
formats = formats_field.for_book(book_id, default_value=()) formats = formats_field.for_book(book_id, default_value=())
fname = self.construct_file_name(book_id, title, author) try:
extlen = max(len(fmt) for fmt in formats) + 1
except ValueError:
extlen = 10
fname = self.construct_file_name(book_id, title, author, extlen)
# Check if the metadata used to construct paths has changed # Check if the metadata used to construct paths has changed
changed = False changed = False
for fmt in formats: for fmt in formats:

View File

@ -97,3 +97,12 @@ class FilesystemTest(BaseTest):
self.assertEqual(all_ids, cache.all_book_ids()) self.assertEqual(all_ids, cache.all_book_ids())
cache.backend.close() cache.backend.close()
def test_long_filenames(self):
' Test long file names '
# Checks the length limits on the paths generated for book 1 when its
# title and author are far longer than any path component may be.
cache = self.init_cache()
# With a 10000-character title, the stored 'path' field must be no longer
# than 2 * PATH_LIMIT.
cache.set_field('title', {1:'a'*10000})
self.assertLessEqual(len(cache.field_for('path', 1)), cache.backend.PATH_LIMIT * 2)
# The same bound must still hold once the author is 10000 characters too.
cache.set_field('authors', {1:'b'*10000})
self.assertLessEqual(len(cache.field_for('path', 1)), cache.backend.PATH_LIMIT * 2)
# The absolute path of the first listed format may exceed the length of
# the library path by at most 4 * PATH_LIMIT.
# NOTE(review): indexing [0] assumes book 1 has at least one format in the
# cache returned by init_cache() -- confirm against the test fixture.
fpath = cache.format_abspath(1, cache.formats(1)[0])
self.assertLessEqual(len(fpath), len(cache.backend.library_path) + cache.backend.PATH_LIMIT * 4)