From c049052dc9cd7158b5bc760e9963c301b414d402 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Apr 2023 17:15:58 +0530 Subject: [PATCH] Add a test to ensure restoring a db from folders preserves extra files Also clean up the restore folder scanning logic a bit --- src/calibre/db/restore.py | 41 +++++++++++++++++++++------------ src/calibre/db/tests/writing.py | 27 ++++++++++++++++++++++ 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/src/calibre/db/restore.py b/src/calibre/db/restore.py index 3bc736de3c..24c0f1db37 100644 --- a/src/calibre/db/restore.py +++ b/src/calibre/db/restore.py @@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en' import os import re import shutil +import sys import time import traceback from contextlib import suppress @@ -16,7 +17,7 @@ from threading import Thread from calibre import force_unicode, isbytestring from calibre.constants import filesystem_encoding -from calibre.db.backend import DB, TRASH_DIR_NAME, DBPrefs +from calibre.db.backend import DB, METADATA_FILE_NAME, TRASH_DIR_NAME, DBPrefs from calibre.db.cache import Cache from calibre.ebooks.metadata.opf2 import OPF from calibre.ptempfile import TemporaryDirectory @@ -29,7 +30,7 @@ NON_EBOOK_EXTENSIONS = frozenset(( def read_opf(dirpath, read_annotations=True): - opf = os.path.join(dirpath, 'metadata.opf') + opf = os.path.join(dirpath, METADATA_FILE_NAME) parsed_opf = OPF(opf, basedir=dirpath) mi = parsed_opf.to_book_metadata() annotations = tuple(parsed_opf.read_annotations()) if read_annotations else () @@ -74,7 +75,6 @@ class Restore(Thread): if not callable(self.progress_callback): self.progress_callback = lambda x, y: x self.dirs = [] - self.ignored_dirs = [] self.failed_dirs = [] self.books = [] self.conflicting_custom_cols = {} @@ -183,26 +183,37 @@ class Restore(Thread): dirnames.remove(TRASH_DIR_NAME) leaf = os.path.basename(dirpath) m = self.db_id_regexp.search(leaf) - if m is None or 'metadata.opf' not in filenames: - 
self.ignored_dirs.append(dirpath) + if m is None or METADATA_FILE_NAME not in filenames: continue - self.dirs.append((dirpath, filenames, m.group(1))) + self.dirs.append((dirpath, list(dirnames), filenames, m.group(1))) + del dirnames[:] self.progress_callback(None, len(self.dirs)) - for i, x in enumerate(self.dirs): - dirpath, filenames, book_id = x + for i, (dirpath, dirnames, filenames, book_id) in enumerate(self.dirs): try: - self.process_dir(dirpath, filenames, book_id) - except: + self.process_dir(dirpath, dirnames, filenames, book_id) + except Exception: self.failed_dirs.append((dirpath, traceback.format_exc())) self.progress_callback(_('Processed') + ' ' + dirpath, i+1) - def process_dir(self, dirpath, filenames, book_id): + def process_dir(self, dirpath, dirnames, filenames, book_id): book_id = int(book_id) - formats = list(filter(is_ebook_file, filenames)) - fmts = [os.path.splitext(x)[1][1:].upper() for x in formats] - sizes = [os.path.getsize(os.path.join(dirpath, x)) for x in formats] - names = [os.path.splitext(x)[0] for x in formats] + def safe_mtime(path): + with suppress(OSError): + return os.path.getmtime(path) + return sys.maxsize + + filenames.sort(key=lambda f: safe_mtime(os.path.join(dirpath, f))) + fmt_map = {} + fmts, formats, sizes, names = [], [], [], [] + for x in filenames: + if is_ebook_file(x): + fmt = os.path.splitext(x)[1][1:].upper() + if fmt and fmt_map.setdefault(fmt, x) is x: + formats.append(x) + sizes.append(os.path.getsize(os.path.join(dirpath, x))) + names.append(os.path.splitext(x)[0]) + fmts.append(fmt) mi, timestamp, annotations = read_opf(dirpath) path = os.path.relpath(dirpath, self.src_library_path).replace(os.sep, '/') diff --git a/src/calibre/db/tests/writing.py b/src/calibre/db/tests/writing.py index ab27e50fa4..ccffb79227 100644 --- a/src/calibre/db/tests/writing.py +++ b/src/calibre/db/tests/writing.py @@ -5,6 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' __docformat__ = 
'restructuredtext en' +import os from collections import namedtuple from functools import partial from io import BytesIO @@ -368,6 +369,22 @@ class WritingTest(BaseTest): af(mb.is_alive()) from calibre.ebooks.metadata.opf2 import OPF book_ids = (1,2,3) + + def read_all_formats(): + fbefore = {} + for book_id in book_ids: + ff = fbefore[book_id] = {} + for fmt in cache.formats(book_id): + ff[fmt] = cache.format(book_id, fmt) + return fbefore + + def read_all_extra_files(book_id=1): + ans = {} + bp = cache.field_for('path', book_id) + for (relpath, fobj, mtime) in cache.backend.iter_extra_files(book_id, bp, cache.fields['formats']): + ans[relpath] = fobj.read() + return ans + for book_id in book_ids: raw = cache.read_backup(book_id) opf = OPF(BytesIO(raw)) @@ -376,6 +393,14 @@ class WritingTest(BaseTest): tested_fields = 'title authors tags'.split() before = {f:cache.all_field_for(f, book_ids) for f in tested_fields} lbefore = tuple(cache.get_all_link_maps_for_book(i) for i in book_ids) + fbefore = read_all_formats() + bookdir = os.path.dirname(cache.format_abspath(1, '__COVER_INTERNAL__')) + with open(os.path.join(bookdir, 'exf'), 'w') as f: + f.write('exf') + os.mkdir(os.path.join(bookdir, 'sub')) + with open(os.path.join(bookdir, 'sub', 'recurse'), 'w') as f: + f.write('recurse') + ebefore = read_all_extra_files() cache.close() from calibre.db.restore import Restore restorer = Restore(cl) @@ -385,6 +410,8 @@ class WritingTest(BaseTest): cache = self.init_cache(cl) ae(before, {f:cache.all_field_for(f, book_ids) for f in tested_fields}) ae(lbefore, tuple(cache.get_all_link_maps_for_book(i) for i in book_ids)) + ae(fbefore, read_all_formats()) + ae(ebefore, read_all_extra_files()) # }}} def test_set_cover(self): # {{{