Add support for FTS index to export/import

This commit is contained in:
Kovid Goyal 2022-06-20 14:51:06 +05:30
parent e111569c99
commit 340241e5d7
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 67 additions and 17 deletions

View File

@ -16,7 +16,7 @@ import shutil
import sys
import time
import uuid
from contextlib import suppress
from contextlib import suppress, closing
from functools import partial
from calibre import as_unicode, force_unicode, isbytestring, prints
@ -1255,7 +1255,6 @@ class DB:
def dump_and_restore(self, callback=None, sql=None):
import codecs
from apsw import Shell
from contextlib import closing
if callback is None:
callback = lambda x: x
uv = int(self.user_version)
@ -2184,14 +2183,18 @@ class DB:
self.executemany('INSERT INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)', vals)
def backup_database(self, path):
    """Write a cleaned-up copy of the main database to ``path``.

    The 'main' database of ``self.conn`` is copied into a new SQLite
    file at ``path`` with the online backup API. In the copy only, all
    rows of ``metadata_dirtied`` are deleted and the file is vacuumed.

    :param path: Filesystem path of the destination database file.
    """
    # closing() guarantees the destination connection is released even if
    # the backup or the cleanup statement raises.
    with closing(apsw.Connection(path)) as dest_db:
        with dest_db.backup('main', self.conn, 'main') as b:
            while not b.done:
                # A busy database is not fatal: retry the step until the
                # backup reports completion.
                with suppress(apsw.BusyError):
                    b.step(128)
        # Run only after the backup object is finished, and only against
        # the copy.
        dest_db.cursor().execute('DELETE FROM metadata_dirtied; VACUUM;')
def backup_fts_database(self, path):
    """Write a vacuumed copy of the full-text-search database to ``path``.

    The source is the database named 'fts_db' on ``self.conn``; it is
    copied into the 'main' database of a new SQLite file at ``path``
    with the online backup API, and the copy is then vacuumed.

    :param path: Filesystem path of the destination database file.
    """
    with closing(apsw.Connection(path)) as target:
        with target.backup('main', self.conn, 'fts_db') as job:
            while not job.done:
                try:
                    job.step(128)
                except apsw.BusyError:
                    # Busy is transient here; loop round and retry the step.
                    pass
        # VACUUM runs after the backup object has been finished.
        target.cursor().execute('VACUUM;')
# }}}

View File

@ -473,6 +473,8 @@ class Cache:
self.fts_job_queue.put(None)
self.fts_queue_thread = None
self.fts_job_queue = Queue()
if fts:
self._update_fts_indexing_numbers()
return fts
@write_api
@ -497,6 +499,7 @@ class Cache:
if not path or not is_fmt_ok(fmt):
with self.write_lock:
self.backend.remove_dirty_fts(book_id, fmt)
self._update_fts_indexing_numbers()
return True
with self.read_lock, open(path, 'rb') as src, PersistentTemporaryFile(suffix=f'.{fmt.lower()}') as pt:
@ -2617,7 +2620,9 @@ class Cache:
key_prefix = as_hex_unicode(library_key)
book_ids = self._all_book_ids()
total = len(book_ids) + 1
format_metadata = {}
has_fts = self.is_fts_enabled()
if has_fts:
total += 1
if progress is not None:
progress('metadata.db', 0, total)
pt = PersistentTemporaryFile('-export.db')
@ -2627,12 +2632,28 @@ class Cache:
with lopen(pt.name, 'rb') as f:
exporter.add_file(f, dbkey)
os.remove(pt.name)
poff = 1
if has_fts:
poff += 1
if progress is not None:
progress('full-text-search.db', 1, total)
pt = PersistentTemporaryFile('-export.db')
pt.close()
self.backend.backup_fts_database(pt.name)
ftsdbkey = key_prefix + ':::' + 'full-text-search.db'
with lopen(pt.name, 'rb') as f:
exporter.add_file(f, ftsdbkey)
os.remove(pt.name)
format_metadata = {}
metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'total':total}
if has_fts:
metadata['full-text-search.db'] = ftsdbkey
for i, book_id in enumerate(book_ids):
if abort is not None and abort.is_set():
return
if progress is not None:
progress(self._field_for('title', book_id), i + 1, total)
progress(self._field_for('title', book_id), i + poff, total)
format_metadata[book_id] = {}
for fmt in self._formats(book_id):
mdata = self.format_metadata(book_id, fmt)
@ -2743,6 +2764,7 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non
from calibre.db.backend import DB
metadata = importer.metadata[library_key]
total = metadata['total']
poff = 1
if progress is not None:
progress('metadata.db', 0, total)
if abort is not None and abort.is_set():
@ -2751,6 +2773,16 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non
src = importer.start_file(metadata['metadata.db'], 'metadata.db for ' + library_path)
shutil.copyfileobj(src, f)
src.close()
if 'full-text-search.db' in metadata:
if progress is not None:
progress('full-text-search.db', 1, total)
if abort is not None and abort.is_set():
return
poff += 1
with open(os.path.join(library_path, 'full-text-search.db'), 'wb') as f:
src = importer.start_file(metadata['full-text-search.db'], 'full-text-search.db for ' + library_path)
shutil.copyfileobj(src, f)
src.close()
cache = Cache(DB(library_path, load_user_formatter_functions=False))
cache.init()
format_data = {int(book_id):data for book_id, data in iteritems(metadata['format_data'])}
@ -2759,7 +2791,7 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non
return
title = cache._field_for('title', book_id)
if progress is not None:
progress(title, i + 1, total)
progress(title, i + poff, total)
cache._update_path((book_id,), mark_as_dirtied=False)
for fmt, fmtkey in iteritems(fmt_key_map):
if fmt == '.cover':

View File

@ -19,8 +19,6 @@ from calibre.utils.date import EPOCH, utcnow
from .pool import Pool
from .schema_upgrade import SchemaUpgrade
# TODO: calibre export/import should preserve indexed data
def print(*args, **kwargs):
kwargs['file'] = sys.__stdout__

View File

@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import unittest, os
import unittest, os, time
from io import BytesIO
from calibre.constants import iswindows
@ -166,6 +166,23 @@ class FilesystemTest(BaseTest):
for fmt in cache.formats(book_id):
self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt))
self.assertEqual(cache.format_metadata(book_id, fmt)['mtime'], cache.format_metadata(book_id, fmt)['mtime'])
cache.add_format(1, 'TXT', BytesIO(b'testing exim'))
cache.fts_indexing_sleep_time = 0.001
cache.enable_fts()
cache.set_fts_num_of_workers(4)
st = time.monotonic()
while cache.fts_indexing_left > 0 and time.monotonic() - st < 15:
time.sleep(0.05)
if cache.fts_indexing_left > 0:
raise ValueError('FTS indexing did not complete')
self.assertEqual(cache.fts_search('exim')[0]['id'], 1)
with TemporaryDirectory('export_lib') as tdir, TemporaryDirectory('import_lib') as idir:
exporter = Exporter(tdir)
cache.export_library('l', exporter)
exporter.commit()
importer = Importer(tdir)
ic = import_library('l', importer, idir)
self.assertEqual(ic.fts_search('exim')[0]['id'], 1)
def test_find_books_in_directory(self):
from calibre.db.adding import find_books_in_directory, compile_rule