Add support for FTS index to export/import

This commit is contained in:
Kovid Goyal 2022-06-20 14:51:06 +05:30
parent e111569c99
commit 340241e5d7
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 67 additions and 17 deletions

View File

@@ -16,7 +16,7 @@ import shutil
import sys import sys
import time import time
import uuid import uuid
from contextlib import suppress from contextlib import suppress, closing
from functools import partial from functools import partial
from calibre import as_unicode, force_unicode, isbytestring, prints from calibre import as_unicode, force_unicode, isbytestring, prints
@@ -1255,7 +1255,6 @@ class DB:
def dump_and_restore(self, callback=None, sql=None): def dump_and_restore(self, callback=None, sql=None):
import codecs import codecs
from apsw import Shell from apsw import Shell
from contextlib import closing
if callback is None: if callback is None:
callback = lambda x: x callback = lambda x: x
uv = int(self.user_version) uv = int(self.user_version)
@@ -2184,14 +2183,18 @@ class DB:
self.executemany('INSERT INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)', vals) self.executemany('INSERT INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)', vals)
def backup_database(self, path):
    '''
    Copy the main metadata database to the file at *path* using the
    SQLite online backup API, then strip transient state from the copy.

    closing() guarantees the destination connection is released even if
    the backup or the cleanup SQL raises, fixing the leak in the old
    explicit dest_db.close() form.
    '''
    with closing(apsw.Connection(path)) as dest_db:
        with dest_db.backup('main', self.conn, 'main') as b:
            while not b.done:
                # The source connection may be momentarily busy;
                # silently retry the next batch of pages.
                with suppress(apsw.BusyError):
                    b.step(128)
        # The copy must not carry dirtied-metadata bookkeeping; also
        # compact it so the exported file is as small as possible.
        dest_db.cursor().execute('DELETE FROM metadata_dirtied; VACUUM;')
def backup_fts_database(self, path):
    '''
    Write a copy of the attached full text search database to the file
    at *path*, then compact the copy.
    '''
    dest_db = apsw.Connection(path)
    with closing(dest_db):
        backup = dest_db.backup('main', self.conn, 'fts_db')
        with backup as bk:
            while not bk.done:
                # keep stepping; ignore transient busy errors from the
                # source connection and try again
                with suppress(apsw.BusyError):
                    bk.step(128)
        dest_db.cursor().execute('VACUUM;')
# }}} # }}}

View File

@@ -473,6 +473,8 @@ class Cache:
self.fts_job_queue.put(None) self.fts_job_queue.put(None)
self.fts_queue_thread = None self.fts_queue_thread = None
self.fts_job_queue = Queue() self.fts_job_queue = Queue()
if fts:
self._update_fts_indexing_numbers()
return fts return fts
@write_api @write_api
@@ -497,6 +499,7 @@ class Cache:
if not path or not is_fmt_ok(fmt): if not path or not is_fmt_ok(fmt):
with self.write_lock: with self.write_lock:
self.backend.remove_dirty_fts(book_id, fmt) self.backend.remove_dirty_fts(book_id, fmt)
self._update_fts_indexing_numbers()
return True return True
with self.read_lock, open(path, 'rb') as src, PersistentTemporaryFile(suffix=f'.{fmt.lower()}') as pt: with self.read_lock, open(path, 'rb') as src, PersistentTemporaryFile(suffix=f'.{fmt.lower()}') as pt:
@@ -2617,7 +2620,9 @@ class Cache:
key_prefix = as_hex_unicode(library_key) key_prefix = as_hex_unicode(library_key)
book_ids = self._all_book_ids() book_ids = self._all_book_ids()
total = len(book_ids) + 1 total = len(book_ids) + 1
format_metadata = {} has_fts = self.is_fts_enabled()
if has_fts:
total += 1
if progress is not None: if progress is not None:
progress('metadata.db', 0, total) progress('metadata.db', 0, total)
pt = PersistentTemporaryFile('-export.db') pt = PersistentTemporaryFile('-export.db')
@@ -2627,12 +2632,28 @@ class Cache:
with lopen(pt.name, 'rb') as f: with lopen(pt.name, 'rb') as f:
exporter.add_file(f, dbkey) exporter.add_file(f, dbkey)
os.remove(pt.name) os.remove(pt.name)
poff = 1
if has_fts:
poff += 1
if progress is not None:
progress('full-text-search.db', 1, total)
pt = PersistentTemporaryFile('-export.db')
pt.close()
self.backend.backup_fts_database(pt.name)
ftsdbkey = key_prefix + ':::' + 'full-text-search.db'
with lopen(pt.name, 'rb') as f:
exporter.add_file(f, ftsdbkey)
os.remove(pt.name)
format_metadata = {}
metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'total':total} metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'total':total}
if has_fts:
metadata['full-text-search.db'] = ftsdbkey
for i, book_id in enumerate(book_ids): for i, book_id in enumerate(book_ids):
if abort is not None and abort.is_set(): if abort is not None and abort.is_set():
return return
if progress is not None: if progress is not None:
progress(self._field_for('title', book_id), i + 1, total) progress(self._field_for('title', book_id), i + poff, total)
format_metadata[book_id] = {} format_metadata[book_id] = {}
for fmt in self._formats(book_id): for fmt in self._formats(book_id):
mdata = self.format_metadata(book_id, fmt) mdata = self.format_metadata(book_id, fmt)
@@ -2743,6 +2764,7 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non
from calibre.db.backend import DB from calibre.db.backend import DB
metadata = importer.metadata[library_key] metadata = importer.metadata[library_key]
total = metadata['total'] total = metadata['total']
poff = 1
if progress is not None: if progress is not None:
progress('metadata.db', 0, total) progress('metadata.db', 0, total)
if abort is not None and abort.is_set(): if abort is not None and abort.is_set():
@@ -2751,6 +2773,16 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non
src = importer.start_file(metadata['metadata.db'], 'metadata.db for ' + library_path) src = importer.start_file(metadata['metadata.db'], 'metadata.db for ' + library_path)
shutil.copyfileobj(src, f) shutil.copyfileobj(src, f)
src.close() src.close()
if 'full-text-search.db' in metadata:
if progress is not None:
progress('full-text-search.db', 1, total)
if abort is not None and abort.is_set():
return
poff += 1
with open(os.path.join(library_path, 'full-text-search.db'), 'wb') as f:
src = importer.start_file(metadata['full-text-search.db'], 'full-text-search.db for ' + library_path)
shutil.copyfileobj(src, f)
src.close()
cache = Cache(DB(library_path, load_user_formatter_functions=False)) cache = Cache(DB(library_path, load_user_formatter_functions=False))
cache.init() cache.init()
format_data = {int(book_id):data for book_id, data in iteritems(metadata['format_data'])} format_data = {int(book_id):data for book_id, data in iteritems(metadata['format_data'])}
@@ -2759,7 +2791,7 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non
return return
title = cache._field_for('title', book_id) title = cache._field_for('title', book_id)
if progress is not None: if progress is not None:
progress(title, i + 1, total) progress(title, i + poff, total)
cache._update_path((book_id,), mark_as_dirtied=False) cache._update_path((book_id,), mark_as_dirtied=False)
for fmt, fmtkey in iteritems(fmt_key_map): for fmt, fmtkey in iteritems(fmt_key_map):
if fmt == '.cover': if fmt == '.cover':

View File

@@ -19,8 +19,6 @@ from calibre.utils.date import EPOCH, utcnow
from .pool import Pool from .pool import Pool
from .schema_upgrade import SchemaUpgrade from .schema_upgrade import SchemaUpgrade
# TODO: calibre export/import should preserve indexed data
def print(*args, **kwargs): def print(*args, **kwargs):
kwargs['file'] = sys.__stdout__ kwargs['file'] = sys.__stdout__

View File

@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import unittest, os import unittest, os, time
from io import BytesIO from io import BytesIO
from calibre.constants import iswindows from calibre.constants import iswindows
@@ -166,6 +166,23 @@ class FilesystemTest(BaseTest):
for fmt in cache.formats(book_id): for fmt in cache.formats(book_id):
self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt)) self.assertEqual(cache.format(book_id, fmt), ic.format(book_id, fmt))
self.assertEqual(cache.format_metadata(book_id, fmt)['mtime'], cache.format_metadata(book_id, fmt)['mtime']) self.assertEqual(cache.format_metadata(book_id, fmt)['mtime'], cache.format_metadata(book_id, fmt)['mtime'])
cache.add_format(1, 'TXT', BytesIO(b'testing exim'))
cache.fts_indexing_sleep_time = 0.001
cache.enable_fts()
cache.set_fts_num_of_workers(4)
st = time.monotonic()
while cache.fts_indexing_left > 0 and time.monotonic() - st < 15:
time.sleep(0.05)
if cache.fts_indexing_left > 0:
raise ValueError('FTS indexing did not complete')
self.assertEqual(cache.fts_search('exim')[0]['id'], 1)
with TemporaryDirectory('export_lib') as tdir, TemporaryDirectory('import_lib') as idir:
exporter = Exporter(tdir)
cache.export_library('l', exporter)
exporter.commit()
importer = Importer(tdir)
ic = import_library('l', importer, idir)
self.assertEqual(ic.fts_search('exim')[0]['id'], 1)
def test_find_books_in_directory(self): def test_find_books_in_directory(self):
from calibre.db.adding import find_books_in_directory, compile_rule from calibre.db.adding import find_books_in_directory, compile_rule