Implement ex/im for notes

Kovid Goyal 2023-08-23 08:25:06 +05:30
parent cfadc5fc4d
commit 5d67c74add
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 107 additions and 33 deletions

@@ -1001,6 +1001,15 @@ class DB:
yield from self.notes.search(
self.conn, fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_fields, return_text, process_each_result)
def export_notes_data(self, outfile):
import zipfile, tempfile
with zipfile.ZipFile(outfile, mode='w') as zf:
with tempfile.NamedTemporaryFile() as dbf:
self.backup_notes_database(dbf.name)
dbf.seek(0)
zf.writestr('notes.db', dbf.read())
self.notes.export_non_db_data(zf)
def initialize_fts(self, dbref):
self.fts = None
if not self.prefs['fts_enabled']:
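Note: the new export_notes_data() above snapshots the notes database into a temporary file via the backup machinery and bundles it, together with the on-disk notes resources, into a single zip written to outfile. A minimal sketch of consuming such an archive on the other side, with hypothetical archive_path and dest_dir names:

import os, zipfile

def unpack_notes_export(archive_path, dest_dir):
    os.makedirs(dest_dir, exist_ok=True)
    with zipfile.ZipFile(archive_path) as zf:
        # the archive contains notes.db at the top level plus the resource trees
        zf.extractall(dest_dir)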
@@ -2572,18 +2581,21 @@ class DB:
self.conn # Connect to the moved metadata.db
progress(_('Completed'), total, total)
def backup_database(self, path):
def _backup_database(self, path, name, extra_sql=''):
with closing(apsw.Connection(path)) as dest_db:
with dest_db.backup('main', self.conn, 'main') as b:
with dest_db.backup('main', self.conn, name) as b:
while not b.done:
with suppress(apsw.BusyError):
b.step(128)
dest_db.cursor().execute('DELETE FROM metadata_dirtied; VACUUM;')
if extra_sql:
dest_db.cursor().execute(extra_sql)
def backup_database(self, path):
self._backup_database(path, 'main', 'DELETE FROM metadata_dirtied; VACUUM;')
def backup_fts_database(self, path):
with closing(apsw.Connection(path)) as dest_db:
with dest_db.backup('main', self.conn, 'fts_db') as b:
while not b.done:
with suppress(apsw.BusyError):
b.step(128)
self._backup_database(path, 'fts_db')
def backup_notes_database(self, path):
self._backup_database(path, 'notes_db')
# }}}
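Note: the three backup entry points now share _backup_database(), which drives apsw's online-backup API; the name argument selects which database on the source connection to copy, so the attached fts_db and notes_db databases reuse the same loop as main. A standalone sketch of the pattern, with illustrative file names and an attached database:

from contextlib import closing, suppress
import apsw

src = apsw.Connection('metadata.db')
src.cursor().execute("ATTACH DATABASE 'notes.db' AS notes_db")
with closing(apsw.Connection('/tmp/notes-backup.db')) as dest:
    with dest.backup('main', src, 'notes_db') as b:
        while not b.done:
            with suppress(apsw.BusyError):
                b.step(128)  # copy 128 pages at a time, retrying while the source is busy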

@@ -15,12 +15,13 @@ import traceback
import weakref
from collections import defaultdict
from collections.abc import MutableSet, Set
from contextlib import closing
from functools import partial, wraps
from io import DEFAULT_BUFFER_SIZE, BytesIO
from queue import Queue
from threading import Lock
from time import monotonic, sleep, time
from typing import NamedTuple, Tuple, Optional
from typing import NamedTuple, Optional, Tuple
from calibre import as_unicode, detect_ncpus, isbytestring
from calibre.constants import iswindows, preferred_encoding
@@ -31,6 +32,7 @@ from calibre.customize.ui import (
from calibre.db import SPOOL_SIZE, _get_next_series_num_for_list
from calibre.db.annotations import merge_annotations
from calibre.db.categories import get_categories
from calibre.db.constants import NOTES_DIR_NAME
from calibre.db.errors import NoSuchBook, NoSuchFormat
from calibre.db.fields import IDENTITY, InvalidLinkTable, create_field
from calibre.db.lazy import FormatMetadata, FormatsList, ProxyMetadata
@@ -3009,12 +3011,18 @@ class Cache:
from polyglot.binary import as_hex_unicode
key_prefix = as_hex_unicode(library_key)
book_ids = self._all_book_ids()
total = len(book_ids) + 1
total = len(book_ids) + 2
has_fts = self.is_fts_enabled()
if has_fts:
total += 1
if progress is not None:
progress('metadata.db', 0, total)
poff = 0
def report_progress(fname):
nonlocal poff
if progress is not None:
progress(fname, poff, total)
poff += 1
report_progress('metadata.db')
pt = PersistentTemporaryFile('-export.db')
pt.close()
self.backend.backup_database(pt.name)
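Note: export_library() replaces its scattered progress bookkeeping with the report_progress() closure, which owns the None check and the running offset. The pattern in isolation, as a hedged re-creation:

def make_reporter(progress, total):
    poff = 0
    def report_progress(name):
        nonlocal poff
        if progress is not None:
            progress(name, poff, total)
        poff += 1  # advance the offset whether or not a callback is installed
    return report_progress

report = make_reporter(print, 3)
report('metadata.db')  # -> metadata.db 0 3
report('notes.db')     # -> notes.db 1 3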
@@ -3022,29 +3030,33 @@ class Cache:
with open(pt.name, 'rb') as f:
exporter.add_file(f, dbkey)
os.remove(pt.name)
poff = 1
if has_fts:
poff += 1
if progress is not None:
progress('full-text-search.db', 1, total)
report_progress('full-text-search.db')
pt = PersistentTemporaryFile('-export.db')
pt.close()
self.backend.backup_fts_database(pt.name)
ftsdbkey = key_prefix + ':::' + 'full-text-search.db'
ftsdbkey = key_prefix + ':::full-text-search.db'
with open(pt.name, 'rb') as f:
exporter.add_file(f, ftsdbkey)
os.remove(pt.name)
notesdbkey = key_prefix + ':::notes.db'
with PersistentTemporaryFile('-export.db') as pt:
self.backend.export_notes_data(pt)
pt.flush()
pt.seek(0)
report_progress('notes.db')
exporter.add_file(pt, notesdbkey)
format_metadata = {}
extra_files = {}
metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'total':total, 'extra_files': extra_files}
metadata = {'format_data':format_metadata, 'metadata.db':dbkey, 'notes.db': notesdbkey, 'total':total, 'extra_files': extra_files}
if has_fts:
metadata['full-text-search.db'] = ftsdbkey
for i, book_id in enumerate(book_ids):
if abort is not None and abort.is_set():
return
if progress is not None:
progress(self._field_for('title', book_id), i + poff, total)
report_progress(self._field_for('title', book_id))
format_metadata[book_id] = fm = {}
for fmt in self._formats(book_id):
mdata = self.format_metadata(book_id, fmt)
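Note: with the notes archive added, the exporter's metadata now records the key under which each database is stored so import_library() can find them. An illustrative, not authoritative, shape of that mapping, with values abridged:

metadata = {
    'metadata.db': '<key_prefix>:::metadata.db',
    'full-text-search.db': '<key_prefix>:::full-text-search.db',  # present only when FTS is enabled
    'notes.db': '<key_prefix>:::notes.db',
    'total': 42,                # number of progress steps
    'format_data': {},          # book_id -> {fmt -> exporter key}
    'extra_files': {},          # book_id -> extra data files
}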
@@ -3335,9 +3347,13 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non
from calibre.db.backend import DB
metadata = importer.metadata[library_key]
total = metadata['total']
poff = 1
if progress is not None:
progress('metadata.db', 0, total)
poff = 0
def report_progress(fname):
nonlocal poff
if progress is not None:
progress(fname, poff, total)
poff += 1
report_progress('metadata.db')
if abort is not None and abort.is_set():
return
with open(os.path.join(library_path, 'metadata.db'), 'wb') as f:
@@ -3354,8 +3370,21 @@ def import_library(library_key, importer, library_path, progress=None, abort=Non
src = importer.start_file(metadata['full-text-search.db'], 'full-text-search.db for ' + library_path)
shutil.copyfileobj(src, f)
src.close()
if abort is not None and abort.is_set():
return
if 'notes.db' in metadata:
import zipfile
notes_dir = os.path.join(library_path, NOTES_DIR_NAME)
os.makedirs(notes_dir, exist_ok=True)
with closing(importer.start_file(metadata['notes.db'], 'notes.db for ' + library_path)) as stream:
stream.check_hash = False
with zipfile.ZipFile(stream) as zf:
zf.extractall(notes_dir)
if abort is not None and abort.is_set():
return
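Note: stream.check_hash = False is needed because ZipFile reads its input non-sequentially: it first seeks to the end of the archive to locate the central directory, so the importer's one-pass SHA-1 verification can never observe each byte exactly once (which is also why FileSource grows seek()/tell() support below). A small demonstration that reading a zip requires a seekable object:

import io, zipfile

buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as zf:
    zf.writestr('notes.db', b'payload')
# Opening for reading immediately seeks to the end to find the central directory:
with zipfile.ZipFile(buf) as zf:
    print(zf.namelist())  # ['notes.db']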
cache = Cache(DB(library_path, load_user_formatter_functions=False))
cache.init()
format_data = {int(book_id):data for book_id, data in iteritems(metadata['format_data'])}
extra_files = {int(book_id):data for book_id, data in metadata.get('extra_files', {}).items()}
for i, (book_id, fmt_key_map) in enumerate(iteritems(format_data)):

@@ -60,15 +60,15 @@ class Notes:
conn = backend.get_connection()
self.temp_table_counter = count()
libdir = os.path.dirname(os.path.abspath(conn.db_filename('main')))
notes_dir = os.path.join(libdir, NOTES_DIR_NAME)
self.resources_dir = os.path.join(notes_dir, 'resources')
self.backup_dir = os.path.join(notes_dir, 'backup')
self.retired_dir = os.path.join(notes_dir, 'retired')
if not os.path.exists(notes_dir):
os.makedirs(notes_dir, exist_ok=True)
self.notes_dir = os.path.join(libdir, NOTES_DIR_NAME)
self.resources_dir = os.path.join(self.notes_dir, 'resources')
self.backup_dir = os.path.join(self.notes_dir, 'backup')
self.retired_dir = os.path.join(self.notes_dir, 'retired')
if not os.path.exists(self.notes_dir):
os.makedirs(self.notes_dir, exist_ok=True)
if iswindows:
winutil.set_file_attributes(notes_dir, winutil.FILE_ATTRIBUTE_HIDDEN | winutil.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)
dbpath = os.path.join(notes_dir, 'notes.db')
winutil.set_file_attributes(self.notes_dir, winutil.FILE_ATTRIBUTE_HIDDEN | winutil.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)
dbpath = os.path.join(self.notes_dir, 'notes.db')
conn.execute("ATTACH DATABASE ? AS notes_db", (dbpath,))
os.makedirs(self.resources_dir, exist_ok=True)
os.makedirs(self.backup_dir, exist_ok=True)
@@ -351,3 +351,16 @@ class Notes:
break
except apsw.SQLError as e:
raise FTSQueryError(fts_engine_query, query, e) from e
def export_non_db_data(self, zf):
import zipfile
def add_dir(which):
for dirpath, _, filenames in os.walk(which):
for f in filenames:
path = os.path.join(dirpath, f)
with open(path, 'rb') as src:
zi = zipfile.ZipInfo.from_file(path, arcname=os.path.relpath(path, self.notes_dir))
with zf.open(zi, 'w') as dest:
shutil.copyfileobj(src, dest)
add_dir(self.backup_dir)
add_dir(self.resources_dir)
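Note: export_non_db_data() streams each resource into the archive through ZipFile.open(zi, 'w') rather than writestr(), so large resources are copied in fixed-size chunks instead of being read whole into memory, and ZipInfo.from_file() preserves the source file's timestamp and permission bits. The same streaming pattern standalone, with illustrative paths:

import shutil, zipfile

def add_file_streaming(zf, path, arcname):
    zi = zipfile.ZipInfo.from_file(path, arcname=arcname)  # carries over mtime and mode
    with open(path, 'rb') as src, zf.open(zi, 'w') as dest:
        shutil.copyfileobj(src, dest)  # chunked copy, constant memory

with zipfile.ZipFile('out.zip', 'w') as zf:
    add_file_streaming(zf, 'some/file.bin', 'resources/file.bin')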

@@ -264,6 +264,9 @@ class FilesystemTest(BaseTest):
bookdir = os.path.dirname(ic.format_abspath(1, '__COVER_INTERNAL__'))
self.assertEqual('exf', open(os.path.join(bookdir, 'exf')).read())
self.assertEqual('recurse', open(os.path.join(bookdir, 'sub', 'recurse')).read())
r1 = cache.add_notes_resource(b'res1', 'res.jpg')
r2 = cache.add_notes_resource(b'res2', 'res.jpg')
cache.set_notes_for('authors', 2, 'some notes', resource_ids=(r1, r2))
cache.add_format(1, 'TXT', BytesIO(b'testing exim'))
cache.fts_indexing_sleep_time = 0.001
cache.enable_fts()
@@ -281,6 +284,8 @@ class FilesystemTest(BaseTest):
importer = Importer(tdir)
ic = import_library('l', importer, idir)
self.assertEqual(ic.fts_search('exim')[0]['id'], 1)
self.assertEqual(cache.notes_for('authors', 2), ic.notes_for('authors', 2))
self.assertEqual(cache.get_notes_resource(r1), ic.get_notes_resource(r1))
def test_find_books_in_directory(self):
from calibre.db.adding import find_books_in_directory, compile_rule

@@ -222,10 +222,24 @@ class FileSource:
def __init__(self, f, size, digest, description, mtime, importer):
self.f, self.size, self.digest, self.description = f, size, digest, description
self.seekable = self.f.seekable
self.mtime = mtime
self.end = f.tell() + size
self.start = f.tell()
self.end = self.start + size
self.hasher = hashlib.sha1()
self.importer = importer
self.check_hash = True
def seek(self, amt, whence=os.SEEK_SET):
if whence == os.SEEK_SET:
return self.f.seek(self.start + amt, os.SEEK_SET)
if whence == os.SEEK_END:
return self.f.seek(self.end + amt, os.SEEK_SET)
if whence == os.SEEK_CUR:
return self.f.seek(amt, whence)
def tell(self):
return self.f.tell() - self.start
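Note: the new seek()/tell() methods present the slice [start, end) of the container file as a little file of its own: offset 0 maps to start, SEEK_END offsets are taken from end, and tell() reports window-relative positions. A condensed standalone equivalent of the arithmetic, assuming an in-memory container:

import io, os

class Window:
    # hedged re-creation of FileSource's seek/tell over a container file
    def __init__(self, f, size):
        self.f = f
        self.start = f.tell()
        self.end = self.start + size
    def seek(self, amt, whence=os.SEEK_SET):
        if whence == os.SEEK_SET:             # relative to the window start
            return self.f.seek(self.start + amt, os.SEEK_SET)
        if whence == os.SEEK_END:             # relative to the window end
            return self.f.seek(self.end + amt, os.SEEK_SET)
        return self.f.seek(amt, os.SEEK_CUR)  # relative to the current position
    def tell(self):
        return self.f.tell() - self.start

container = io.BytesIO(b'headerPAYLOADtrailer')
container.seek(6)                   # window begins at the current position
w = Window(container, 7)            # covers b'PAYLOAD'
w.seek(-3, os.SEEK_END)
print(container.read(3), w.tell())  # b'OAD' 7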
def read(self, size=None):
if size is not None and size < 1:
@@ -235,11 +249,12 @@ class FileSource:
if amt < 1:
return b''
ans = self.f.read(amt)
self.hasher.update(ans)
if self.check_hash:
self.hasher.update(ans)
return ans
def close(self):
if self.hasher.hexdigest() != self.digest:
if self.check_hash and self.hasher.hexdigest() != self.digest:
self.importer.corrupted_files.append(self.description)
self.hasher = self.f = None
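Note: close() still verifies the SHA-1 digest, but only when check_hash is set, since the digest is only meaningful for a single sequential pass over the window. A quick illustration of why seeking breaks it:

import hashlib

data = b'example payload'
expected = hashlib.sha1(data).hexdigest()

h = hashlib.sha1()
h.update(data[:7]); h.update(data[7:])  # one sequential pass: digest matches
assert h.hexdigest() == expected

h = hashlib.sha1()
h.update(data[:7]); h.update(data[4:])  # seeking back re-hashes bytes: digest differs
assert h.hexdigest() != expected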