Annotations browser: Fix searching for words not working in languages, such as Chinese, that do not have word delimiters. Fixes #1929325 [browsing annotations in the main interface the results cannot be searched in Chinese](https://bugs.launchpad.net/calibre/+bug/1929325)

calibre now has its own custom ICU-based FTS (full-text search) tokenizer.
I really have no clue about Chinese, so I can only hope I got it right.
This commit is contained in:
Kovid Goyal 2021-06-28 18:24:13 +05:30
parent bdb030f33e
commit d8752252e6
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 71 additions and 29 deletions

View File

@ -633,4 +633,4 @@ CREATE TRIGGER series_update_trg
BEGIN
UPDATE series SET sort=title_sort(NEW.name) WHERE id=NEW.id;
END;
pragma user_version=24;
pragma user_version=25;

View File

@ -7,40 +7,53 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
# Imports {{{
import os, shutil, uuid, json, glob, time, hashlib, errno, sys
import apsw
import errno
import glob
import hashlib
import json
import os
import shutil
import sys
import time
import uuid
from functools import partial
import apsw
from polyglot.builtins import (iteritems, itervalues,
unicode_type, reraise, string_or_bytes, cmp, native_string_type)
from calibre import isbytestring, force_unicode, prints, as_unicode
from calibre.constants import (iswindows, filesystem_encoding,
preferred_encoding)
from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile
from calibre import as_unicode, force_unicode, isbytestring, prints
from calibre.constants import (
filesystem_encoding, iswindows, plugins, preferred_encoding
)
from calibre.db import SPOOL_SIZE
from calibre.db.annotations import annot_db_data, unicode_normalize
from calibre.db.schema_upgrades import SchemaUpgrade
from calibre.db.delete_service import delete_service
from calibre.db.errors import NoSuchFormat
from calibre.db.schema_upgrades import SchemaUpgrade
from calibre.db.tables import (
AuthorsTable, CompositeTable, FormatsTable, IdentifiersTable, ManyToManyTable,
ManyToOneTable, OneToOneTable, PathTable, RatingTable, SizeTable, UUIDTable
)
from calibre.ebooks.metadata import author_to_author_sort, title_sort
from calibre.library.field_metadata import FieldMetadata
from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile
from calibre.utils import pickle_binary_string, unpickle_binary_string
from calibre.utils.icu import sort_key
from calibre.utils.config import to_json, from_json, prefs, tweaks
from calibre.utils.date import utcfromtimestamp, parse_date, utcnow, EPOCH
from calibre.utils.config import from_json, prefs, to_json, tweaks
from calibre.utils.date import EPOCH, parse_date, utcfromtimestamp, utcnow
from calibre.utils.filenames import (
is_case_sensitive, samefile, hardlink_file, ascii_filename,
WindowsAtomicFolderMove, atomic_rename, remove_dir_if_empty,
copytree_using_links, copyfile_using_links)
WindowsAtomicFolderMove, ascii_filename, atomic_rename, copyfile_using_links,
copytree_using_links, hardlink_file, is_case_sensitive, remove_dir_if_empty,
samefile
)
from calibre.utils.formatter_functions import (
compile_user_template_functions, formatter_functions,
load_user_template_functions, unload_user_template_functions
)
from calibre.utils.icu import sort_key
from calibre.utils.img import save_cover_data_to
from calibre.utils.formatter_functions import (load_user_template_functions,
unload_user_template_functions,
compile_user_template_functions,
formatter_functions)
from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
SizeTable, FormatsTable, AuthorsTable, IdentifiersTable, PathTable,
CompositeTable, UUIDTable, RatingTable)
from polyglot.builtins import (
cmp, iteritems, itervalues, native_string_type, reraise, string_or_bytes,
unicode_type
)
# }}}
@ -322,7 +335,11 @@ class Connection(apsw.Connection): # {{{
BUSY_TIMEOUT = 10000 # milliseconds
def __init__(self, path):
apsw.Connection.__init__(self, path)
from calibre.utils.localization import get_lang
from calibre_extensions.sqlite_extension import set_ui_language
set_ui_language(get_lang())
super().__init__(path)
plugins.load_apsw_extension(self, 'sqlite_extension')
self.setbusytimeout(self.BUSY_TIMEOUT)
self.execute('pragma cache_size=-5000')
@ -1977,6 +1994,12 @@ class DB(object):
return count
return 0
def reindex_annotations(self):
# Rebuild the annotation search indexes.
#
# Runs the special ``INSERT INTO tbl(tbl) VALUES('rebuild')`` statement once
# for each of the two index tables: ``annotations_fts`` (substituted for {0})
# and ``annotations_fts_stemmed`` (substituted for {1}). Both statements are
# sent to self.execute() as a single SQL script.
# NOTE(review): presumably this makes SQLite re-tokenize the existing rows,
# which would be needed after a tokenizer change -- confirm against the FTS
# table definitions, which are not visible here.
self.execute('''
INSERT INTO {0}({0}) VALUES('rebuild');
INSERT INTO {1}({1}) VALUES('rebuild');
'''.format('annotations_fts', 'annotations_fts_stemmed'))
def conversion_options(self, book_id, fmt):
for (data,) in self.conn.get('SELECT data FROM conversion_options WHERE book=? AND format=?', (book_id, fmt.upper())):
if data:

View File

@ -2504,6 +2504,10 @@ class Cache(object):
alist.append((annot, ts))
self._set_annotations_for_book(book_id, fmt, alist, user_type=user_type, user=user)
# Public entry point for rebuilding the annotation search indexes; it simply
# forwards the request to the storage backend.
# NOTE(review): @write_api is defined elsewhere; presumably it serializes this
# call with other writers -- confirm.
@write_api
def reindex_annotations(self):
self.backend.reindex_annotations()
def import_library(library_key, importer, library_path, progress=None, abort=None):
from calibre.db.backend import DB

View File

@ -791,3 +791,6 @@ CREATE TRIGGER fkc_annot_update
END;
''')
def upgrade_version_24(self):
# Schema upgrade step for databases at user_version 24: the only work done is
# asking the database object to reindex its annotations.
# NOTE(review): presumably required so that indexes created before this
# change are rebuilt; no table or trigger definitions are altered here.
self.db.reindex_annotations()

View File

@ -24,7 +24,7 @@ from calibre.gui2 import (
)
from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.gui2.viewer.widgets import ResultsDelegate, SearchBox
from calibre.gui2.widgets2 import Dialog
from calibre.gui2.widgets2 import Dialog, RightClickButton
# rendering {{{
@ -937,9 +937,16 @@ class AnnotationsBrowser(Dialog):
b.setToolTip(_('Export the selected annotations'))
b.setIcon(QIcon(I('save.png')))
b.clicked.connect(self.export_selected)
self.refresh_button = b = self.bb.addButton(_('Refresh'), QDialogButtonBox.ButtonRole.ActionRole)
self.refresh_button = b = RightClickButton(self.bb)
self.bb.addButton(b, QDialogButtonBox.ButtonRole.ActionRole)
b.setText(_('Refresh'))
b.setToolButtonStyle(Qt.ToolButtonStyle.ToolButtonTextBesideIcon)
self.refresh_menu = m = QMenu(self)
m.addAction(_('Rebuild search index')).triggered.connect(self.rebuild)
b.setMenu(m)
b.setToolTip(_('Refresh annotations in case they have been changed since this window was opened'))
b.setIcon(QIcon(I('restart.png')))
b.setPopupMode(QToolButton.ToolButtonPopupMode.DelayedPopup)
b.clicked.connect(self.refresh)
def delete_selected(self):
@ -1004,12 +1011,17 @@ class AnnotationsBrowser(Dialog):
self.current_restriction = restrict_to_book_ids
self.browse_panel.re_initialize(restrict_to_book_ids or set())
def refresh(self, current_restriction):
def refresh(self):
# Reload the annotations shown in the browse panel.
# The tree state is captured first and restored last, around the reload.
state = self.browse_panel.save_tree_state()
# Re-initialize using the restriction remembered in self.current_restriction
# rather than one supplied by the caller.
self.browse_panel.re_initialize(self.current_restriction)
# Notify the panel that the effective query should be re-evaluated.
self.browse_panel.effective_query_changed()
self.browse_panel.restore_tree_state(state)
def rebuild(self):
# Handler for the "Rebuild search index" menu action: reindexes the
# annotations of the current database and then refreshes this dialog.
# A BusyCursor context is active while the reindex runs.
with BusyCursor():
current_db().reindex_annotations()
self.refresh()
if __name__ == '__main__':
from calibre.library import db