mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
API for searching the FTS corpus
This commit is contained in:
parent
fc80be414c
commit
6f3cd9cc44
@ -11,6 +11,14 @@ import numbers
|
|||||||
from polyglot.builtins import iteritems
|
from polyglot.builtins import iteritems
|
||||||
|
|
||||||
|
|
||||||
|
class FTSQueryError(ValueError):
    '''
    Raised when a full text search query cannot be run by the FTS engine.

    Attributes:
        query: the search query that failed
        sql_statement: the SQL statement that was being executed
        apsw_error: the underlying error object that triggered this one
    '''

    def __init__(self, query, sql_statement, apsw_error):
        ValueError.__init__(self, f'Failed to parse search query: {query} with error: {apsw_error}')
        self.query = query
        self.sql_statement = sql_statement
        # Keep the original error so handlers can inspect it, instead of
        # only having its string form embedded in the message.
        self.apsw_error = apsw_error
|
||||||
|
|
||||||
|
|
||||||
def _get_next_series_num_for_list(series_indices, unwrap=True):
|
def _get_next_series_num_for_list(series_indices, unwrap=True):
|
||||||
from calibre.utils.config_base import tweaks
|
from calibre.utils.config_base import tweaks
|
||||||
from math import ceil, floor
|
from math import ceil, floor
|
||||||
|
@ -23,7 +23,7 @@ from calibre import as_unicode, force_unicode, isbytestring, prints
|
|||||||
from calibre.constants import (
|
from calibre.constants import (
|
||||||
filesystem_encoding, iswindows, plugins, preferred_encoding
|
filesystem_encoding, iswindows, plugins, preferred_encoding
|
||||||
)
|
)
|
||||||
from calibre.db import SPOOL_SIZE
|
from calibre.db import SPOOL_SIZE, FTSQueryError
|
||||||
from calibre.db.annotations import annot_db_data, unicode_normalize
|
from calibre.db.annotations import annot_db_data, unicode_normalize
|
||||||
from calibre.db.delete_service import delete_service
|
from calibre.db.delete_service import delete_service
|
||||||
from calibre.db.errors import NoSuchFormat
|
from calibre.db.errors import NoSuchFormat
|
||||||
@ -55,14 +55,6 @@ from polyglot.builtins import (
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
class FTSQueryError(ValueError):
    '''
    Raised when a full text search query cannot be run by the FTS engine.

    Attributes:
        query: the search query that failed
        sql_statement: the SQL statement that was being executed
        apsw_error: the underlying error object that triggered this one
    '''

    def __init__(self, query, sql_statement, apsw_error):
        ValueError.__init__(self, f'Failed to parse search query: {query} with error: {apsw_error}')
        self.query = query
        self.sql_statement = sql_statement
        # Keep the original error so handlers can inspect it, instead of
        # only having its string form embedded in the message.
        self.apsw_error = apsw_error
|
|
||||||
|
|
||||||
|
|
||||||
CUSTOM_DATA_TYPES = frozenset(('rating', 'text', 'comments', 'datetime',
|
CUSTOM_DATA_TYPES = frozenset(('rating', 'text', 'comments', 'datetime',
|
||||||
'int', 'float', 'bool', 'series', 'composite', 'enumeration'))
|
'int', 'float', 'bool', 'series', 'composite', 'enumeration'))
|
||||||
WINDOWS_RESERVED_NAMES = frozenset('CON PRN AUX NUL COM1 COM2 COM3 COM4 COM5 COM6 COM7 COM8 COM9 LPT1 LPT2 LPT3 LPT4 LPT5 LPT6 LPT7 LPT8 LPT9'.split())
|
WINDOWS_RESERVED_NAMES = frozenset('CON PRN AUX NUL COM1 COM2 COM3 COM4 COM5 COM6 COM7 COM8 COM9 LPT1 LPT2 LPT3 LPT4 LPT5 LPT6 LPT7 LPT8 LPT9'.split())
|
||||||
@ -979,6 +971,11 @@ class DB:
|
|||||||
def commit_fts_result(self, book_id, fmt, fmt_size, fmt_hash, text, err_msg):
    '''
    Forward an FTS result to ``self.fts.commit_result()`` with the same
    arguments, in the same order, and return whatever it returns.
    '''
    outcome = self.fts.commit_result(book_id, fmt, fmt_size, fmt_hash, text, err_msg)
    return outcome
|
||||||
|
|
||||||
|
def search(self,
    fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_book_ids,
):
    '''
    Yield every result produced by ``self.fts.search()`` for the given
    query. All arguments are passed through unchanged, positionally and
    in the same order.
    '''
    yield from self.fts.search(
        fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_book_ids)


# Cache.fts_search() calls this method as ``self.backend.fts_search(...)``,
# so expose it under that name too. ``search`` is kept for existing callers.
# NOTE(review): assumes Cache.backend is an instance of this class - confirm.
fts_search = search
|
||||||
|
|
||||||
def shutdown_fts(self):
|
def shutdown_fts(self):
|
||||||
if self.fts_enabled:
|
if self.fts_enabled:
|
||||||
self.fts.shutdown()
|
self.fts.shutdown()
|
||||||
|
@ -481,6 +481,25 @@ class Cache:
|
|||||||
self.queue_next_fts_job()
|
self.queue_next_fts_job()
|
||||||
return existing
|
return existing
|
||||||
|
|
||||||
|
@read_api
def fts_search(
    self,
    fts_engine_query,
    use_stemming=True,
    highlight_start=None,
    highlight_end=None,
    snippet_size=None,
    restrict_to_book_ids=None,
):
    '''
    Run a full text search through the backend and return all of its
    results as a tuple.

    Every argument is handed to ``self.backend.fts_search()`` unchanged;
    all but the query itself are passed by keyword.
    '''
    options = {
        'use_stemming': use_stemming,
        'highlight_start': highlight_start,
        'highlight_end': highlight_end,
        'snippet_size': snippet_size,
        'restrict_to_book_ids': restrict_to_book_ids,
    }
    matches = self.backend.fts_search(fts_engine_query, **options)
    # Exhaust the backend iterable here so the caller gets a complete tuple.
    return tuple(matches)
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
# Cache Layer API {{{
|
# Cache Layer API {{{
|
||||||
|
@ -3,13 +3,15 @@
|
|||||||
# License: GPL v3 Copyright: 2022, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPL v3 Copyright: 2022, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
|
||||||
import builtins
|
import builtins, apsw
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from contextlib import suppress
|
from contextlib import suppress
|
||||||
|
|
||||||
from calibre.utils.date import EPOCH, utcnow
|
from calibre.utils.date import EPOCH, utcnow
|
||||||
|
from calibre.db import FTSQueryError
|
||||||
|
from calibre.db.annotations import unicode_normalize
|
||||||
|
|
||||||
from .pool import Pool
|
from .pool import Pool
|
||||||
from .schema_upgrade import SchemaUpgrade
|
from .schema_upgrade import SchemaUpgrade
|
||||||
@ -117,5 +119,39 @@ class FTS:
|
|||||||
os.remove(path)
|
os.remove(path)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def search(self,
    fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_book_ids,
):
    '''
    Run a full text search query and yield one dict per matching row.

    :param fts_engine_query: the query, passed to the FTS ``MATCH`` operator
        after being run through ``unicode_normalize()``
    :param use_stemming: if true search ``books_fts_stemmed``, otherwise ``books_fts``
    :param highlight_start: marker placed before matched text, or None
    :param highlight_end: marker placed after matched text, or None.
        Highlighting is applied only when both markers are not None.
    :param snippet_size: if not None (and highlighting is on) return a
        snippet instead of the full highlighted text; clamped to 1..64
    :param restrict_to_book_ids: if not None, rows whose book id is not
        in this container are skipped
    :return: generator of dicts with the keys ``id``, ``book_id``,
        ``format`` and ``text``
    :raises FTSQueryError: if executing the query raises ``apsw.SQLError``
    '''
    fts_engine_query = unicode_normalize(fts_engine_query)
    fts_table = 'books_fts_stemmed' if use_stemming else 'books_fts'
    text = 'books_text.searchable_text'
    data = []
    if highlight_start is not None and highlight_end is not None:
        # The markers are bound as parameters rather than spliced into the
        # SQL, so a marker containing a quote character cannot break or
        # alter the statement.
        if snippet_size is not None:
            text = "snippet({}, 0, ?, ?, '…', {})".format(fts_table, max(1, min(snippet_size, 64)))
        else:
            text = f'highlight({fts_table}, 0, ?, ?)'
        data.extend((highlight_start, highlight_end))
    query = 'SELECT {0}.id, {0}.book, {0}.format, {1} FROM {0} '.format('books_text', text)
    query += f' JOIN {fts_table} ON books_text.id = {fts_table}.rowid'
    query += f' WHERE {fts_table} MATCH ?'
    # The MATCH placeholder comes after the ones in the SELECT list, so its
    # binding must be appended last.
    data.append(fts_engine_query)
    query += f' ORDER BY {fts_table}.rank '
    try:
        # The SELECT above yields exactly four columns per row.
        for rowid, book_id, fmt, result_text in self.execute(query, tuple(data)):
            if restrict_to_book_ids is not None and book_id not in restrict_to_book_ids:
                continue
            yield {
                'id': rowid,
                'book_id': book_id,
                'format': fmt,
                'text': result_text,
            }
    except apsw.SQLError as e:
        raise FTSQueryError(fts_engine_query, query, e) from e
|
||||||
|
|
||||||
def shutdown(self):
|
def shutdown(self):
|
||||||
self.pool.shutdown()
|
self.pool.shutdown()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user