mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
API for searching the FTS corpus
This commit is contained in:
parent
fc80be414c
commit
6f3cd9cc44
@ -11,6 +11,14 @@ import numbers
|
||||
from polyglot.builtins import iteritems
|
||||
|
||||
|
||||
class FTSQueryError(ValueError):
    '''
    Error signalling that a full text search query could not be parsed.

    The offending query and the SQL statement it was used in are kept as
    attributes so callers can report or log them.
    '''

    def __init__(self, query, sql_statement, apsw_error):
        # apsw_error is only used to build the message; it is not stored
        message = f'Failed to parse search query: {query} with error: {apsw_error}'
        super().__init__(message)
        self.sql_statement = sql_statement
        self.query = query
|
||||
|
||||
|
||||
def _get_next_series_num_for_list(series_indices, unwrap=True):
|
||||
from calibre.utils.config_base import tweaks
|
||||
from math import ceil, floor
|
||||
|
@ -23,7 +23,7 @@ from calibre import as_unicode, force_unicode, isbytestring, prints
|
||||
from calibre.constants import (
|
||||
filesystem_encoding, iswindows, plugins, preferred_encoding
|
||||
)
|
||||
from calibre.db import SPOOL_SIZE
|
||||
from calibre.db import SPOOL_SIZE, FTSQueryError
|
||||
from calibre.db.annotations import annot_db_data, unicode_normalize
|
||||
from calibre.db.delete_service import delete_service
|
||||
from calibre.db.errors import NoSuchFormat
|
||||
@ -55,14 +55,6 @@ from polyglot.builtins import (
|
||||
# }}}
|
||||
|
||||
|
||||
class FTSQueryError(ValueError):
    '''
    Error signalling that a full text search query could not be parsed.

    The offending query and the SQL statement it was used in are kept as
    attributes so callers can report or log them.
    '''

    def __init__(self, query, sql_statement, apsw_error):
        # apsw_error is only used to build the message; it is not stored
        message = f'Failed to parse search query: {query} with error: {apsw_error}'
        super().__init__(message)
        self.sql_statement = sql_statement
        self.query = query
|
||||
|
||||
|
||||
CUSTOM_DATA_TYPES = frozenset(('rating', 'text', 'comments', 'datetime',
|
||||
'int', 'float', 'bool', 'series', 'composite', 'enumeration'))
|
||||
WINDOWS_RESERVED_NAMES = frozenset('CON PRN AUX NUL COM1 COM2 COM3 COM4 COM5 COM6 COM7 COM8 COM9 LPT1 LPT2 LPT3 LPT4 LPT5 LPT6 LPT7 LPT8 LPT9'.split())
|
||||
@ -979,6 +971,11 @@ class DB:
|
||||
def commit_fts_result(self, book_id, fmt, fmt_size, fmt_hash, text, err_msg):
|
||||
return self.fts.commit_result(book_id, fmt, fmt_size, fmt_hash, text, err_msg)
|
||||
|
||||
def search(
    self,
    fts_engine_query,
    use_stemming,
    highlight_start,
    highlight_end,
    snippet_size,
    restrict_to_book_ids,
):
    '''
    Lazily yield the results of a full text search.

    All arguments are handed, positionally and unchanged, to
    ``self.fts.search``; nothing is executed until the returned generator
    is iterated.
    '''
    matches = self.fts.search(
        fts_engine_query,
        use_stemming,
        highlight_start,
        highlight_end,
        snippet_size,
        restrict_to_book_ids,
    )
    yield from matches
|
||||
|
||||
def shutdown_fts(self):
|
||||
if self.fts_enabled:
|
||||
self.fts.shutdown()
|
||||
|
@ -481,6 +481,25 @@ class Cache:
|
||||
self.queue_next_fts_job()
|
||||
return existing
|
||||
|
||||
@read_api
def fts_search(
    self,
    fts_engine_query,
    use_stemming=True,
    highlight_start=None,
    highlight_end=None,
    snippet_size=None,
    restrict_to_book_ids=None,
):
    '''
    Run a full text search and return all results as a tuple.

    Every argument is forwarded unchanged to the backend's ``search``
    method.

    :param fts_engine_query: The query, in the syntax of the FTS engine.
    :param use_stemming: Forwarded to the backend; defaults to True.
    :param highlight_start: Forwarded to the backend; defaults to None.
    :param highlight_end: Forwarded to the backend; defaults to None.
    :param snippet_size: Forwarded to the backend; defaults to None.
    :param restrict_to_book_ids: Forwarded to the backend; defaults to None.
    :return: A tuple containing every result produced by the backend.
    '''
    # The backend exposes this operation as search(), not fts_search().
    # The backend returns a lazy generator, so it is fully consumed here,
    # while still inside this API call (presumably under the read lock
    # taken by @read_api -- confirm against the decorator).
    return tuple(self.backend.search(
        fts_engine_query,
        use_stemming=use_stemming,
        highlight_start=highlight_start,
        highlight_end=highlight_end,
        snippet_size=snippet_size,
        restrict_to_book_ids=restrict_to_book_ids,
    ))
|
||||
|
||||
# }}}
|
||||
|
||||
# Cache Layer API {{{
|
||||
|
@ -3,13 +3,15 @@
|
||||
# License: GPL v3 Copyright: 2022, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
|
||||
import builtins
|
||||
import builtins, apsw
|
||||
import hashlib
|
||||
import os
|
||||
import sys
|
||||
from contextlib import suppress
|
||||
|
||||
from calibre.utils.date import EPOCH, utcnow
|
||||
from calibre.db import FTSQueryError
|
||||
from calibre.db.annotations import unicode_normalize
|
||||
|
||||
from .pool import Pool
|
||||
from .schema_upgrade import SchemaUpgrade
|
||||
@ -117,5 +119,39 @@ class FTS:
|
||||
os.remove(path)
|
||||
return False
|
||||
|
||||
def search(self,
    fts_engine_query, use_stemming, highlight_start, highlight_end, snippet_size, restrict_to_book_ids,
):
    '''
    Search the full text index, yielding one dict per matching row.

    :param fts_engine_query: The query string; it is passed through
        unicode_normalize() and then bound as the MATCH parameter.
    :param use_stemming: If true, search the ``books_fts_stemmed`` table,
        otherwise ``books_fts``.
    :param highlight_start: Marker inserted before each match. Markup is
        only produced when both highlight markers are not None.
    :param highlight_end: Marker inserted after each match.
    :param snippet_size: When highlighting, return a snippet instead of the
        whole highlighted text. The value is clamped to the range 1..64
        before being passed to snippet(). Ignored without highlighting.
    :param restrict_to_book_ids: If not None, rows whose book id is not in
        this container are skipped.
    :return: Generator of dicts with the keys ``id``, ``book_id``,
        ``format`` and ``text``.
    :raises FTSQueryError: If the database raises apsw.SQLError while the
        query is being run.
    '''
    fts_engine_query = unicode_normalize(fts_engine_query)
    fts_table = 'books_fts_stemmed' if use_stemming else 'books_fts'
    text_expr = 'books_text.searchable_text'
    data = []
    if highlight_start is not None and highlight_end is not None:
        # The markers are caller supplied strings, so they are bound as
        # parameters rather than being pasted into the SQL text.
        if snippet_size is not None:
            text_expr = f"snippet({fts_table}, 0, ?, ?, '…', {max(1, min(snippet_size, 64))})"
        else:
            text_expr = f'highlight({fts_table}, 0, ?, ?)'
        data.extend((highlight_start, highlight_end))
    query = (
        f'SELECT books_text.id, books_text.book, books_text.format, {text_expr} FROM books_text'
        f' JOIN {fts_table} ON books_text.id = {fts_table}.rowid'
        f' WHERE {fts_table} MATCH ?'
        f' ORDER BY {fts_table}.rank '
    )
    # Bindings are positional: the markers in the SELECT clause come before
    # the MATCH parameter in the WHERE clause.
    data.append(fts_engine_query)
    try:
        # Exactly four columns are selected above, so exactly four are unpacked
        for rowid, book_id, fmt, text in self.execute(query, tuple(data)):
            if restrict_to_book_ids is not None and book_id not in restrict_to_book_ids:
                continue
            yield {
                'id': rowid,
                'book_id': book_id,
                'format': fmt,
                'text': text,
            }
    except apsw.SQLError as e:
        raise FTSQueryError(fts_engine_query, query, e) from e
|
||||
|
||||
def shutdown(self):
|
||||
self.pool.shutdown()
|
||||
|
Loading…
x
Reference in New Issue
Block a user