mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
calibredb: A new fts_search command to perform full text searching
This commit is contained in:
parent
9ae2074669
commit
6662e353aa
167
src/calibre/db/cli/cmd_fts_search.py
Normal file
167
src/calibre/db/cli/cmd_fts_search.py
Normal file
@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env python
|
||||
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
import re
|
||||
|
||||
# NOTE(review): presumably tells the calibredb command dispatcher that this
# command does not modify the library -- confirm against the loader.
readonly = True
|
||||
version = 0 # change this if you change signature of implementation()
|
||||
|
||||
|
||||
def implementation(db, notify_changes, query, adata):
    '''
    Run the full text search against ``db`` and collect per-book metadata.

    :param db: database object; this function uses its ``search``,
        ``fts_search``, ``safe_read_lock`` and ``_field_for`` members.
    :param notify_changes: unused by this command.
    :param query: the full text search expression, handed to ``db.fts_search``.
    :param adata: dict with the keys ``restrict_to``, ``include_snippets``,
        ``use_stemming``, ``start_marker``, ``end_marker`` and ``as_tuple``.
    :return: a 2-tuple ``(results, metadata)`` where ``results`` is whatever
        ``db.fts_search`` returns and ``metadata`` maps each book id seen in
        the results to ``{'title': ..., 'authors': ...}``.
    :raises FTSQueryError: re-raised from ``db.fts_search`` after being
        flagged with ``suppress_traceback = True``.
    '''
    # A string restriction is a search expression to be resolved to book ids;
    # anything else truthy is treated as an iterable of book ids.
    scope = adata['restrict_to']
    if not scope:
        allowed_ids = None
    elif isinstance(scope, str):
        allowed_ids = db.search(scope)
    else:
        allowed_ids = set(scope)

    book_info = {}
    want_snippets = adata['include_snippets']

    def attach_metadata(hit):
        # Invoked by fts_search for each result: trim the fields that are not
        # reported and record title/authors once per book.
        hit.pop('id', None)
        if not want_snippets:
            hit.pop('text', None)
        book_id = hit['book_id']
        if book_id not in book_info:
            with db.safe_read_lock:
                book_info[book_id] = {
                    'title': db._field_for('title', book_id),
                    'authors': db._field_for('authors', book_id),
                }
        return hit

    from calibre.db import FTSQueryError
    try:
        hits = db.fts_search(
            query,
            use_stemming=adata['use_stemming'],
            highlight_start=adata['start_marker'],
            highlight_end=adata['end_marker'],
            return_text=want_snippets,
            restrict_to_book_ids=allowed_ids,
            result_type=tuple if adata['as_tuple'] else lambda x: x,
            process_each_result=attach_metadata,
            snippet_size=64,
        )
    except FTSQueryError as err:
        # Flag the error so the caller can report it without a traceback.
        err.suppress_traceback = True
        raise err
    return hits, book_info
|
||||
|
||||
|
||||
def option_parser(get_parser, args):
    '''
    Build the command line option parser for ``calibredb fts_search``.

    :param get_parser: callable that takes the usage text and returns a
        parser object exposing ``add_option``.
    :param args: unused here.
    :return: the parser, with all fts_search options registered.
    '''
    usage = _(
        '''\
%prog fts_search [options] search expression

Do a full text search on the entire library or a subset of it.

'''
    )
    parser = get_parser(usage)
    add = parser.add_option

    add(
        '--include-snippets',
        action='store_true',
        default=False,
        help=_('Include snippets of the text surrounding each match. Note that this makes searching much slower.'),
    )
    # The default markers are ANSI escape sequences: red on, then reset.
    add(
        '--match-start-marker',
        default='\x1b[31m',
        help=_('The marker used to indicate the start of a matched word inside a snippet'),
    )
    add(
        '--match-end-marker',
        default='\x1b[m',
        help=_('The marker used to indicate the end of a matched word inside a snippet'),
    )
    # Negative flag: stemming is on unless this option is given.
    add(
        '--do-not-match-on-related-words',
        action='store_false',
        dest='use_stemming',
        default=True,
        help=_('Only match on exact words not related words. So correction will not match correcting.'),
    )
    add(
        '--restrict-to',
        default='',
        help=_('Restrict the searched books, either using a search expression or ids.'
               ' For example: ids:1,2,3 to restrict by ids or search:tag:foo to restrict to books having the tag foo.'),
    )
    add(
        '--output-format',
        choices=('text', 'json'),
        default='text',
        help=_('The format to output the search results in. Either "text" for plain text or "json" for JSON output.'),
    )
    return parser
|
||||
|
||||
|
||||
def output_results_as_text(results, metadata_cache, include_snippets):
    '''
    Print search results to stdout as plain text, one entry per group.

    Without snippets, all formats that matched are grouped per book. With
    snippets, consecutive results for the same book whose whitespace
    normalised text is identical are merged into one entry listing all
    their formats.

    :param results: iterable of dicts with ``book_id`` and ``format`` keys,
        plus a ``text`` key when ``include_snippets`` is true.
    :param metadata_cache: mapping of book id to a dict with ``title`` and
        ``authors`` keys.
    :param include_snippets: whether to print the matched text.
    '''
    from calibre.utils.terminal import geometry
    from calibre.ebooks.metadata import authors_to_string

    # Horizontal rule as wide as the value reported by geometry(), minimum 5.
    separator = '─' * max(5, geometry()[0])

    def emit(book_id, formats, snippet=None):
        meta = metadata_cache[book_id]
        print(_('{0} by {1}').format(meta['title'], authors_to_string(meta['authors'])))
        print(f'Book id: {book_id} Formats: {", ".join(formats)}')
        if snippet is not None:
            print(snippet)
        print(separator)

    if not include_snippets:
        formats_by_book = {}
        for result in results:
            formats_by_book.setdefault(result['book_id'], []).append(result['format'])
        for book_id, formats in formats_by_book.items():
            emit(book_id, formats)
        return

    collapse_whitespace = re.compile(r'\s+').sub
    # -1 marks "nothing pending yet"; an entry is only printed once a real
    # (non-negative) book id has been seen.
    pending_id, pending_text, pending_formats = -1, '', []

    for result in results:
        snippet = collapse_whitespace(' ', result['text'])
        if result['book_id'] == pending_id and snippet == pending_text:
            pending_formats.append(result['format'])
            continue
        if pending_id > -1:
            emit(pending_id, pending_formats, pending_text)
        pending_id, pending_text, pending_formats = result['book_id'], snippet, [result['format']]

    # Flush the final group.
    if pending_id > -1:
        emit(pending_id, pending_formats, pending_text)
|
||||
|
||||
|
||||
def main(opts, args, dbctx):
    '''
    Entry point for ``calibredb fts_search``.

    :param opts: parsed options; reads ``restrict_to``, ``match_start_marker``,
        ``match_end_marker``, ``use_stemming``, ``include_snippets`` and
        ``output_format``.
    :param args: positional arguments, joined with spaces to form the search
        expression.
    :param dbctx: database context; uses ``option_parser``, ``run`` and
        ``is_remote``.
    :return: 0 on success.
    :raises SystemExit: with a message when the search expression is missing,
        ``--restrict-to`` is malformed, or the query is rejected.
    '''
    if len(args) < 1:
        dbctx.option_parser.print_help()
        raise SystemExit(_('Error: You must specify the search expression'))
    search_expression = ' '.join(args)

    restrict_to = ''
    if opts.restrict_to:
        q, v = opts.restrict_to.partition(':')[::2]
        if q == 'ids':
            # Report malformed ids as a clean error rather than a raw
            # ValueError traceback; empty items (e.g. a trailing comma) are
            # ignored.
            try:
                restrict_to = tuple({int(x) for x in v.split(',') if x.strip()})
            except ValueError:
                raise SystemExit(
                    'The ids: form of --restrict-to must be a comma separated list of integer book ids') from None
            if not restrict_to:
                # An empty restriction is falsy and would silently search
                # every book, so reject it explicitly.
                raise SystemExit('The ids: form of --restrict-to must specify at least one book id')
        elif q == 'search':
            restrict_to = v
        else:
            raise SystemExit('The --restrict-to option must start with either ids: or search:')

    from calibre.db import FTSQueryError
    try:
        results, metadata_cache = dbctx.run('fts_search', search_expression, {
            'start_marker': opts.match_start_marker, 'end_marker': opts.match_end_marker, 'use_stemming': opts.use_stemming,
            'include_snippets': opts.include_snippets, 'restrict_to': restrict_to, 'as_tuple': dbctx.is_remote
        })
        if opts.output_format == 'json':
            if not dbctx.is_remote:
                # Local runs are asked for the untouched result object
                # (as_tuple is False), so materialise it before dumping.
                results = tuple(results)
            for r in results:
                m = metadata_cache[r['book_id']]
                r['title'], r['authors'] = m['title'], m['authors']
            import json
            print(json.dumps(results, sort_keys=True, indent='  '))
        else:
            output_results_as_text(results, metadata_cache, opts.include_snippets)
    except FTSQueryError as e:
        raise SystemExit(str(e))
    except Exception as e:
        # Errors flagged with suppress_traceback are reported as a plain
        # message; anything else propagates unchanged.
        if getattr(e, 'suppress_traceback', False):
            raise SystemExit(str(e))
        raise
    return 0
|
@ -21,7 +21,7 @@ COMMANDS = (
|
||||
'set_metadata', 'export', 'catalog', 'saved_searches', 'add_custom_column',
|
||||
'custom_columns', 'remove_custom_column', 'set_custom', 'restore_database',
|
||||
'check_library', 'list_categories', 'backup_metadata', 'clone', 'embed_metadata',
|
||||
'search', 'fts_index'
|
||||
'search', 'fts_index', 'fts_search',
|
||||
)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user