From a784e359a2f572fac853d962e3262efc9848c411 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 11 Dec 2022 13:11:27 +0530 Subject: [PATCH] More work on CS FTS --- src/calibre/srv/fts.py | 53 ++++++++++- src/pyj/book_list/fts.pyj | 148 +++++++++++++++++++++++++++-- src/pyj/book_list/library_data.pyj | 15 +-- 3 files changed, 197 insertions(+), 19 deletions(-) diff --git a/src/calibre/srv/fts.py b/src/calibre/srv/fts.py index 62ec517203..8f650a17d3 100644 --- a/src/calibre/srv/fts.py +++ b/src/calibre/srv/fts.py @@ -2,6 +2,9 @@ # vim:fileencoding=utf-8 # License: GPL v3 Copyright: 2022, Kovid Goyal +import re + +from calibre.ebooks.metadata import authors_to_string from calibre.srv.errors import ( HTTPBadRequest, HTTPPreconditionRequired, HTTPUnprocessableEntity, ) @@ -22,7 +25,7 @@ def fts_search(ctx, rd): raise HTTPPreconditionRequired('Full text searching is not enabled on this library') metadata_cache = {} l, t = db.fts_indexing_progress()[:2] - ans = {'metadata': metadata_cache, 'left': l, 'total': t} + ans = {'metadata': metadata_cache, 'indexing_status': {'left': l, 'total': t}} use_stemming = rd.query.get('use_stemming', 'y') == 'y' query = rd.query.get('query' '') @@ -38,7 +41,7 @@ def fts_search(ctx, rd): bid = result['book_id'] if bid not in metadata_cache: with db.safe_read_lock: - metadata_cache[bid] = {'title': db._field_for('title', bid), 'authors': db._field_for('authors', bid)} + metadata_cache[bid] = {'title': db._field_for('title', bid), 'authors': authors_to_string(db._field_for('authors', bid))} return result from calibre.db import FTSQueryError @@ -49,3 +52,49 @@ def fts_search(ctx, rd): except FTSQueryError as e: raise HTTPUnprocessableEntity(str(e)) return ans + + +@endpoint('/fts/snippets/{book_ids}', postprocess=json) +def fts_snippets(ctx, rd, book_ids): + ''' + Perform the specified full text query and return the results with snippets restricted to the specified book ids. + + Optional: ?query=&library_id=&use_stemming= + &query_id=arbitrary&snippet_size=32&highlight_start=\x1c&highlight_end=\x1e + ''' + db = get_library_data(ctx, rd)[0] + if not db.is_fts_enabled(): + raise HTTPPreconditionRequired('Full text searching is not enabled on this library') + + use_stemming = rd.query.get('use_stemming', 'y') == 'y' + query = rd.query.get('query' '') + if not query: + raise HTTPBadRequest('No search query specified') + try: + bids = frozenset(map(int, book_ids.split(','))) + except Exception: + raise HTTPBadRequest('Invalid list of book ids') + try: + ssz = int(rd.query.get('snippet_size', 32)) + except Exception: + raise HTTPBadRequest('Invalid snippet size') + snippets = {bid:{} for bid in bids} + ans = {} + qid = rd.query.get('query_id') + if qid: + ans['query_id'] = qid + from calibre.db import FTSQueryError + sanitize_pat = re.compile(r'\s+') + try: + for x in db.fts_search( + query, use_stemming=use_stemming, return_text=True, + highlight_start=rd.query.get('highlight_start', '\x1c'), highlight_end=rd.query.get('highlight_end', '\x1e'), + restrict_to_book_ids=bids, snippet_size=ssz, + ): + r = snippets[x['book_id']] + q = sanitize_pat.sub('', x['text']) + r.setdefault(q, {'formats': [], 'text': x['text'],})['formats'].append(x['format']) + except FTSQueryError as e: + raise HTTPUnprocessableEntity(str(e)) + ans['snippets'] = {bid: tuple(v.values()) for bid, v in snippets.items()} + return ans diff --git a/src/pyj/book_list/fts.pyj b/src/pyj/book_list/fts.pyj index ebf67fa38c..6f5380cf41 100644 --- a/src/pyj/book_list/fts.pyj +++ b/src/pyj/book_list/fts.pyj @@ -5,10 +5,13 @@ from __python__ import bound_methods, hash_literals from elementmaker import E from ajax import ajax -from book_list.globals import get_session_data, get_current_query +from book_list.cover_grid import THUMBNAIL_MAX_HEIGHT, THUMBNAIL_MAX_WIDTH +from book_list.globals import get_current_query, get_session_data from book_list.router import back, push_state from book_list.top_bar import create_top_bar from book_list.ui import set_panel_handler +from book_list.views import create_image +from book_list.library_data import current_library_id from complete import create_search_bar from dom import add_extra_css, clear, set_css from gettext import gettext as _ @@ -26,6 +29,7 @@ add_extra_css(def(): style += f'{sel} .h' + ' { font-weight: bold; padding-bottom: 0.25ex }\n' style += f'{sel} .bq' + ' { margin-left: 1em; margin-top: 0.5ex; margin-bottom: 0.5ex; font-style: italic }\n' style += f'{sel} p' + ' { margin: 0}\n' + style += '.fts-highlight-match { font-style: italic; font-weight: bold }\n' return style ) @@ -41,7 +45,9 @@ def showing_search_panel(): def make_new_fts_query(q): nonlocal current_fts_query, query_id_counter query_id_counter += 1 - current_fts_query = {'query_id': query_id_counter} + current_fts_query = {'query_id': query_id_counter + ''} + if current_library_id(): + current_fts_query.library_id = current_library_id() Object.assign(current_fts_query, q) xhr = ajax('fts/search', on_initial_fts_fetched, query=current_fts_query, bypass_cache=True) xhr.send() @@ -59,7 +65,7 @@ def on_initial_fts_fetched(end_type, xhr, ev): results = JSON.parse(xhr.responseText) except Exception as err: return error_dialog(_('Server error'), _('Failed to parse search response from server.'), err + '') - if results.query_id is not current_fts_query.query_id: + if results.query_id + '' is not current_fts_query.query_id: return current_fts_query.results = results show_initial_results() @@ -167,31 +173,153 @@ def apply_search_panel_state(): show_initial_results() +def book_result_tile(book_id, title, authors): + tile_height, img_max_width = '16ex', '12ex' + img = create_image(book_id, THUMBNAIL_MAX_WIDTH, THUMBNAIL_MAX_HEIGHT, def():pass;) + img.style.maxHeight = tile_height + img.style.maxWidth = img_max_width + tooltip = (title) + ' ' + _('by') + ' ' + (authors) + img.alt = _('Cover of') + ' ' + tooltip + return E.div( + title=tooltip, + data_book_id=book_id + '', data_snippets_needed='1', + style=f'cursor: pointer; margin-bottom: 1ex; display:flex; height: {tile_height}; max-height: {tile_height}; width: 100%; align-items: stretch', + E.div( + style=f'margin-right: 1ex; width: {img_max_width}', + img + ), + + E.div( + style=f'display:flex; flex-direction: column; height: 100%; overflow-y: auto', + E.div(E.span(style='font-size: small; font-style: italic; opacity: 0.5;', _('loading'), '…'), class_='snippets_container'), + ) + ) + + +def on_snippets_fetched(end_type, xhr, ev): + if end_type is 'abort' or not showing_search_panel(): + return + if end_type is not 'load': + return error_dialog(_('Failed to search'), _('The search failed. Click "Show details" for more information.'), xhr.error_html) + container = component('results') + if not container: + return + try: + results = JSON.parse(xhr.responseText) + except Exception as err: + return error_dialog(_('Server error'), _('Failed to parse search response from server.'), err + '') + if results.query_id is not current_fts_query.query_id: + return + if not current_fts_query.results.snippets: + current_fts_query.results.snippets = {} + Object.assign(current_fts_query.results.snippets, results.snippets) + show_snippets(results.snippets) + fetch_snippets() + + +def render_text(parent, text): + in_highlighted = False + while text.length > 0: + q = '\x1e' if in_highlighted else '\x1c' + idx = text.indexOf(q) + if idx < 0: + idx = text.length + chunk = text[:idx] + text = text[idx+1:] + if in_highlighted: + parent.append(E.span(class_='fts-highlight-match', chunk)) + in_highlighted = False + else: + parent.append(E.span(chunk)) + in_highlighted = True + + +def show_snippets(snippets): + container = component('results') + for book_id in Object.keys(snippets): + c = container.querySelector(f'[data-book-id="{book_id}"]') + v'delete c.dataset.snippetsNeeded' + s = c.querySelector('.snippets_container') + clear(s) + for x in snippets[book_id]: + f = ' '.join(x.formats) + e = E.div(E.code( + style='border: solid 1px currentColor; border-radius: 6px; padding: 0 4px; font-size: smaller', + data_formats=f, f) + ) + e.appendChild(E.span(' ')) + render_text(e, x.text) + s.appendChild(e) + + +def fetch_snippets(): + container = component('results') + if not container: + return + ids = v'[]' + for x in container.querySelectorAll('[data-snippets-needed="1"]'): + book_id = int(x.dataset.bookId) + ids.push(book_id) + if ids.length > 1: + break + if ids.length < 1: + return + ids = ','.join(ids) + q = {} + Object.assign(q, current_fts_query) + q.results = v'undefined' + xhr = ajax(f'fts/snippets/{ids}', on_snippets_fetched, query=q, bypass_cache=True) + xhr.send() + + def show_initial_results(): container = component('results') if not container: return clear(container) results = current_fts_query.results - results + left, total = results.indexing_status['left'], results.indexing_status['total'] + if left > 0: + pc = int(((total-left) / total) * 100) + container.appendChild(E.div( + style='margin-top: 0.5ex', + E.span(_('WARNING:'), style='color: red; font-weight: bold'), '\xa0', + _('Indexing of library only {}% complete, search results may be incomplete.').format(pc) + )) + rc = E.div(style='margin-top: 0.5ex') + container.appendChild(rc) + mm = results.metadata + seen = {} + for r in results.results: + bid = r['book_id'] + m = mm[bid] + if not seen[bid]: + rc.appendChild(book_result_tile(bid, m['title'], m['authors'])) + seen[bid] = rc.lastChild + rc.appendChild(E.hr()) + if results.results.length < 1: + rc.appendChild(E.div(_('No matches found'))) + fetch_snippets() -def show_panel(visible, hidden): +def show_panel(visible): c = component(visible) if c: + x = c.parentNode.firstChild + while x: + if x.nodeType is 1 and x is not c: + x.style.display = 'none' + x = x.nextSibling c.style.display = 'block' - c = component(hidden) - if c: - c.style.display = 'none' def show_search_panel(): - show_panel('search', 'index') + show_panel('search') apply_search_panel_state() def show_index_panel(): - show_panel('index', 'search') + show_panel('index') def init(container_id): diff --git a/src/pyj/book_list/library_data.pyj b/src/pyj/book_list/library_data.pyj index cb4afc8582..6ca87ab005 100644 --- a/src/pyj/book_list/library_data.pyj +++ b/src/pyj/book_list/library_data.pyj @@ -203,11 +203,12 @@ def field_names_for(field, proceed): def thumbnail_url(book_id, width, height): - return absolute_path( - 'get/thumb/{}/{}?sz={}x{}'.format( - book_id, loaded_books_query().library_id, - Math.ceil(width * window.devicePixelRatio), Math.ceil(height * window.devicePixelRatio) - )) + query = f'sz={Math.ceil(width * window.devicePixelRatio)}x{Math.ceil(height * window.devicePixelRatio)}' + prefix = f'get/thumb/{book_id}' + lid = loaded_books_query().library_id or current_library_id() + if lid: + prefix += f'/{lid}' + return absolute_path(f'{prefix}?{query}') def cover_url(book_id): @@ -269,7 +270,7 @@ class ThumbnailCache: # Cache to prevent browser from issuing HTTP requests when thumbnails pages # are destroyed/rebuilt. - def __init__(self, size=250): + def __init__(self, size=256): self.cache = LRUCache(size) def get(self, book_id, width, height, callback): @@ -281,7 +282,7 @@ class ThumbnailCache: img.onerror = self.load_finished.bind(None, item, 'error') img.onload = self.load_finished.bind(None, item, 'load') img.onabort = self.load_finished.bind(None, item, 'abort') - img.dataset.bookId = str(book_id) + img.dataset.bookId = book_id + '' img.src = url self.cache.set(url, item) return img