From a784e359a2f572fac853d962e3262efc9848c411 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 11 Dec 2022 13:11:27 +0530
Subject: [PATCH] More work on CS FTS

---
 src/calibre/srv/fts.py             |  53 ++++++++++-
 src/pyj/book_list/fts.pyj          | 148 +++++++++++++++++++++++++++--
 src/pyj/book_list/library_data.pyj |  15 +--
 3 files changed, 197 insertions(+), 19 deletions(-)
diff --git a/src/calibre/srv/fts.py b/src/calibre/srv/fts.py
index 62ec517203..8f650a17d3 100644
--- a/src/calibre/srv/fts.py
+++ b/src/calibre/srv/fts.py
@@ -2,6 +2,9 @@
 # vim:fileencoding=utf-8
 # License: GPL v3 Copyright: 2022, Kovid Goyal <kovid at kovidgoyal.net>
 
+import re
+
+from calibre.ebooks.metadata import authors_to_string
 from calibre.srv.errors import (
     HTTPBadRequest, HTTPPreconditionRequired, HTTPUnprocessableEntity,
 )
@@ -22,7 +25,7 @@ def fts_search(ctx, rd):
         raise HTTPPreconditionRequired('Full text searching is not enabled on this library')
     metadata_cache = {}
     l, t = db.fts_indexing_progress()[:2]
-    ans = {'metadata': metadata_cache, 'left': l, 'total': t}
+    ans = {'metadata': metadata_cache, 'indexing_status': {'left': l, 'total': t}}
 
     use_stemming = rd.query.get('use_stemming', 'y') == 'y'
     query = rd.query.get('query' '')
@@ -38,7 +41,7 @@ def fts_search(ctx, rd):
         bid = result['book_id']
         if bid not in metadata_cache:
             with db.safe_read_lock:
-                metadata_cache[bid] = {'title': db._field_for('title', bid), 'authors': db._field_for('authors', bid)}
+                metadata_cache[bid] = {'title': db._field_for('title', bid), 'authors': authors_to_string(db._field_for('authors', bid))}
         return result
 
     from calibre.db import FTSQueryError
@@ -49,3 +52,49 @@ def fts_search(ctx, rd):
     except FTSQueryError as e:
         raise HTTPUnprocessableEntity(str(e))
     return ans
+
+
+@endpoint('/fts/snippets/{book_ids}', postprocess=json)
+def fts_snippets(ctx, rd, book_ids):
+    r'''
+    Perform the specified full text query and return de-duplicated snippets, keyed by book id, for the comma separated book ids.
+
+    Query: ?query=<search query> (required) &library_id=<default library>&use_stemming=<y or n>
+    &query_id=arbitrary&snippet_size=32&highlight_start=\x1c&highlight_end=\x1e (all optional)
+    '''
+    db = get_library_data(ctx, rd)[0]
+    if not db.is_fts_enabled():
+        raise HTTPPreconditionRequired('Full text searching is not enabled on this library')
+
+    use_stemming = rd.query.get('use_stemming', 'y') == 'y'
+    query = rd.query.get('query', '')  # the comma matters: 'query' '' is a single concatenated literal, not a key plus default
+    if not query:
+        raise HTTPBadRequest('No search query specified')
+    try:
+        bids = frozenset(map(int, book_ids.split(',')))  # duplicates in the URL collapse into one id
+    except Exception:
+        raise HTTPBadRequest('Invalid list of book ids')
+    try:
+        ssz = int(rd.query.get('snippet_size', 32))
+    except Exception:
+        raise HTTPBadRequest('Invalid snippet size')
+    snippets = {bid:{} for bid in bids}  # every requested id gets an entry, even when it has no matches
+    ans = {}
+    qid = rd.query.get('query_id')
+    if qid:
+        ans['query_id'] = qid  # echoed back unchanged so the client can match the response to its request
+    from calibre.db import FTSQueryError
+    sanitize_pat = re.compile(r'\s+')
+    try:
+        for x in db.fts_search(
+            query, use_stemming=use_stemming, return_text=True,
+            highlight_start=rd.query.get('highlight_start', '\x1c'), highlight_end=rd.query.get('highlight_end', '\x1e'),
+            restrict_to_book_ids=bids, snippet_size=ssz,
+        ):
+            r = snippets[x['book_id']]  # relies on restrict_to_book_ids limiting results to bids -- TODO confirm
+            q = sanitize_pat.sub('', x['text'])  # whitespace-free text is the de-duplication key
+            r.setdefault(q, {'formats': [], 'text': x['text'],})['formats'].append(x['format'])  # identical snippets from several formats are merged
+    except FTSQueryError as e:
+        raise HTTPUnprocessableEntity(str(e))
+    ans['snippets'] = {bid: tuple(v.values()) for bid, v in snippets.items()}  # drop the dedup keys, keep only the {'formats', 'text'} entries
+    return ans  # {'query_id': qid (only if supplied), 'snippets': {book_id: ({'formats': [...], 'text': ...}, ...)}}
diff --git a/src/pyj/book_list/fts.pyj b/src/pyj/book_list/fts.pyj
index ebf67fa38c..6f5380cf41 100644
--- a/src/pyj/book_list/fts.pyj
+++ b/src/pyj/book_list/fts.pyj
@@ -5,10 +5,13 @@ from __python__ import bound_methods, hash_literals
 from elementmaker import E
 
 from ajax import ajax
-from book_list.globals import get_session_data, get_current_query
+from book_list.cover_grid import THUMBNAIL_MAX_HEIGHT, THUMBNAIL_MAX_WIDTH
+from book_list.globals import get_current_query, get_session_data
 from book_list.router import back, push_state
 from book_list.top_bar import create_top_bar
 from book_list.ui import set_panel_handler
+from book_list.views import create_image
+from book_list.library_data import current_library_id
 from complete import create_search_bar
 from dom import add_extra_css, clear, set_css
 from gettext import gettext as _
@@ -26,6 +29,7 @@ add_extra_css(def():
     style += f'{sel} .h' + ' { font-weight: bold; padding-bottom: 0.25ex }\n'
     style += f'{sel} .bq' + ' { margin-left: 1em; margin-top: 0.5ex; margin-bottom: 0.5ex; font-style: italic }\n'
     style += f'{sel} p' + ' { margin: 0}\n'
+    style += '.fts-highlight-match { font-style: italic; font-weight: bold }\n'
     return style
 )
 
@@ -41,7 +45,9 @@ def showing_search_panel():
 def make_new_fts_query(q):
     nonlocal current_fts_query, query_id_counter
     query_id_counter += 1
-    current_fts_query = {'query_id': query_id_counter}
+    current_fts_query = {'query_id': query_id_counter + ''}
+    if current_library_id():
+        current_fts_query.library_id = current_library_id()
     Object.assign(current_fts_query, q)
     xhr = ajax('fts/search', on_initial_fts_fetched, query=current_fts_query, bypass_cache=True)
     xhr.send()
@@ -59,7 +65,7 @@ def on_initial_fts_fetched(end_type, xhr, ev):
         results = JSON.parse(xhr.responseText)
     except Exception as err:
         return error_dialog(_('Server error'), _('Failed to parse search response from server.'), err + '')
-    if results.query_id is not current_fts_query.query_id:
+    if results.query_id + '' is not current_fts_query.query_id:
         return
     current_fts_query.results = results
     show_initial_results()
@@ -167,31 +173,153 @@ def apply_search_panel_state():
     show_initial_results()
 
 
+def book_result_tile(book_id, title, authors):  # Tile for one book: cover image plus a 'loading' placeholder; marked data-snippets-needed so fetch_snippets() picks it up
+    tile_height, img_max_width = '16ex', '12ex'  # CSS sizes used for both the image limits and the tile layout below
+    img = create_image(book_id, THUMBNAIL_MAX_WIDTH, THUMBNAIL_MAX_HEIGHT, def():pass;)
+    img.style.maxHeight = tile_height
+    img.style.maxWidth = img_max_width
+    tooltip = (title) + ' ' + _('by') + ' ' + (authors)  # authors is concatenated as-is, so it is expected to be a display string -- TODO confirm against the server response
+    img.alt = _('Cover of') + ' ' + tooltip
+    return E.div(
+        title=tooltip,
+        data_book_id=book_id + '', data_snippets_needed='1',
+        style=f'cursor: pointer; margin-bottom: 1ex; display:flex; height: {tile_height}; max-height: {tile_height}; width: 100%; align-items: stretch',
+        E.div(
+            style=f'margin-right: 1ex; width: {img_max_width}',
+            img
+        ),
+
+        E.div(
+            style=f'display:flex; flex-direction: column; height: 100%; overflow-y: auto',
+            E.div(E.span(style='font-size: small; font-style: italic; opacity: 0.5;', _('loading'), '…'), class_='snippets_container'),
+        )
+    )
+
+
+def on_snippets_fetched(end_type, xhr, ev):  # Completion callback passed to ajax() for a fts/snippets request
+    if end_type is 'abort' or not showing_search_panel():  # nothing to update if the request was aborted or the search panel is not showing
+        return
+    if end_type is not 'load':
+        return error_dialog(_('Failed to search'), _('The search failed. Click "Show details" for more information.'), xhr.error_html)
+    container = component('results')
+    if not container:
+        return
+    try:
+        results = JSON.parse(xhr.responseText)
+    except Exception as err:
+        return error_dialog(_('Server error'), _('Failed to parse search response from server.'), err + '')
+    if results.query_id is not current_fts_query.query_id:  # response does not belong to the current query, ignore it
+        return
+    if not current_fts_query.results.snippets:
+        current_fts_query.results.snippets = {}
+    Object.assign(current_fts_query.results.snippets, results.snippets)  # accumulate the snippets on the current query's results
+    show_snippets(results.snippets)
+    fetch_snippets()  # ask for the next batch; fetch_snippets() returns early once no tile is marked as needing snippets
+
+
+def render_text(parent, text):  # Append text to parent as spans; parts between a \x1c and the following \x1e get the fts-highlight-match class
+    in_highlighted = False
+    while text.length > 0:
+        q = '\x1e' if in_highlighted else '\x1c'  # delimiter to look for: end marker while inside a match, start marker otherwise
+        idx = text.indexOf(q)
+        if idx < 0:
+            idx = text.length  # no further marker: the remaining text is a single chunk
+        chunk = text[:idx]
+        text = text[idx+1:]  # continue after the one character marker
+        if in_highlighted:
+            parent.append(E.span(class_='fts-highlight-match', chunk))
+            in_highlighted = False
+        else:
+            parent.append(E.span(chunk))
+            in_highlighted = True
+
+
+def show_snippets(snippets):  # Render snippets (object mapping book id -> list of entries with .formats and .text) into the matching result tiles
+    container = component('results')
+    for book_id in Object.keys(snippets):
+        c = container.querySelector(f'[data-book-id="{book_id}"]')  # NOTE(review): neither container nor c is checked for null before being dereferenced
+        v'delete c.dataset.snippetsNeeded'  # un-mark the tile so fetch_snippets() does not request it again
+        s = c.querySelector('.snippets_container')
+        clear(s)  # presumably empties the container (initially it holds the 'loading' placeholder) -- clear() comes from dom
+        for x in snippets[book_id]:
+            f = ' '.join(x.formats)
+            e = E.div(E.code(
+                style='border: solid 1px currentColor; border-radius: 6px; padding: 0 4px; font-size: smaller',
+                data_formats=f, f)
+            )
+            e.appendChild(E.span(' '))
+            render_text(e, x.text)
+            s.appendChild(e)
+
+
+def fetch_snippets():  # Request snippets for the next result tiles still marked data-snippets-needed="1"
+    container = component('results')
+    if not container:
+        return
+    ids = v'[]'
+    for x in container.querySelectorAll('[data-snippets-needed="1"]'):
+        book_id = int(x.dataset.bookId)
+        ids.push(book_id)
+        if ids.length > 1:  # stop after two ids, so each request covers at most two books
+            break
+    if ids.length < 1:  # no tile is waiting for snippets
+        return
+    ids = ','.join(ids)
+    q = {}
+    Object.assign(q, current_fts_query)
+    q.results = v'undefined'  # blank out the cached results on the copy; presumably ajax() omits undefined values from the query -- TODO confirm
+    xhr = ajax(f'fts/snippets/{ids}', on_snippets_fetched, query=q, bypass_cache=True)
+    xhr.send()
+
+
 def show_initial_results():
     container = component('results')
     if not container:
         return
     clear(container)
     results = current_fts_query.results
-    results
+    left, total = results.indexing_status['left'], results.indexing_status['total']  # indexing progress sent by the server with the search results
+    if left > 0:  # books remain unindexed, so warn that results may be incomplete
+        pc = int(((total-left) / total) * 100)
+        container.appendChild(E.div(
+            style='margin-top: 0.5ex',
+            E.span(_('WARNING:'), style='color: red; font-weight: bold'), '\xa0',
+            _('Indexing of library only {}% complete, search results may be incomplete.').format(pc)
+        ))
+    rc = E.div(style='margin-top: 0.5ex')
+    container.appendChild(rc)
+    mm = results.metadata
+    seen = {}  # book id -> tile element, so each book gets a single tile even with several results
+    for r in results.results:
+        bid = r['book_id']
+        m = mm[bid]
+        if not seen[bid]:
+            rc.appendChild(book_result_tile(bid, m['title'], m['authors']))
+            seen[bid] = rc.lastChild
+            rc.appendChild(E.hr())
+    if results.results.length < 1:
+        rc.appendChild(E.div(_('No matches found')))
+    fetch_snippets()  # start loading snippets for the tiles created above
 
 
-def show_panel(visible, hidden):
+def show_panel(visible):  # Display the component named by visible and hide all of its sibling elements
     c = component(visible)
     if c:
+        x = c.parentNode.firstChild  # walk every child of the parent, starting from the first
+        while x:
+            if x.nodeType is 1 and x is not c:  # nodeType 1 is an element node; other node types have no style to set
+                x.style.display = 'none'
+            x = x.nextSibling
         c.style.display = 'block'
-    c = component(hidden)
-    if c:
-        c.style.display = 'none'
 
 
 def show_search_panel():
-    show_panel('search', 'index')
+    show_panel('search')  # show_panel() hides the sibling panels itself, so only the visible one is named
     apply_search_panel_state()
 
 
 def show_index_panel():
-    show_panel('index', 'search')
+    show_panel('index')  # show_panel() hides the sibling panels itself, so only the visible one is named
 
 
 def init(container_id):
diff --git a/src/pyj/book_list/library_data.pyj b/src/pyj/book_list/library_data.pyj
index cb4afc8582..6ca87ab005 100644
--- a/src/pyj/book_list/library_data.pyj
+++ b/src/pyj/book_list/library_data.pyj
@@ -203,11 +203,12 @@ def field_names_for(field, proceed):
 
 
 def thumbnail_url(book_id, width, height):
-    return absolute_path(
-        'get/thumb/{}/{}?sz={}x{}'.format(
-            book_id, loaded_books_query().library_id,
-            Math.ceil(width * window.devicePixelRatio), Math.ceil(height * window.devicePixelRatio)
-        ))
+    query = f'sz={Math.ceil(width * window.devicePixelRatio)}x{Math.ceil(height * window.devicePixelRatio)}'  # requested size scaled by the device pixel ratio, rounded up
+    prefix = f'get/thumb/{book_id}'
+    lid = loaded_books_query().library_id or current_library_id()  # fall back to the current library when the loaded books query has no library id
+    if lid:  # leave the library path component out entirely when no id is known
+        prefix += f'/{lid}'
+    return absolute_path(f'{prefix}?{query}')
 
 
 def cover_url(book_id):
@@ -269,7 +270,7 @@ class ThumbnailCache:
     # Cache to prevent browser from issuing HTTP requests when thumbnails pages
     # are destroyed/rebuilt.
 
-    def __init__(self, size=250):
+    def __init__(self, size=256):
         self.cache = LRUCache(size)
 
     def get(self, book_id, width, height, callback):
@@ -281,7 +282,7 @@ class ThumbnailCache:
             img.onerror = self.load_finished.bind(None, item, 'error')
             img.onload = self.load_finished.bind(None, item, 'load')
             img.onabort = self.load_finished.bind(None, item, 'abort')
-            img.dataset.bookId = str(book_id)
+            img.dataset.bookId = book_id + ''
             img.src = url
             self.cache.set(url, item)
             return img