diff --git a/resources/fts_sqlite.sql b/resources/fts_sqlite.sql index 8ef19cfd3b..d2b0bd764f 100644 --- a/resources/fts_sqlite.sql +++ b/resources/fts_sqlite.sql @@ -10,10 +10,11 @@ CREATE TABLE fts_db.books_text ( id INTEGER PRIMARY KEY, timestamp REAL NOT NULL, format TEXT NOT NULL COLLATE NOCASE, format_hash TEXT NOT NULL COLLATE NOCASE, - format_size INTEGER NOT NULL, + format_size INTEGER NOT NULL DEFAULT 0, searchable_text TEXT NOT NULL DEFAULT "", - text_size INTEGER NOT NULL, - text_hash TEXT NOT NULL COLLATE NOCASE, + text_size INTEGER NOT NULL DEFAULT 0, + text_hash TEXT NOT NULL COLLATE NOCASE DEFAULT "", + err_msg TEXT DEFAULT "", UNIQUE(book, format) ); diff --git a/src/calibre/db/fts/connect.py b/src/calibre/db/fts/connect.py index 15d7c14990..b5cc8e29ca 100644 --- a/src/calibre/db/fts/connect.py +++ b/src/calibre/db/fts/connect.py @@ -67,11 +67,17 @@ class FTS: conn = self.get_connection() conn.execute('DELETE FROM fts_db.dirtied_formats WHERE book=? AND format=?', (book_id, fmt.upper())) - def add_text(self, book_id, fmt, text, text_hash='', fmt_size=0, fmt_hash=''): + def add_text(self, book_id, fmt, text, text_hash='', fmt_size=0, fmt_hash='', err_msg=''): conn = self.get_connection() ts = (utcnow() - EPOCH).total_seconds() fmt = fmt.upper() - if text: + if err_msg: + conn.execute( + 'INSERT OR REPLACE INTO fts_db.books_text ' + '(book, timestamp, format, format_size, format_hash, err_msg) VALUES ' + '(?, ?, ?, ?, ?, ?)', ( + book_id, ts, fmt, fmt_size, fmt_hash, err_msg)) + elif text: conn.execute( 'INSERT OR REPLACE INTO fts_db.books_text ' '(book, timestamp, format, format_size, format_hash, searchable_text, text_size, text_hash) VALUES ' @@ -94,7 +100,7 @@ class FTS: for x in conn.get('SELECT id FROM fts_db.books_text WHERE book=? AND format=? AND text_hash=?', (book_id, fmt, text_hash)): text = '' break - self.add_text(book_id, fmt, text, text_hash, fmt_size, fmt_hash) + self.add_text(book_id, fmt, text, text_hash, fmt_size, fmt_hash, err_msg) def queue_job(self, book_id, fmt, path, fmt_size, fmt_hash): conn = self.get_connection() diff --git a/src/calibre/db/tests/fts_api.py b/src/calibre/db/tests/fts_api.py index aa0439e5fa..1fc66abc31 100644 --- a/src/calibre/db/tests/fts_api.py +++ b/src/calibre/db/tests/fts_api.py @@ -7,8 +7,9 @@ import os import shutil import sys import time -from io import BytesIO +from io import BytesIO, StringIO from zipfile import ZipFile +from unittest.mock import patch from calibre.db.fts.text import html_to_text from calibre.db.tests.base import BaseTest @@ -87,8 +88,6 @@ class FTSAPITest(BaseTest): self.assertFalse(fts.pool.initialized) # TODO: check shutdown when worker hangs - # TODO: add a max scan time and check that the worker honors it - # TODO: Add a column to store failures with tracebacks in the books_text table # check enabling scans pre-exisintg cache = self.new_library() @@ -100,6 +99,16 @@ class FTSAPITest(BaseTest): cache.add_format(1, 'TXTZ', self.make_txtz(b'a test text', extra='xxx')) self.wait_for_fts_to_finish(fts) check(id=1, book=1, format='TXTZ', searchable_text='a test text') + + # check max_duration + for w in fts.pool.workers: + w.max_duration = -1 + with patch('sys.stderr', new_callable=StringIO): + cache.add_format(1, 'TXTZ', self.make_txtz(b'a timed out text')) + self.wait_for_fts_to_finish(fts) + check(id=2, book=1, format='TXTZ', err_msg='Extracting text from the TXTZ file of size 132 B took too long') + for w in fts.pool.workers: + w.max_duration = w.__class__.max_duration cache.close() def test_fts_triggers(self):