Implement storing of failure messages in books table

This commit is contained in:
Kovid Goyal 2022-04-19 09:49:06 +05:30
parent 2f2a1063e2
commit f7b47a72fc
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 25 additions and 9 deletions

View File

@ -10,10 +10,11 @@ CREATE TABLE fts_db.books_text ( id INTEGER PRIMARY KEY,
timestamp REAL NOT NULL,
format TEXT NOT NULL COLLATE NOCASE,
format_hash TEXT NOT NULL COLLATE NOCASE,
format_size INTEGER NOT NULL,
format_size INTEGER NOT NULL DEFAULT 0,
searchable_text TEXT NOT NULL DEFAULT "",
text_size INTEGER NOT NULL,
text_hash TEXT NOT NULL COLLATE NOCASE,
text_size INTEGER NOT NULL DEFAULT 0,
text_hash TEXT NOT NULL COLLATE NOCASE DEFAULT "",
err_msg TEXT DEFAULT "",
UNIQUE(book, format)
);

View File

@ -67,11 +67,17 @@ class FTS:
conn = self.get_connection()
conn.execute('DELETE FROM fts_db.dirtied_formats WHERE book=? AND format=?', (book_id, fmt.upper()))
def add_text(self, book_id, fmt, text, text_hash='', fmt_size=0, fmt_hash=''):
def add_text(self, book_id, fmt, text, text_hash='', fmt_size=0, fmt_hash='', err_msg=''):
conn = self.get_connection()
ts = (utcnow() - EPOCH).total_seconds()
fmt = fmt.upper()
if text:
if err_msg:
conn.execute(
'INSERT OR REPLACE INTO fts_db.books_text '
'(book, timestamp, format, format_size, format_hash, err_msg) VALUES '
'(?, ?, ?, ?, ?, ?)', (
book_id, ts, fmt, fmt_size, fmt_hash, err_msg))
elif text:
conn.execute(
'INSERT OR REPLACE INTO fts_db.books_text '
'(book, timestamp, format, format_size, format_hash, searchable_text, text_size, text_hash) VALUES '
@ -94,7 +100,7 @@ class FTS:
for x in conn.get('SELECT id FROM fts_db.books_text WHERE book=? AND format=? AND text_hash=?', (book_id, fmt, text_hash)):
text = ''
break
self.add_text(book_id, fmt, text, text_hash, fmt_size, fmt_hash)
self.add_text(book_id, fmt, text, text_hash, fmt_size, fmt_hash, err_msg)
def queue_job(self, book_id, fmt, path, fmt_size, fmt_hash):
conn = self.get_connection()

View File

@ -7,8 +7,9 @@ import os
import shutil
import sys
import time
from io import BytesIO
from io import BytesIO, StringIO
from zipfile import ZipFile
from unittest.mock import patch
from calibre.db.fts.text import html_to_text
from calibre.db.tests.base import BaseTest
@ -87,8 +88,6 @@ class FTSAPITest(BaseTest):
self.assertFalse(fts.pool.initialized)
# TODO: check shutdown when worker hangs
# TODO: add a max scan time and check that the worker honors it
# TODO: Add a column to store failures with tracebacks in the books_text table
# check enabling scans pre-existing
cache = self.new_library()
@ -100,6 +99,16 @@ class FTSAPITest(BaseTest):
cache.add_format(1, 'TXTZ', self.make_txtz(b'a test text', extra='xxx'))
self.wait_for_fts_to_finish(fts)
check(id=1, book=1, format='TXTZ', searchable_text='a test text')
# check max_duration
for w in fts.pool.workers:
w.max_duration = -1
with patch('sys.stderr', new_callable=StringIO):
cache.add_format(1, 'TXTZ', self.make_txtz(b'a timed out text'))
self.wait_for_fts_to_finish(fts)
check(id=2, book=1, format='TXTZ', err_msg='Extracting text from the TXTZ file of size 132 B took too long')
for w in fts.pool.workers:
w.max_duration = w.__class__.max_duration
cache.close()
def test_fts_triggers(self):