Implement storing of failure messages in books table

This commit is contained in:
Kovid Goyal 2022-04-19 09:49:06 +05:30
parent 2f2a1063e2
commit f7b47a72fc
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 25 additions and 9 deletions

View File

@ -10,10 +10,11 @@ CREATE TABLE fts_db.books_text ( id INTEGER PRIMARY KEY,
timestamp REAL NOT NULL, timestamp REAL NOT NULL,
format TEXT NOT NULL COLLATE NOCASE, format TEXT NOT NULL COLLATE NOCASE,
format_hash TEXT NOT NULL COLLATE NOCASE, format_hash TEXT NOT NULL COLLATE NOCASE,
format_size INTEGER NOT NULL, format_size INTEGER NOT NULL DEFAULT 0,
searchable_text TEXT NOT NULL DEFAULT "", searchable_text TEXT NOT NULL DEFAULT "",
text_size INTEGER NOT NULL, text_size INTEGER NOT NULL DEFAULT 0,
text_hash TEXT NOT NULL COLLATE NOCASE, text_hash TEXT NOT NULL COLLATE NOCASE DEFAULT "",
err_msg TEXT DEFAULT "",
UNIQUE(book, format) UNIQUE(book, format)
); );

View File

@ -67,11 +67,17 @@ class FTS:
conn = self.get_connection() conn = self.get_connection()
conn.execute('DELETE FROM fts_db.dirtied_formats WHERE book=? AND format=?', (book_id, fmt.upper())) conn.execute('DELETE FROM fts_db.dirtied_formats WHERE book=? AND format=?', (book_id, fmt.upper()))
def add_text(self, book_id, fmt, text, text_hash='', fmt_size=0, fmt_hash=''): def add_text(self, book_id, fmt, text, text_hash='', fmt_size=0, fmt_hash='', err_msg=''):
conn = self.get_connection() conn = self.get_connection()
ts = (utcnow() - EPOCH).total_seconds() ts = (utcnow() - EPOCH).total_seconds()
fmt = fmt.upper() fmt = fmt.upper()
if text: if err_msg:
conn.execute(
'INSERT OR REPLACE INTO fts_db.books_text '
'(book, timestamp, format, format_size, format_hash, err_msg) VALUES '
'(?, ?, ?, ?, ?, ?)', (
book_id, ts, fmt, fmt_size, fmt_hash, err_msg))
elif text:
conn.execute( conn.execute(
'INSERT OR REPLACE INTO fts_db.books_text ' 'INSERT OR REPLACE INTO fts_db.books_text '
'(book, timestamp, format, format_size, format_hash, searchable_text, text_size, text_hash) VALUES ' '(book, timestamp, format, format_size, format_hash, searchable_text, text_size, text_hash) VALUES '
@ -94,7 +100,7 @@ class FTS:
for x in conn.get('SELECT id FROM fts_db.books_text WHERE book=? AND format=? AND text_hash=?', (book_id, fmt, text_hash)): for x in conn.get('SELECT id FROM fts_db.books_text WHERE book=? AND format=? AND text_hash=?', (book_id, fmt, text_hash)):
text = '' text = ''
break break
self.add_text(book_id, fmt, text, text_hash, fmt_size, fmt_hash) self.add_text(book_id, fmt, text, text_hash, fmt_size, fmt_hash, err_msg)
def queue_job(self, book_id, fmt, path, fmt_size, fmt_hash): def queue_job(self, book_id, fmt, path, fmt_size, fmt_hash):
conn = self.get_connection() conn = self.get_connection()

View File

@ -7,8 +7,9 @@ import os
import shutil import shutil
import sys import sys
import time import time
from io import BytesIO from io import BytesIO, StringIO
from zipfile import ZipFile from zipfile import ZipFile
from unittest.mock import patch
from calibre.db.fts.text import html_to_text from calibre.db.fts.text import html_to_text
from calibre.db.tests.base import BaseTest from calibre.db.tests.base import BaseTest
@ -87,8 +88,6 @@ class FTSAPITest(BaseTest):
self.assertFalse(fts.pool.initialized) self.assertFalse(fts.pool.initialized)
# TODO: check shutdown when worker hangs # TODO: check shutdown when worker hangs
# TODO: add a max scan time and check that the worker honors it
# TODO: Add a column to store failures with tracebacks in the books_text table
# check enabling scans pre-existing
cache = self.new_library() cache = self.new_library()
@ -100,6 +99,16 @@ class FTSAPITest(BaseTest):
cache.add_format(1, 'TXTZ', self.make_txtz(b'a test text', extra='xxx')) cache.add_format(1, 'TXTZ', self.make_txtz(b'a test text', extra='xxx'))
self.wait_for_fts_to_finish(fts) self.wait_for_fts_to_finish(fts)
check(id=1, book=1, format='TXTZ', searchable_text='a test text') check(id=1, book=1, format='TXTZ', searchable_text='a test text')
# check max_duration
for w in fts.pool.workers:
w.max_duration = -1
with patch('sys.stderr', new_callable=StringIO):
cache.add_format(1, 'TXTZ', self.make_txtz(b'a timed out text'))
self.wait_for_fts_to_finish(fts)
check(id=2, book=1, format='TXTZ', err_msg='Extracting text from the TXTZ file of size 132 B took too long')
for w in fts.pool.workers:
w.max_duration = w.__class__.max_duration
cache.close() cache.close()
def test_fts_triggers(self): def test_fts_triggers(self):