mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Track indexing rate in the Cache object itself
This commit is contained in:
parent
87d51c1802
commit
119df51028
@ -973,8 +973,8 @@ class DB:
|
|||||||
def remove_dirty_fts(self, book_id, fmt):
|
def remove_dirty_fts(self, book_id, fmt):
|
||||||
return self.fts.remove_dirty(book_id, fmt)
|
return self.fts.remove_dirty(book_id, fmt)
|
||||||
|
|
||||||
def queue_fts_job(self, book_id, fmt, path, fmt_size, fmt_hash):
|
def queue_fts_job(self, book_id, fmt, path, fmt_size, fmt_hash, start_time):
|
||||||
return self.fts.queue_job(book_id, fmt, path, fmt_size, fmt_hash)
|
return self.fts.queue_job(book_id, fmt, path, fmt_size, fmt_hash, start_time)
|
||||||
|
|
||||||
def commit_fts_result(self, book_id, fmt, fmt_size, fmt_hash, text, err_msg):
|
def commit_fts_result(self, book_id, fmt, fmt_size, fmt_hash, text, err_msg):
|
||||||
if self.fts is not None:
|
if self.fts is not None:
|
||||||
|
@ -19,9 +19,9 @@ from functools import partial, wraps
|
|||||||
from io import DEFAULT_BUFFER_SIZE, BytesIO
|
from io import DEFAULT_BUFFER_SIZE, BytesIO
|
||||||
from queue import Queue
|
from queue import Queue
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
from time import sleep, time
|
from time import monotonic, sleep, time
|
||||||
|
|
||||||
from calibre import as_unicode, isbytestring, detect_ncpus
|
from calibre import as_unicode, detect_ncpus, isbytestring
|
||||||
from calibre.constants import iswindows, preferred_encoding
|
from calibre.constants import iswindows, preferred_encoding
|
||||||
from calibre.customize.ui import (
|
from calibre.customize.ui import (
|
||||||
run_plugins_on_import, run_plugins_on_postadd, run_plugins_on_postimport
|
run_plugins_on_import, run_plugins_on_postadd, run_plugins_on_postimport
|
||||||
@ -437,6 +437,8 @@ class Cache:
|
|||||||
# FTS API {{{
|
# FTS API {{{
|
||||||
def initialize_fts(self):
|
def initialize_fts(self):
|
||||||
self.fts_queue_thread = None
|
self.fts_queue_thread = None
|
||||||
|
self.fts_measuring_rate = None
|
||||||
|
self.fts_num_done_since_start = 0
|
||||||
self.fts_job_queue = Queue()
|
self.fts_job_queue = Queue()
|
||||||
self.fts_indexing_left = self.fts_indexing_total = 0
|
self.fts_indexing_left = self.fts_indexing_total = 0
|
||||||
fts = self.backend.initialize_fts(weakref.ref(self))
|
fts = self.backend.initialize_fts(weakref.ref(self))
|
||||||
@ -445,7 +447,7 @@ class Cache:
|
|||||||
return fts
|
return fts
|
||||||
|
|
||||||
def start_fts_pool(self):
|
def start_fts_pool(self):
|
||||||
from threading import Thread, Event
|
from threading import Event, Thread
|
||||||
self.fts_dispatch_stop_event = Event()
|
self.fts_dispatch_stop_event = Event()
|
||||||
self.fts_queue_thread = Thread(name='FTSQueue', target=Cache.dispatch_fts_jobs, args=(
|
self.fts_queue_thread = Thread(name='FTSQueue', target=Cache.dispatch_fts_jobs, args=(
|
||||||
self.fts_job_queue, self.fts_dispatch_stop_event, weakref.ref(self)), daemon=True)
|
self.fts_job_queue, self.fts_dispatch_stop_event, weakref.ref(self)), daemon=True)
|
||||||
@ -458,20 +460,32 @@ class Cache:
|
|||||||
def is_fts_enabled(self):
|
def is_fts_enabled(self):
|
||||||
return self.backend.fts_enabled
|
return self.backend.fts_enabled
|
||||||
|
|
||||||
@api
|
@write_api
|
||||||
def fts_indexing_progress(self):
|
def fts_start_measuring_rate(self, measure=True):
|
||||||
return self.fts_indexing_left, self.fts_indexing_total
|
self.fts_measuring_rate = monotonic() if measure else None
|
||||||
|
self.fts_num_done_since_start = 0
|
||||||
|
|
||||||
def _update_fts_indexing_numbers(self):
|
def _update_fts_indexing_numbers(self, job_time=None):
|
||||||
# this is called when new formats are added and when a format is
|
# this is called when new formats are added and when a format is
|
||||||
# indexed, but NOT when books or formats are deleted, so total may not
|
# indexed, but NOT when books or formats are deleted, so total may not
|
||||||
# be up to date.
|
# be up to date.
|
||||||
nl = self.backend.fts.number_dirtied()
|
nl = self.backend.fts.number_dirtied()
|
||||||
nt = self.backend.get('SELECT COUNT(*) FROM main.data')[0][0] or 0
|
nt = self.backend.get('SELECT COUNT(*) FROM main.data')[0][0] or 0
|
||||||
if (self.fts_indexing_left, self.fts_indexing_total) != (nl, nt):
|
if not nl:
|
||||||
|
self._fts_start_measuring_rate(measure=False)
|
||||||
|
if job_time is not None and self.fts_measuring_rate is not None:
|
||||||
|
self.fts_num_done_since_start += 1
|
||||||
|
if (self.fts_indexing_left, self.fts_indexing_total) != (nl, nt) or job_time is not None:
|
||||||
self.fts_indexing_left = nl
|
self.fts_indexing_left = nl
|
||||||
self.fts_indexing_total = nt
|
self.fts_indexing_total = nt
|
||||||
self.event_dispatcher(EventType.indexing_progress_changed, nl, nt)
|
self.event_dispatcher(EventType.indexing_progress_changed, *self._fts_indexing_progress())
|
||||||
|
|
||||||
|
@read_api
|
||||||
|
def fts_indexing_progress(self):
|
||||||
|
rate = None
|
||||||
|
if self.fts_measuring_rate is not None and self.fts_num_done_since_start > 4:
|
||||||
|
rate = self.fts_num_done_since_start / (monotonic() - self.fts_measuring_rate)
|
||||||
|
return self.fts_indexing_left, self.fts_indexing_total, rate
|
||||||
|
|
||||||
@write_api
|
@write_api
|
||||||
def enable_fts(self, enabled=True, start_pool=True):
|
def enable_fts(self, enabled=True, start_pool=True):
|
||||||
@ -498,6 +512,7 @@ class Cache:
|
|||||||
self = dbref()
|
self = dbref()
|
||||||
if self is None:
|
if self is None:
|
||||||
return False
|
return False
|
||||||
|
start_time = monotonic()
|
||||||
with self.read_lock:
|
with self.read_lock:
|
||||||
if not self.backend.fts_enabled:
|
if not self.backend.fts_enabled:
|
||||||
return False
|
return False
|
||||||
@ -522,9 +537,9 @@ class Cache:
|
|||||||
h.update(chunk)
|
h.update(chunk)
|
||||||
pt.write(chunk)
|
pt.write(chunk)
|
||||||
with self.write_lock:
|
with self.write_lock:
|
||||||
queued = self.backend.queue_fts_job(book_id, fmt, pt.name, sz, h.hexdigest())
|
queued = self.backend.queue_fts_job(book_id, fmt, pt.name, sz, h.hexdigest(), start_time)
|
||||||
if not queued: # means a dirtied book was removed from the dirty list because the text has not changed
|
if not queued: # means a dirtied book was removed from the dirty list because the text has not changed
|
||||||
self._update_fts_indexing_numbers()
|
self._update_fts_indexing_numbers(monotonic() - start_time)
|
||||||
return self.backend.fts_has_idle_workers
|
return self.backend.fts_has_idle_workers
|
||||||
|
|
||||||
def loop_while_more_available():
|
def loop_while_more_available():
|
||||||
@ -555,9 +570,9 @@ class Cache:
|
|||||||
self._update_fts_indexing_numbers()
|
self._update_fts_indexing_numbers()
|
||||||
|
|
||||||
@write_api
|
@write_api
|
||||||
def commit_fts_result(self, book_id, fmt, fmt_size, fmt_hash, text, err_msg):
|
def commit_fts_result(self, book_id, fmt, fmt_size, fmt_hash, text, err_msg, start_time):
|
||||||
ans = self.backend.commit_fts_result(book_id, fmt, fmt_size, fmt_hash, text, err_msg)
|
ans = self.backend.commit_fts_result(book_id, fmt, fmt_size, fmt_hash, text, err_msg)
|
||||||
self._update_fts_indexing_numbers()
|
self._update_fts_indexing_numbers(monotonic() - start_time)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
@write_api
|
@write_api
|
||||||
@ -583,23 +598,29 @@ class Cache:
|
|||||||
self._queue_next_fts_job()
|
self._queue_next_fts_job()
|
||||||
return fts
|
return fts
|
||||||
|
|
||||||
@api
|
@write_api
|
||||||
def set_fts_num_of_workers(self, num=None):
|
def set_fts_num_of_workers(self, num):
|
||||||
existing = self.backend.fts_num_of_workers
|
existing = self.backend.fts_num_of_workers
|
||||||
if num is not None:
|
if num != existing:
|
||||||
self.backend.fts_num_of_workers = num
|
self.backend.fts_num_of_workers = num
|
||||||
if num > existing:
|
if num > existing:
|
||||||
self.queue_next_fts_job()
|
self._queue_next_fts_job()
|
||||||
return existing
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
@api
|
@write_api
|
||||||
def set_fts_speed(self, slow=True):
|
def set_fts_speed(self, slow=True):
|
||||||
|
orig = self.fts_indexing_sleep_time
|
||||||
if slow:
|
if slow:
|
||||||
self.fts_indexing_sleep_time = Cache.fts_indexing_sleep_time
|
self.fts_indexing_sleep_time = Cache.fts_indexing_sleep_time
|
||||||
self.set_fts_num_of_workers(1)
|
changed = self._set_fts_num_of_workers(1)
|
||||||
else:
|
else:
|
||||||
self.fts_indexing_sleep_time = 0.1
|
self.fts_indexing_sleep_time = 0.1
|
||||||
self.set_fts_num_of_workers(max(1, detect_ncpus()))
|
changed = self._set_fts_num_of_workers(max(1, detect_ncpus()))
|
||||||
|
changed = changed or orig != self.fts_indexing_sleep_time
|
||||||
|
if changed and self.fts_measuring_rate is not None:
|
||||||
|
self._fts_start_measuring_rate()
|
||||||
|
return changed
|
||||||
|
|
||||||
@read_api
|
@read_api
|
||||||
def fts_search(
|
def fts_search(
|
||||||
@ -967,7 +988,7 @@ class Cache:
|
|||||||
return
|
return
|
||||||
ret = pt.name
|
ret = pt.name
|
||||||
elif as_pixmap or as_image:
|
elif as_pixmap or as_image:
|
||||||
from qt.core import QPixmap, QImage
|
from qt.core import QImage, QPixmap
|
||||||
ret = QImage() if as_image else QPixmap()
|
ret = QImage() if as_image else QPixmap()
|
||||||
with self.safe_read_lock:
|
with self.safe_read_lock:
|
||||||
path = self._format_abspath(book_id, '__COVER_INTERNAL__')
|
path = self._format_abspath(book_id, '__COVER_INTERNAL__')
|
||||||
|
@ -133,14 +133,14 @@ class FTS:
|
|||||||
break
|
break
|
||||||
self.add_text(book_id, fmt, text, text_hash, fmt_size, fmt_hash, err_msg)
|
self.add_text(book_id, fmt, text, text_hash, fmt_size, fmt_hash, err_msg)
|
||||||
|
|
||||||
def queue_job(self, book_id, fmt, path, fmt_size, fmt_hash):
|
def queue_job(self, book_id, fmt, path, fmt_size, fmt_hash, start_time):
|
||||||
conn = self.get_connection()
|
conn = self.get_connection()
|
||||||
fmt = fmt.upper()
|
fmt = fmt.upper()
|
||||||
for x in conn.get('SELECT id FROM fts_db.books_text WHERE book=? AND format=? AND format_size=? AND format_hash=?', (
|
for x in conn.get('SELECT id FROM fts_db.books_text WHERE book=? AND format=? AND format_size=? AND format_hash=?', (
|
||||||
book_id, fmt, fmt_size, fmt_hash)):
|
book_id, fmt, fmt_size, fmt_hash)):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
self.pool.add_job(book_id, fmt, path, fmt_size, fmt_hash)
|
self.pool.add_job(book_id, fmt, path, fmt_size, fmt_hash, start_time)
|
||||||
conn.execute('UPDATE fts_db.dirtied_formats SET in_progress=TRUE WHERE book=? AND format=?', (book_id, fmt))
|
conn.execute('UPDATE fts_db.dirtied_formats SET in_progress=TRUE WHERE book=? AND format=?', (book_id, fmt))
|
||||||
return True
|
return True
|
||||||
self.remove_dirty(book_id, fmt)
|
self.remove_dirty(book_id, fmt)
|
||||||
|
@ -21,12 +21,13 @@ quit = object()
|
|||||||
|
|
||||||
class Job:
|
class Job:
|
||||||
|
|
||||||
def __init__(self, book_id, fmt, path, fmt_size, fmt_hash):
|
def __init__(self, book_id, fmt, path, fmt_size, fmt_hash, start_time):
|
||||||
self.book_id = book_id
|
self.book_id = book_id
|
||||||
self.fmt = fmt
|
self.fmt = fmt
|
||||||
self.fmt_size = fmt_size
|
self.fmt_size = fmt_size
|
||||||
self.fmt_hash = fmt_hash
|
self.fmt_hash = fmt_hash
|
||||||
self.path = path
|
self.path = path
|
||||||
|
self.start_time = start_time
|
||||||
|
|
||||||
|
|
||||||
class Result:
|
class Result:
|
||||||
@ -37,6 +38,7 @@ class Result:
|
|||||||
self.fmt_size = job.fmt_size
|
self.fmt_size = job.fmt_size
|
||||||
self.fmt_hash = job.fmt_hash
|
self.fmt_hash = job.fmt_hash
|
||||||
self.ok = not bool(err_msg)
|
self.ok = not bool(err_msg)
|
||||||
|
self.start_time = job.start_time
|
||||||
if self.ok:
|
if self.ok:
|
||||||
with open(job.path + '.txt', 'rb') as src:
|
with open(job.path + '.txt', 'rb') as src:
|
||||||
try:
|
try:
|
||||||
@ -177,9 +179,9 @@ class Pool:
|
|||||||
self.initialize()
|
self.initialize()
|
||||||
self.supervise_queue.put(check_for_work)
|
self.supervise_queue.put(check_for_work)
|
||||||
|
|
||||||
def add_job(self, book_id, fmt, path, fmt_size, fmt_hash):
|
def add_job(self, book_id, fmt, path, fmt_size, fmt_hash, start_time):
|
||||||
self.initialize()
|
self.initialize()
|
||||||
job = Job(book_id, fmt, path, fmt_size, fmt_hash)
|
job = Job(book_id, fmt, path, fmt_size, fmt_hash, start_time)
|
||||||
self.jobs_queue.put(job)
|
self.jobs_queue.put(job)
|
||||||
|
|
||||||
def commit_result(self, result):
|
def commit_result(self, result):
|
||||||
@ -192,7 +194,7 @@ class Pool:
|
|||||||
text = ''
|
text = ''
|
||||||
db = self.dbref()
|
db = self.dbref()
|
||||||
if db is not None:
|
if db is not None:
|
||||||
db.commit_fts_result(result.book_id, result.fmt, result.fmt_size, result.fmt_hash, text, err_msg)
|
db.commit_fts_result(result.book_id, result.fmt, result.fmt_size, result.fmt_hash, text, err_msg, result.start_time)
|
||||||
|
|
||||||
def shutdown(self):
|
def shutdown(self):
|
||||||
if self.initialized.is_set():
|
if self.initialized.is_set():
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import os, errno, sys, re, time
|
import os, errno, sys, re
|
||||||
from locale import localeconv
|
from locale import localeconv
|
||||||
from collections import OrderedDict, namedtuple
|
from collections import OrderedDict, namedtuple
|
||||||
from polyglot.builtins import iteritems, itervalues, string_or_bytes
|
from polyglot.builtins import iteritems, itervalues, string_or_bytes
|
||||||
@ -458,24 +458,15 @@ class IndexingProgress:
|
|||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f'IndexingProgress(left={self.left}, total={self.total})'
|
return f'IndexingProgress(left={self.left}, total={self.total}, rate={self.indexing_rate})'
|
||||||
|
|
||||||
def clear_rate_information(self):
|
|
||||||
from collections import deque
|
|
||||||
self.done_events = deque()
|
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.left = self.total = -1
|
self.left = self.total = -1
|
||||||
self.clear_rate_information()
|
self.indexing_rate = None
|
||||||
|
|
||||||
def update(self, left, total):
|
def update(self, left, total, indexing_rate):
|
||||||
changed = (left, total) != (self.left, self.total)
|
changed = (left, total, indexing_rate) != (self.left, self.total, self.indexing_rate)
|
||||||
if changed:
|
self.indexing_rate = indexing_rate
|
||||||
done_num = self.left - left
|
|
||||||
if done_num > 0 and self.left > -1: # initial event will have self.left == -1
|
|
||||||
self.done_events.append((done_num, time.monotonic()))
|
|
||||||
if len(self.done_events) > 50:
|
|
||||||
self.done_events.popleft()
|
|
||||||
self.left, self.total = left, total
|
self.left, self.total = left, total
|
||||||
return changed
|
return changed
|
||||||
|
|
||||||
@ -493,14 +484,10 @@ class IndexingProgress:
|
|||||||
return _('calculating time left')
|
return _('calculating time left')
|
||||||
if self.left < 2:
|
if self.left < 2:
|
||||||
return _('almost done')
|
return _('almost done')
|
||||||
if len(self.done_events) < 5:
|
if self.indexing_rate is None:
|
||||||
return _('calculating time left')
|
return _('calculating time left')
|
||||||
try:
|
try:
|
||||||
start_time = self.done_events[0][1]
|
seconds_left = self.left / self.indexing_rate
|
||||||
end_time = self.done_events[-1][1]
|
|
||||||
num_done = sum(x[0] for x in self.done_events) - self.done_events[0][0]
|
|
||||||
rate = num_done / max(0.1, end_time - start_time)
|
|
||||||
seconds_left = self.left / rate
|
|
||||||
return _('~{} left').format(human_readable_interval(seconds_left))
|
return _('~{} left').format(human_readable_interval(seconds_left))
|
||||||
except Exception:
|
except Exception:
|
||||||
return _('calculating time left')
|
return _('calculating time left')
|
||||||
|
@ -70,7 +70,6 @@ class ScanProgress(QWidget):
|
|||||||
def change_speed(self):
|
def change_speed(self):
|
||||||
db = get_db()
|
db = get_db()
|
||||||
db.set_fts_speed(slow=not self.fast_button.isChecked())
|
db.set_fts_speed(slow=not self.fast_button.isChecked())
|
||||||
self.indexing_progress.clear_rate_information()
|
|
||||||
|
|
||||||
def update(self, complete, left, total):
|
def update(self, complete, left, total):
|
||||||
if complete:
|
if complete:
|
||||||
@ -130,14 +129,15 @@ class ScanStatus(QWidget):
|
|||||||
|
|
||||||
def gui_update_event(self, db, event_type, event_data):
|
def gui_update_event(self, db, event_type, event_data):
|
||||||
if event_type is EventType.indexing_progress_changed:
|
if event_type is EventType.indexing_progress_changed:
|
||||||
self.update_stats()
|
self.update_stats(event_data)
|
||||||
|
|
||||||
def __call__(self, event_type, library_id, event_data):
|
def __call__(self, event_type, library_id, event_data):
|
||||||
if event_type is EventType.indexing_progress_changed:
|
if event_type is EventType.indexing_progress_changed:
|
||||||
self.update_stats()
|
self.update_stats(event_data)
|
||||||
|
|
||||||
def update_stats(self):
|
def update_stats(self, event_data=None):
|
||||||
changed = self.indexing_progress.update(*self.db.fts_indexing_progress())
|
event_data = event_data or self.db.fts_indexing_progress()
|
||||||
|
changed = self.indexing_progress.update(*event_data)
|
||||||
if changed:
|
if changed:
|
||||||
self.indexing_progress_changed.emit(self.indexing_progress.complete, self.indexing_progress.left, self.indexing_progress.total)
|
self.indexing_progress_changed.emit(self.indexing_progress.complete, self.indexing_progress.left, self.indexing_progress.total)
|
||||||
|
|
||||||
@ -194,12 +194,12 @@ class ScanStatus(QWidget):
|
|||||||
self.apply_fts_state()
|
self.apply_fts_state()
|
||||||
|
|
||||||
def startup(self):
|
def startup(self):
|
||||||
self.indexing_progress.clear_rate_information()
|
self.db.fts_start_measuring_rate(measure=True)
|
||||||
|
|
||||||
def shutdown(self):
|
def shutdown(self):
|
||||||
self.scan_progress.slow_button.setChecked(True)
|
self.scan_progress.slow_button.setChecked(True)
|
||||||
self.reset_indexing_state_for_current_db()
|
self.reset_indexing_state_for_current_db()
|
||||||
self.indexing_progress.clear_rate_information()
|
self.db.fts_start_measuring_rate(measure=False)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user