commit 03bea1803f
Refactor the metadata backup architecture
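This commit replaces the queue-based dirty tracking used by the metadata backup writer with a sequence-numbered cache. LibraryDatabase2 drops dirtied_queue (a Queue) and instead keeps dirtied_cache, a dict mapping each dirty book id to a monotonically increasing sequence number, guarded by an RLock. The MetadataBackup thread now asks for a random dirty book via get_a_dirtied_book(), writes the OPF, and calls clear_dirtied(book_id, sequence); the dirty indication is cleared only if the cached sequence still matches the one captured when the metadata was fetched, so a book re-dirtied during the write stays dirty and is backed up again. The hunks below touch the GUI backup action (ChooseLibraryAction), the MetadataBackup writer thread, and LibraryDatabase2.

The following is a minimal, self-contained sketch of that sequence-number scheme, not calibre's code; the method names mirror the diff, but the class and its in-memory bookkeeping are illustrative only (the real code also persists the dirty set in the metadata_dirtied table).

    import random
    import threading

    class DirtyCache(object):
        '''Illustrative stand-in for the dirtied_cache bookkeeping in the diff.'''

        def __init__(self):
            self.lock = threading.RLock()
            self.cache = {}      # book_id -> sequence number when last dirtied
            self.sequence = 0    # monotonically increasing counter

        def dirtied(self, book_ids):
            # Mark books dirty; re-dirtying bumps the stored sequence number.
            with self.lock:
                for book_id in book_ids:
                    self.cache[book_id] = self.sequence
                    self.sequence += 1

        def get_a_dirtied_book(self):
            # Pick a random dirty book so one failing book cannot block progress.
            with self.lock:
                if not self.cache:
                    return (None, None)
                book_id = random.choice(list(self.cache))
                return (book_id, self.cache[book_id])

        def clear_dirtied(self, book_id, sequence):
            # Clear only if the book was not re-dirtied since `sequence` was taken.
            with self.lock:
                current = self.cache.get(book_id)
                if current is None or sequence is None or current == sequence:
                    self.cache.pop(book_id, None)
                    return True
                return False  # newer sequence: leave it dirty, do it again later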
@@ -343,7 +343,7 @@ class ChooseLibraryAction(InterfaceAction):
         db.dirtied(list(db.data.iterallids()))
         info_dialog(self.gui, _('Backup metadata'),
             _('Metadata will be backed up while calibre is running, at the '
-                'rate of approximately 1 book per second.'), show=True)
+                'rate of approximately 1 book every three seconds.'), show=True)
 
     def check_library(self):
         db = self.gui.library_view.model().db
@@ -10,7 +10,6 @@ import re, itertools, time, traceback
 from itertools import repeat
 from datetime import timedelta
 from threading import Thread
-from Queue import Empty
 
 from calibre.utils.config import tweaks
 from calibre.utils.date import parse_date, now, UNDEFINED_DATE
@@ -38,7 +37,6 @@ class MetadataBackup(Thread): # {{{
         self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump)
         self.clear_dirtied = FunctionDispatcher(db.clear_dirtied)
         self.set_dirtied = FunctionDispatcher(db.dirtied)
-        self.in_limbo = None
 
     def stop(self):
         self.keep_running = False
@@ -50,34 +48,33 @@ class MetadataBackup(Thread): # {{{
 
     def run(self):
         while self.keep_running:
-            self.in_limbo = None
             try:
-                time.sleep(0.5) # Limit to two per second
-                id_ = self.db.dirtied_queue.get(True, 1.45)
-            except Empty:
-                continue
+                time.sleep(2) # Limit to two per second
+                (id_, sequence) = self.db.get_a_dirtied_book()
+                if id_ is None:
+                    continue
+                # print 'writer thread', id_, sequence
             except:
                 # Happens during interpreter shutdown
                 break
             if not self.keep_running:
                 break
 
-            self.in_limbo = id_
             try:
-                path, mi = self.get_metadata_for_dump(id_)
+                path, mi, sequence = self.get_metadata_for_dump(id_)
             except:
                 prints('Failed to get backup metadata for id:', id_, 'once')
                 traceback.print_exc()
                 time.sleep(2)
                 try:
-                    path, mi = self.get_metadata_for_dump(id_)
+                    path, mi, sequence = self.get_metadata_for_dump(id_)
                 except:
                     prints('Failed to get backup metadata for id:', id_, 'again, giving up')
                     traceback.print_exc()
                     continue
 
-            # at this point the dirty indication is off
             if mi is None:
+                self.clear_dirtied(id_, sequence)
                 continue
             if not self.keep_running:
                 break
@@ -89,7 +86,6 @@ class MetadataBackup(Thread): # {{{
             try:
                 raw = metadata_to_opf(mi)
             except:
-                self.set_dirtied([id_])
                 prints('Failed to convert to opf for id:', id_)
                 traceback.print_exc()
                 continue
@@ -106,24 +102,13 @@ class MetadataBackup(Thread): # {{{
             try:
                 self.do_write(path, raw)
             except:
-                self.set_dirtied([id_])
                 prints('Failed to write backup metadata for id:', id_,
                         'again, giving up')
                 continue
 
-            self.in_limbo = None
-            self.flush()
+            self.clear_dirtied(id_, sequence)
             self.break_cycles()
 
-    def flush(self):
-        'Used during shutdown to ensure that a dirtied book is not missed'
-        if self.in_limbo is not None:
-            try:
-                self.db.dirtied([self.in_limbo])
-            except:
-                traceback.print_exc()
-            self.in_limbo = None
-
     def write(self, path, raw):
         with lopen(path, 'wb') as f:
             f.write(raw)
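Because the dirty indication is now cleared only after the OPF has been written (clear_dirtied(id_, sequence) at the end of the loop, instead of clearing up front and re-dirtying on failure), the in_limbo bookkeeping and the flush() shutdown hook are no longer needed: a book that is mid-backup when the thread stops simply stays dirty and is retried later. A hedged sketch of the polling consumer loop the thread now follows, reusing the illustrative DirtyCache above (write_opf stands in for fetching the metadata and writing the file):

    import time

    def backup_loop(cache, write_opf, keep_running):
        # Polling consumer in the style of MetadataBackup.run() after this commit.
        # `cache` is a DirtyCache, `write_opf(book_id)` fetches metadata and writes
        # the OPF, `keep_running()` returns False when the thread should stop.
        while keep_running():
            time.sleep(2)                    # throttle: roughly one book per two seconds
            book_id, sequence = cache.get_a_dirtied_book()
            if book_id is None:
                continue                     # nothing is dirty right now
            try:
                write_opf(book_id)
            except Exception:
                continue                     # book stays dirty; retried on a later pass
            # Clear only if the book was not re-dirtied while the OPF was written.
            cache.clear_dirtied(book_id, sequence)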
@@ -7,9 +7,9 @@ __docformat__ = 'restructuredtext en'
 The database used to store ebook metadata
 '''
 import os, sys, shutil, cStringIO, glob, time, functools, traceback, re, json
+import threading, random
 from itertools import repeat
 from math import ceil
-from Queue import Queue
 
 from PyQt4.QtGui import QImage
 
@@ -117,7 +117,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
     def __init__(self, library_path, row_factory=False, default_prefs=None,
             read_only=False):
         self.field_metadata = FieldMetadata()
-        self.dirtied_queue = Queue()
+        # Create the lock to be used to guard access to the metadata writer
+        # queues. This must be an RLock, not a Lock
+        self.dirtied_lock = threading.RLock()
         if not os.path.exists(library_path):
             os.makedirs(library_path)
         self.listeners = set([])
@@ -376,9 +378,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                 loc=self.FIELD_MAP['sort']))
 
         d = self.conn.get('SELECT book FROM metadata_dirtied', all=True)
-        for x in d:
-            self.dirtied_queue.put(x[0])
-        self.dirtied_cache = set([x[0] for x in d])
+        with self.dirtied_lock:
+            self.dirtied_sequence = 0
+            self.dirtied_cache = {}
+            for x in d:
+                self.dirtied_cache[x[0]] = self.dirtied_sequence
+                self.dirtied_sequence += 1
 
         self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self)
         self.refresh()
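On startup, dirtied_cache is rebuilt from the metadata_dirtied table, each pending book receiving the next sequence number, so backups that were still outstanding when the previous session ended are picked up again by the writer thread.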
@@ -605,21 +610,27 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
     def metadata_for_field(self, key):
         return self.field_metadata[key]
 
-    def clear_dirtied(self, book_ids):
+    def clear_dirtied(self, book_id, sequence):
         '''
         Clear the dirtied indicator for the books. This is used when fetching
         metadata, creating an OPF, and writing a file are separated into steps.
         The last step is clearing the indicator
         '''
-        for book_id in book_ids:
-            self.conn.execute('DELETE FROM metadata_dirtied WHERE book=?',
-                    (book_id,))
-            # if a later exception prevents the commit, then the dirtied
-            # table will still have the book. No big deal, because the OPF
-            # is there and correct. We will simply do it again on next
-            # start
-            self.dirtied_cache.discard(book_id)
-        self.conn.commit()
+        with self.dirtied_lock:
+            dc_sequence = self.dirtied_cache.get(book_id, None)
+            # print 'clear_dirty: check book', book_id, dc_sequence
+            if dc_sequence is None or sequence is None or dc_sequence == sequence:
+                # print 'needs to be cleaned'
+                self.conn.execute('DELETE FROM metadata_dirtied WHERE book=?',
+                        (book_id,))
+                self.conn.commit()
+                try:
+                    del self.dirtied_cache[book_id]
+                except:
+                    pass
+            elif dc_sequence is not None:
+                # print 'book needs to be done again'
+                pass
 
     def dump_metadata(self, book_ids=None, remove_from_dirtied=True,
             commit=True):
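The sequence comparison is what makes clearing safe against concurrent re-dirtying: the indication is removed only when the sequence captured at fetch time still matches the cached one (or when either side has no sequence). A small demonstration using the illustrative DirtyCache sketched earlier:

    cache = DirtyCache()
    cache.dirtied([42])                        # user edits book 42
    book_id, seq = cache.get_a_dirtied_book()  # backup thread picks it up
    cache.dirtied([42])                        # user edits it again mid-backup
    assert cache.clear_dirtied(book_id, seq) is False  # stale sequence: stays dirty
    book_id, seq = cache.get_a_dirtied_book()
    assert cache.clear_dirtied(book_id, seq) is True   # cleared on the second pass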
@@ -632,38 +643,59 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         for book_id in book_ids:
             if not self.data.has_id(book_id):
                 continue
-            path, mi = self.get_metadata_for_dump(book_id,
-                    remove_from_dirtied=remove_from_dirtied)
+            path, mi, sequence = self.get_metadata_for_dump(book_id)
             if path is None:
                 continue
             try:
                 raw = metadata_to_opf(mi)
                 with lopen(path, 'wb') as f:
                     f.write(raw)
+                if remove_from_dirtied:
+                    self.clear_dirtied(book_id, sequence)
             except:
-                # Something went wrong. Put the book back on the dirty list
-                self.dirtied([book_id])
+                pass
         if commit:
             self.conn.commit()
 
     def dirtied(self, book_ids, commit=True):
-        for book in frozenset(book_ids) - self.dirtied_cache:
-            try:
-                self.conn.execute(
-                    'INSERT INTO metadata_dirtied (book) VALUES (?)',
-                        (book,))
-                self.dirtied_queue.put(book)
-            except IntegrityError:
-                # Already in table
-                pass
-            # If the commit doesn't happen, then our cache will be wrong. This
-            # could lead to a problem because we won't put the book back into
-            # the dirtied table. We deal with this by writing the dirty cache
-            # back to the table on GUI exit. Not perfect, but probably OK
-            self.dirtied_cache.add(book)
-        if commit:
+        changed = False
+        for book in book_ids:
+            with self.dirtied_lock:
+                # print 'dirtied: check id', book
+                if book in self.dirtied_cache:
+                    self.dirtied_cache[book] = self.dirtied_sequence
+                    self.dirtied_sequence += 1
+                    continue
+                # print 'book not already dirty'
+                try:
+                    self.conn.execute(
+                        'INSERT INTO metadata_dirtied (book) VALUES (?)',
+                            (book,))
+                    changed = True
+                except IntegrityError:
+                    # Already in table
+                    pass
+                self.dirtied_cache[book] = self.dirtied_sequence
+                self.dirtied_sequence += 1
+        # If the commit doesn't happen, then the DB table will be wrong. This
+        # could lead to a problem because on restart, we won't put the book back
+        # into the dirtied_cache. We deal with this by writing the dirtied_cache
+        # back to the table on GUI exit. Not perfect, but probably OK
+        if commit and changed:
             self.conn.commit()
 
+    def get_a_dirtied_book(self):
+        with self.dirtied_lock:
+            l = len(self.dirtied_cache)
+            if l > 0:
+                # The random stuff is here to prevent a single book from
+                # blocking progress if its metadata cannot be written for some
+                # reason.
+                id_ = self.dirtied_cache.keys()[random.randint(0, l-1)]
+                sequence = self.dirtied_cache[id_]
+                return (id_, sequence)
+            return (None, None)
+
     def dirty_queue_length(self):
         return len(self.dirtied_cache)
 
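get_a_dirtied_book() indexes self.dirtied_cache.keys() directly, which relies on Python 2 semantics (the same module still imports cStringIO, and dict.keys() returns a list there). Under Python 3, dict.keys() is a view, so an equivalent, purely illustrative translation would look like this:

    import random

    def get_a_dirtied_book(cache):
        # Python 3 equivalent of picking a random (book_id, sequence) pair.
        if not cache:
            return (None, None)
        book_id = random.choice(list(cache))
        return (book_id, cache[book_id])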
@@ -676,12 +708,19 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         is no problem with setting a dirty indication for a book that isn't in
         fact dirty. Just wastes a few cycles.
         '''
-        book_ids = list(self.dirtied_cache)
-        self.dirtied_cache = set()
+        with self.dirtied_lock:
+            book_ids = list(self.dirtied_cache.keys())
+            self.dirtied_cache = {}
         self.dirtied(book_ids)
 
-    def get_metadata_for_dump(self, idx, remove_from_dirtied=True):
+    def get_metadata_for_dump(self, idx):
         path, mi = (None, None)
+        # get the current sequence number for this book to pass back to the
+        # backup thread. This will avoid double calls in the case where the
+        # thread has not done the work between the put and the get_metadata
+        with self.dirtied_lock:
+            sequence = self.dirtied_cache.get(idx, None)
+            # print 'get_md_for_dump', idx, sequence
         try:
             # While a book is being created, the path is empty. Don't bother to
             # try to write the opf, because it will go to the wrong folder.
@@ -696,16 +735,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             # This almost certainly means that the book has been deleted while
             # the backup operation sat in the queue.
             pass
-
-        try:
-            # clear the dirtied indicator. The user must put it back if
-            # something goes wrong with writing the OPF
-            if remove_from_dirtied:
-                self.clear_dirtied([idx])
-        except:
-            # No real problem. We will just do it again.
-            pass
-        return (path, mi)
+        return (path, mi, sequence)
 
     def get_metadata(self, idx, index_is_id=False, get_cover=False):
         '''