diff --git a/src/calibre/gui2/actions/choose_library.py b/src/calibre/gui2/actions/choose_library.py index 001970f9db..d45843995e 100644 --- a/src/calibre/gui2/actions/choose_library.py +++ b/src/calibre/gui2/actions/choose_library.py @@ -343,7 +343,7 @@ class ChooseLibraryAction(InterfaceAction): db.dirtied(list(db.data.iterallids())) info_dialog(self.gui, _('Backup metadata'), _('Metadata will be backed up while calibre is running, at the ' - 'rate of approximately 1 book per second.'), show=True) + 'rate of approximately 1 book every three seconds.'), show=True) def check_library(self): db = self.gui.library_view.model().db diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 55045e7f98..67529397b3 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -10,7 +10,6 @@ import re, itertools, time, traceback from itertools import repeat from datetime import timedelta from threading import Thread -from Queue import Empty from calibre.utils.config import tweaks from calibre.utils.date import parse_date, now, UNDEFINED_DATE @@ -38,7 +37,6 @@ class MetadataBackup(Thread): # {{{ self.get_metadata_for_dump = FunctionDispatcher(db.get_metadata_for_dump) self.clear_dirtied = FunctionDispatcher(db.clear_dirtied) self.set_dirtied = FunctionDispatcher(db.dirtied) - self.in_limbo = None def stop(self): self.keep_running = False @@ -50,34 +48,33 @@ class MetadataBackup(Thread): # {{{ def run(self): while self.keep_running: - self.in_limbo = None try: - time.sleep(0.5) # Limit to two per second - id_ = self.db.dirtied_queue.get(True, 1.45) - except Empty: - continue + time.sleep(2) # Limit to about one book every three seconds + (id_, sequence) = self.db.get_a_dirtied_book() + if id_ is None: + continue + # print 'writer thread', id_, sequence except: # Happens during interpreter shutdown break if not self.keep_running: break - self.in_limbo = id_ try: - path, mi = self.get_metadata_for_dump(id_) + path, mi, sequence = 
self.get_metadata_for_dump(id_) except: prints('Failed to get backup metadata for id:', id_, 'once') traceback.print_exc() time.sleep(2) try: - path, mi = self.get_metadata_for_dump(id_) + path, mi, sequence = self.get_metadata_for_dump(id_) except: prints('Failed to get backup metadata for id:', id_, 'again, giving up') traceback.print_exc() continue - # at this point the dirty indication is off if mi is None: + self.clear_dirtied(id_, sequence) continue if not self.keep_running: break @@ -89,7 +86,6 @@ class MetadataBackup(Thread): # {{{ try: raw = metadata_to_opf(mi) except: - self.set_dirtied([id_]) prints('Failed to convert to opf for id:', id_) traceback.print_exc() continue @@ -106,24 +102,13 @@ class MetadataBackup(Thread): # {{{ try: self.do_write(path, raw) except: - self.set_dirtied([id_]) prints('Failed to write backup metadata for id:', id_, 'again, giving up') continue - self.in_limbo = None - self.flush() + self.clear_dirtied(id_, sequence) self.break_cycles() - def flush(self): - 'Used during shutdown to ensure that a dirtied book is not missed' - if self.in_limbo is not None: - try: - self.db.dirtied([self.in_limbo]) - except: - traceback.print_exc() - self.in_limbo = None - def write(self, path, raw): with lopen(path, 'wb') as f: f.write(raw) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index dbdeabf3d4..e1a1adc4ff 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -7,9 +7,9 @@ __docformat__ = 'restructuredtext en' The database used to store ebook metadata ''' import os, sys, shutil, cStringIO, glob, time, functools, traceback, re, json +import threading, random from itertools import repeat from math import ceil -from Queue import Queue from PyQt4.QtGui import QImage @@ -117,7 +117,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def __init__(self, library_path, row_factory=False, default_prefs=None, read_only=False): self.field_metadata = 
FieldMetadata() - self.dirtied_queue = Queue() + # Create the lock to be used to guard access to the metadata writer + # queues. This must be an RLock, not a Lock + self.dirtied_lock = threading.RLock() if not os.path.exists(library_path): os.makedirs(library_path) self.listeners = set([]) @@ -376,9 +378,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): loc=self.FIELD_MAP['sort'])) d = self.conn.get('SELECT book FROM metadata_dirtied', all=True) - for x in d: - self.dirtied_queue.put(x[0]) - self.dirtied_cache = set([x[0] for x in d]) + with self.dirtied_lock: + self.dirtied_sequence = 0 + self.dirtied_cache = {} + for x in d: + self.dirtied_cache[x[0]] = self.dirtied_sequence + self.dirtied_sequence += 1 self.refresh_ondevice = functools.partial(self.data.refresh_ondevice, self) self.refresh() @@ -605,21 +610,27 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def metadata_for_field(self, key): return self.field_metadata[key] - def clear_dirtied(self, book_ids): + def clear_dirtied(self, book_id, sequence): ''' Clear the dirtied indicator for the books. This is used when fetching metadata, creating an OPF, and writing a file are separated into steps. The last step is clearing the indicator ''' - for book_id in book_ids: - self.conn.execute('DELETE FROM metadata_dirtied WHERE book=?', - (book_id,)) - # if a later exception prevents the commit, then the dirtied - # table will still have the book. No big deal, because the OPF - # is there and correct. 
We will simply do it again on next - # start - self.dirtied_cache.discard(book_id) - self.conn.commit() + with self.dirtied_lock: + dc_sequence = self.dirtied_cache.get(book_id, None) +# print 'clear_dirty: check book', book_id, dc_sequence + if dc_sequence is None or sequence is None or dc_sequence == sequence: +# print 'needs to be cleaned' + self.conn.execute('DELETE FROM metadata_dirtied WHERE book=?', + (book_id,)) + self.conn.commit() + try: + del self.dirtied_cache[book_id] + except: + pass + elif dc_sequence is not None: +# print 'book needs to be done again' + pass def dump_metadata(self, book_ids=None, remove_from_dirtied=True, commit=True): @@ -632,38 +643,59 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): for book_id in book_ids: if not self.data.has_id(book_id): continue - path, mi = self.get_metadata_for_dump(book_id, - remove_from_dirtied=remove_from_dirtied) + path, mi, sequence = self.get_metadata_for_dump(book_id) if path is None: continue try: raw = metadata_to_opf(mi) with lopen(path, 'wb') as f: f.write(raw) + if remove_from_dirtied: + self.clear_dirtied(book_id, sequence) except: - # Something went wrong. Put the book back on the dirty list - self.dirtied([book_id]) + pass if commit: self.conn.commit() def dirtied(self, book_ids, commit=True): - for book in frozenset(book_ids) - self.dirtied_cache: - try: - self.conn.execute( - 'INSERT INTO metadata_dirtied (book) VALUES (?)', - (book,)) - self.dirtied_queue.put(book) - except IntegrityError: - # Already in table - pass - # If the commit doesn't happen, then our cache will be wrong. This - # could lead to a problem because we won't put the book back into - # the dirtied table. We deal with this by writing the dirty cache - # back to the table on GUI exit. 
Not perfect, but probably OK - self.dirtied_cache.add(book) - if commit: + changed = False + for book in book_ids: + with self.dirtied_lock: +# print 'dirtied: check id', book + if book in self.dirtied_cache: + self.dirtied_cache[book] = self.dirtied_sequence + self.dirtied_sequence += 1 + continue +# print 'book not already dirty' + try: + self.conn.execute( + 'INSERT INTO metadata_dirtied (book) VALUES (?)', + (book,)) + changed = True + except IntegrityError: + # Already in table + pass + self.dirtied_cache[book] = self.dirtied_sequence + self.dirtied_sequence += 1 + # If the commit doesn't happen, then the DB table will be wrong. This + # could lead to a problem because on restart, we won't put the book back + # into the dirtied_cache. We deal with this by writing the dirtied_cache + # back to the table on GUI exit. Not perfect, but probably OK + if commit and changed: self.conn.commit() + def get_a_dirtied_book(self): + with self.dirtied_lock: + l = len(self.dirtied_cache) + if l > 0: + # The random stuff is here to prevent a single book from + # blocking progress if its metadata cannot be written for some + # reason. + id_ = self.dirtied_cache.keys()[random.randint(0, l-1)] + sequence = self.dirtied_cache[id_] + return (id_, sequence) + return (None, None) + def dirty_queue_length(self): return len(self.dirtied_cache) @@ -676,12 +708,19 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): is no problem with setting a dirty indication for a book that isn't in fact dirty. Just wastes a few cycles. ''' - book_ids = list(self.dirtied_cache) - self.dirtied_cache = set() - self.dirtied(book_ids) + with self.dirtied_lock: + book_ids = list(self.dirtied_cache.keys()) + self.dirtied_cache = {} + self.dirtied(book_ids) - def get_metadata_for_dump(self, idx, remove_from_dirtied=True): + def get_metadata_for_dump(self, idx): path, mi = (None, None) + # get the current sequence number for this book to pass back to the + # backup thread. 
This will avoid double calls in the case where the + # thread has not done the work between the put and the get_metadata + with self.dirtied_lock: + sequence = self.dirtied_cache.get(idx, None) +# print 'get_md_for_dump', idx, sequence try: # While a book is being created, the path is empty. Don't bother to # try to write the opf, because it will go to the wrong folder. @@ -696,16 +735,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): # This almost certainly means that the book has been deleted while # the backup operation sat in the queue. pass - - try: - # clear the dirtied indicator. The user must put it back if - # something goes wrong with writing the OPF - if remove_from_dirtied: - self.clear_dirtied([idx]) - except: - # No real problem. We will just do it again. - pass - return (path, mi) + return (path, mi, sequence) def get_metadata(self, idx, index_is_id=False, get_cover=False): '''