From 02ce96cd688d14ba0b26bd4448d71d22ac704119 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 25 Sep 2010 20:46:45 -0600
Subject: [PATCH] Throttle OPF writer thread some more and framework for restore from OPFs

---
 src/calibre/library/caches.py          |   2 +-
 src/calibre/library/database2.py       |  49 ++++---
 src/calibre/library/restore.py         | 195 +++++++++++++++++++++++++
 src/calibre/utils/pyconsole/console.py |   2 +-
 4 files changed, 228 insertions(+), 20 deletions(-)
 create mode 100644 src/calibre/library/restore.py

diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 1e52350e46..235584b9f7 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -48,7 +48,7 @@ class MetadataBackup(Thread): # {{{
                 time.sleep(2)
                 if not self.dump_func([id_]):
                     prints('Failed to backup metadata for id:', id_, 'again, giving up')
-            time.sleep(0.2) # Limit to five per second
+            time.sleep(0.9) # Limit to one per second
 
 # }}}
 
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 94550f2804..ee7c3206bf 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1198,38 +1198,41 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                 else:
                     raise
         if mi.title:
-            self.set_title(id, mi.title)
+            self.set_title(id, mi.title, commit=False)
         if not mi.authors:
             mi.authors = [_('Unknown')]
         authors = []
         for a in mi.authors:
             authors += string_to_authors(a)
-        self.set_authors(id, authors, notify=False)
+        self.set_authors(id, authors, notify=False, commit=False)
         if mi.author_sort:
-            doit(self.set_author_sort, id, mi.author_sort, notify=False)
+            doit(self.set_author_sort, id, mi.author_sort, notify=False,
+                    commit=False)
         if mi.publisher:
-            doit(self.set_publisher, id, mi.publisher, notify=False)
+            doit(self.set_publisher, id, mi.publisher, notify=False,
+                    commit=False)
         if mi.rating:
-            doit(self.set_rating, id, mi.rating, notify=False)
+            doit(self.set_rating, id, mi.rating, notify=False, commit=False)
         if mi.series:
-            doit(self.set_series, id, mi.series, notify=False)
+            doit(self.set_series, id, mi.series, notify=False, commit=False)
         if mi.cover_data[1] is not None:
             doit(self.set_cover, id, mi.cover_data[1]) # doesn't use commit
         elif mi.cover is not None and os.access(mi.cover, os.R_OK):
             doit(self.set_cover, id, open(mi.cover, 'rb'))
         if mi.tags:
-            doit(self.set_tags, id, mi.tags, notify=False)
+            doit(self.set_tags, id, mi.tags, notify=False, commit=False)
         if mi.comments:
-            doit(self.set_comment, id, mi.comments, notify=False)
+            doit(self.set_comment, id, mi.comments, notify=False, commit=False)
         if mi.isbn and mi.isbn.strip():
-            doit(self.set_isbn, id, mi.isbn, notify=False)
+            doit(self.set_isbn, id, mi.isbn, notify=False, commit=False)
         if mi.series_index:
-            doit(self.set_series_index, id, mi.series_index, notify=False)
+            doit(self.set_series_index, id, mi.series_index, notify=False,
+                    commit=False)
         if mi.pubdate:
-            doit(self.set_pubdate, id, mi.pubdate, notify=False)
+            doit(self.set_pubdate, id, mi.pubdate, notify=False, commit=False)
         if getattr(mi, 'timestamp', None) is not None:
-            doit(self.set_timestamp, id, mi.timestamp, notify=False)
-        self.set_path(id, True)
+            doit(self.set_timestamp, id, mi.timestamp, notify=False,
+                    commit=False)
 
         user_mi = mi.get_all_user_metadata(make_copy=False)
         for key in user_mi.iterkeys():
@@ -1238,7 +1241,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                 doit(self.set_custom, id,
                      val=mi.get(key),
                      extra=mi.get_extra(key),
-                     label=user_mi[key]['label'])
+                     label=user_mi[key]['label'], commit=False)
+        self.commit()
         self.notify('metadata', [id])
 
     def authors_sort_strings(self, id, index_is_id=False):
@@ -1929,7 +1933,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                     else:
                         mi.tags.append(tag)
 
-    def create_book_entry(self, mi, cover=None, add_duplicates=True):
+    def create_book_entry(self, mi, cover=None, add_duplicates=True,
+            force_id=None):
         self._add_newbook_tag(mi)
         if not add_duplicates and self.has_book(mi):
             return None
@@ -1940,9 +1945,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             aus = aus.decode(preferred_encoding, 'replace')
         if isbytestring(title):
             title = title.decode(preferred_encoding, 'replace')
-        obj = self.conn.execute('INSERT INTO books(title, series_index, author_sort) VALUES (?, ?, ?)',
-                            (title, series_index, aus))
-        id = obj.lastrowid
+        if force_id is None:
+            obj = self.conn.execute('INSERT INTO books(title, series_index, author_sort) VALUES (?, ?, ?)',
+                                (title, series_index, aus))
+            id = obj.lastrowid
+        else:
+            id = force_id
+            obj = self.conn.execute(
+                'INSERT INTO books(id, title, series_index, '
+                'author_sort) VALUES (?, ?, ?, ?)',
+                (id, title, series_index, aus))
+
         self.data.books_added([id], self)
         self.set_path(id, True)
         self.conn.commit()
diff --git a/src/calibre/library/restore.py b/src/calibre/library/restore.py
new file mode 100644
index 0000000000..bdbb5e314a
--- /dev/null
+++ b/src/calibre/library/restore.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import re, os, traceback, shutil
+from threading import Thread
+from operator import itemgetter
+from textwrap import TextWrapper
+
+from calibre.ptempfile import TemporaryDirectory
+from calibre.ebooks.metadata.opf2 import OPF
+from calibre.library.database2 import LibraryDatabase2
+from calibre.constants import filesystem_encoding
+from calibre import isbytestring
+
+NON_EBOOK_EXTENSIONS = frozenset([
+        'jpg', 'jpeg', 'gif', 'png', 'bmp',
+        'opf', 'swp', 'swo'
+        ])
+
+class RestoreDatabase(LibraryDatabase2):
+    '''set_path() is a no-op here; Restore.restore_book writes the path column itself.'''
+
+    def set_path(self, book_id, *args, **kwargs):
+        pass
+
+class Restore(Thread):
+    '''Rebuild metadata.db in a temporary folder from metadata.opf files, then swap it in.'''
+
+    def __init__(self, library_path, progress_callback=None):
+        # A Thread subclass that overrides __init__ must call the base initializer before start()
+        Thread.__init__(self)
+        if isbytestring(library_path):
+            library_path = library_path.decode(filesystem_encoding)
+        self.src_library_path = os.path.abspath(library_path)
+        self.progress_callback = progress_callback
+        self.db_id_regexp = re.compile(r'^.* \((\d+)\)$')
+        self.bad_ext_pat = re.compile(r'[^a-z]+')
+        if not callable(self.progress_callback):
+            self.progress_callback = lambda x, y: x
+        self.dirs = []
+        self.ignored_dirs = []
+        self.failed_dirs = []
+        self.books = []
+        self.conflicting_custom_cols = {}
+        self.failed_restores = []
+
+    @property
+    def errors_occurred(self):
+        return self.failed_dirs or \
+            self.conflicting_custom_cols or self.failed_restores
+
+    @property
+    def report(self):
+        ans = ''
+        failures = list(self.failed_dirs) + [(x['dirpath'], tb) for x, tb in
+                self.failed_restores]
+        if failures:
+            ans += 'Failed to restore the books in the following folders:\n'
+            wrap = TextWrapper(initial_indent='\t\t', width=85)
+            for dirpath, tb in failures:
+                ans += '\t' + dirpath + ' with error:\n'
+                ans += wrap.fill(tb)
+                ans += '\n'
+
+        if self.conflicting_custom_cols:
+            ans += '\n\n'
+            ans += 'The following custom columns were not fully restored:\n'
+            for x in self.conflicting_custom_cols:
+                ans += '\t#'+x+'\n'
+
+        return ans
+
+
+    def run(self):
+        with TemporaryDirectory('_library_restore') as tdir:
+            self.library_path = tdir
+            self.scan_library()
+            self.create_cc_metadata()
+            self.restore_books()
+            self.replace_db()
+
+    def scan_library(self):
+        for dirpath, dirnames, filenames in os.walk(self.src_library_path):
+            leaf = os.path.basename(dirpath)
+            m = self.db_id_regexp.search(leaf)
+            if m is None or 'metadata.opf' not in filenames:
+                self.ignored_dirs.append(dirpath)
+                continue
+            self.dirs.append((dirpath, filenames, m.group(1)))
+
+        self.progress_callback(None, len(self.dirs))
+        for i, x in enumerate(self.dirs):
+            dirpath, filenames, book_id = x
+            try:
+                self.process_dir(dirpath, filenames, book_id)
+            except:
+                self.failed_dirs.append((dirpath, traceback.format_exc()))
+            self.progress_callback(_('Processed') + repr(dirpath), i+1)
+
+    def is_ebook_file(self, filename):
+        ext = os.path.splitext(filename)[1]
+        if not ext:
+            return False
+        ext = ext[1:].lower()
+        if ext in NON_EBOOK_EXTENSIONS or \
+                self.bad_ext_pat.search(ext) is not None:
+            return False
+        return True
+
+    def process_dir(self, dirpath, filenames, book_id):
+        formats = filter(self.is_ebook_file, filenames)
+        fmts    = [os.path.splitext(x)[1][1:].upper() for x in formats]
+        sizes   = [os.path.getsize(os.path.join(dirpath, x)) for x in formats]
+        names   = [os.path.splitext(x)[0] for x in formats]
+        opf = os.path.join(dirpath, 'metadata.opf')
+        mi = OPF(opf).to_book_metadata()
+        timestamp = os.path.getmtime(opf)
+        path = os.path.relpath(dirpath, self.src_library_path).replace(os.sep,
+                '/')
+
+        self.books.append({
+            'mi': mi,
+            'timestamp': timestamp,
+            'formats': list(zip(fmts, sizes, names)),
+            'id': int(book_id),
+            'dirpath': dirpath,
+            'path': path,
+        })
+
+    def create_cc_metadata(self):
+        self.books.sort(key=itemgetter('timestamp'))
+        m = {}
+        fields = ('label', 'name', 'datatype', 'is_multiple', 'editable',
+                    'display')
+        for b in self.books:
+            args = []
+            for x in fields:
+                if x in b:
+                    args.append(b[x])
+            if len(args) == len(fields):
+                # TODO: Do series type columns need special handling?
+                label = b['label']
+                if label in m and args != m[label]:
+                    if label not in self.conflicting_custom_cols:
+                        self.conflicting_custom_cols[label] = set([m[label]])
+                    self.conflicting_custom_cols[label].add(args)
+                m[b['label']] = args
+
+        db = LibraryDatabase2(self.library_path)
+        for args in m.values():
+            db.create_custom_column(*args)
+        db.conn.close()
+
+    def restore_books(self):
+        self.progress_callback(None, len(self.books))
+        self.books.sort(key=itemgetter('id'))
+
+        db = RestoreDatabase(self.library_path)
+
+        for i, book in enumerate(self.books):
+            try:
+                self.restore_book(book, db)
+            except:
+                self.failed_restores.append((book, traceback.format_exc()))
+            self.progress_callback(book['mi'].title, i+1)
+
+        db.conn.close()
+
+    def restore_book(self, book, db):
+        db.create_book_entry(book['mi'], add_duplicates=True,
+                force_id=book['id'])
+        db.conn.execute('UPDATE books SET path=? WHERE id=?', (book['path'],
+            book['id']))
+
+        for fmt, size, name in book['formats']:
+            # Bind the restored book's id; a bare ``id`` would be the builtin function
+            db.conn.execute('''
+            INSERT INTO data (book,format,uncompressed_size,name)
+            VALUES (?,?,?,?)''', (book['id'], fmt, size, name))
+        db.conn.commit()
+
+    def replace_db(self):
+        dbpath = os.path.join(self.src_library_path, 'metadata.db')
+        ndbpath = os.path.join(self.library_path, 'metadata.db')
+
+        save_path = os.path.splitext(dbpath)[0]+'_pre_restore.db'
+        if os.path.exists(save_path):
+            os.remove(save_path)
+        os.rename(dbpath, save_path)
+        shutil.copyfile(ndbpath, dbpath)
+
diff --git a/src/calibre/utils/pyconsole/console.py b/src/calibre/utils/pyconsole/console.py
index 14670fdb59..13c22a928f 100644
--- a/src/calibre/utils/pyconsole/console.py
+++ b/src/calibre/utils/pyconsole/console.py
@@ -171,7 +171,7 @@ class Console(QTextEdit):
 
     def shutdown(self):
         dynamic.set('console_history', self.history.serialize())
-        self.shutton_down = True
+        self.shutting_down = True
         for c in self.controllers:
             c.kill()
 