From 48dd69ba9cc291de86178d7b9b15e50ceb60dd0a Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Fri, 1 Oct 2010 14:41:26 +0100
Subject: [PATCH] Check library function

---
Reviewer notes (text between "---" and the first "diff --git" is not applied):
 * The line structure of the patch has been restored. Hunk sizes and the
   diffstat are recomputed for the changes listed here; the blob ids on the
   "index" lines are still those of the original commit.
 * gui2/dialogs/check_library.py: the __main__ block built the dialog without
   its required parent and db arguments. A local no longer shadows the
   builtin "list".
 * library/check_library.py: bad_ext_pat rejected known extensions that
   contain a digit; failed_folders recorded the last folder scanned instead
   of the folder that raised; report unpacked the failed_folders triples as
   pairs; the bare "except:" is narrowed to Exception.
 * gui2/preferences/misc.ui: the six context lines and the markup of the
   seven added lines are missing from this copy and are NOT reconstructed
   here. The added widget has to be named button_check_library to match
   the connection made in misc.py.

 src/calibre/gui2/dialogs/check_library.py | 104 ++++++++++
 src/calibre/gui2/preferences/misc.py      |   7 +
 src/calibre/gui2/preferences/misc.ui      |   7 +
 src/calibre/library/check_library.py      | 201 ++++++++++++++++++++++
 src/calibre/library/database2.py          |  10 ++
 5 files changed, 329 insertions(+)
 create mode 100644 src/calibre/gui2/dialogs/check_library.py
 create mode 100644 src/calibre/library/check_library.py

diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py
new file mode 100644
index 0000000000..8eeeda117d
--- /dev/null
+++ b/src/calibre/gui2/dialogs/check_library.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+__license__ = 'GPL v3'
+
+from PyQt4.Qt import QDialog, QVBoxLayout, QTreeWidget, QPushButton, \
+        QDialogButtonBox, QApplication, QTreeWidgetItem
+
+from calibre.library.check_library import CheckLibrary
+
+class Item(QTreeWidgetItem):
+    pass
+
+class CheckLibraryDialog(QDialog):
+    '''
+    Dialog that runs CheckLibrary on the library of ``db`` and lists the
+    problems it records in a three column tree, one top level entry per
+    check. The same results can be copied to the clipboard as text.
+    '''
+
+    def __init__(self, parent, db):
+        QDialog.__init__(self, parent)
+        self.db = db
+
+        self._layout = QVBoxLayout(self)
+        self.setLayout(self._layout)
+        self.log = QTreeWidget(self)
+        self._layout.addWidget(self.log)
+        self.setWindowTitle(_('Check Library'))
+
+        self.check = QPushButton(_('Run the check'))
+        self.check.setDefault(False)
+        self.check.clicked.connect(self.run_the_check)
+        self.copy = QPushButton(_('Copy to clipboard'))
+        self.copy.setDefault(False)
+        self.copy.clicked.connect(self.copy_to_clipboard)
+        self.ok = QPushButton('&OK')
+        self.ok.setDefault(True)
+        self.ok.clicked.connect(self.accept)
+        self.bbox = QDialogButtonBox(self)
+        self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole)
+        self.bbox.addButton(self.check, QDialogButtonBox.ActionRole)
+        self.bbox.addButton(self.ok, QDialogButtonBox.AcceptRole)
+
+        self._layout.addWidget(self.bbox)
+        self.resize(750, 500)
+        self.bbox.setEnabled(True)
+
+        # Populate the tree as soon as the dialog is created
+        self.run_the_check()
+
+    def run_the_check(self):
+        '''
+        Scan the library and rebuild both the tree and the plain text copy
+        of the results (``self.text_results``).
+        '''
+        checker = CheckLibrary(self.db.library_path, self.db)
+        checker.scan_library()
+
+        plaintext = []
+
+        def builder(tree, checker, check):
+            # check is an (attribute name, heading) pair from
+            # CheckLibrary.checks; each problem is a
+            # (name, path, additional information) tuple
+            attr = check[0]
+            problems = getattr(checker, attr, None)
+            if problems is None:
+                return
+
+            h = check[1]
+            tl = Item([h])
+            for problem in problems:
+                it = Item()
+                it.setText(0, problem[0])
+                it.setText(1, problem[1])
+                p = ', '.join(problem[2])
+                it.setText(2, p)
+                tl.addChild(it)
+                plaintext.append(','.join([h, problem[0], problem[1], p]))
+            tree.addTopLevelItem(tl)
+
+        t = self.log
+        t.clear()
+        t.setColumnCount(3)
+        t.setHeaderLabels([_('Name'), _('Path from library'), _('Additional Information')])
+        for check in checker.checks:
+            builder(t, checker, check)
+
+        t.setColumnWidth(0, 200)
+        t.setColumnWidth(1, 400)
+
+        self.text_results = '\n'.join(plaintext)
+
+    def copy_to_clipboard(self):
+        QApplication.clipboard().setText(self.text_results)
+
+
+if __name__ == '__main__':
+    # The dialog cannot be built without a database: open the default one
+    from calibre.library import db
+    app = QApplication([])
+    d = CheckLibraryDialog(None, db())
+    d.exec_()
diff --git a/src/calibre/gui2/preferences/misc.py b/src/calibre/gui2/preferences/misc.py
index 582d110c6c..c9dc25caff 100644
--- a/src/calibre/gui2/preferences/misc.py
+++ b/src/calibre/gui2/preferences/misc.py
@@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
 
 from PyQt4.Qt import QProgressDialog, QThread, Qt, pyqtSignal
 
+from calibre.gui2.dialogs.check_library import CheckLibraryDialog
 from calibre.gui2.preferences import ConfigWidgetBase, test_widget
 from calibre.gui2.preferences.misc_ui import Ui_Form
 from calibre.gui2 import error_dialog, config, warning_dialog, \
@@ -89,6 +90,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         self.device_detection_button.clicked.connect(self.debug_device_detection)
         self.compact_button.clicked.connect(self.compact)
         self.button_all_books_dirty.clicked.connect(self.mark_dirty)
+        self.button_check_library.clicked.connect(self.check_library)
         self.button_open_config_dir.clicked.connect(self.open_config_dir)
         self.button_osx_symlinks.clicked.connect(self.create_symlinks)
         self.button_osx_symlinks.setVisible(isosx)
@@ -100,6 +102,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
             _('Metadata will be backed up while calibre is running, at the '
               'rate of 30 books per minute.'), show=True)
 
+    def check_library(self):
+        db = self.gui.library_view.model().db
+        d = CheckLibraryDialog(self.gui.parent(), db)
+        d.exec_()
+
     def debug_device_detection(self, *args):
         from calibre.gui2.preferences.device_debug import DebugDevice
         d = DebugDevice(self)
diff --git a/src/calibre/gui2/preferences/misc.ui b/src/calibre/gui2/preferences/misc.ui
index adf2a15c16..dd0ca15840 100644
--- a/src/calibre/gui2/preferences/misc.ui
+++ b/src/calibre/gui2/preferences/misc.ui
@@ -131,6 +131,13 @@
+
+
+
+ Check the library folders for potential problems
+
+
+
diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py
new file mode 100644
index 0000000000..d746018c85
--- /dev/null
+++ b/src/calibre/library/check_library.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import re, os, traceback
+
+from calibre import isbytestring
+from calibre.constants import filesystem_encoding
+from calibre.ebooks import BOOK_EXTENSIONS
+
+EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS)
+
+# Files that are expected in a book folder besides the formats themselves
+NORMALS = frozenset(['metadata.opf', 'cover.jpg'])
+
+class CheckLibrary(object):
+    '''
+    Compare the folders under a library path with what the database says
+    should be there. scan_library() fills one list per entry of ``checks``
+    with (name, path relative to the library, additional information)
+    tuples.
+    '''
+
+    # (attribute holding the problems found, heading shown to the user)
+    checks = [('invalid_titles', _('Invalid titles')),
+              ('extra_titles', _('Extra titles')),
+              ('invalid_authors', _('Invalid authors')),
+              ('extra_authors', _('Extra authors')),
+              ('missing_formats', _('Missing book formats')),
+              ('extra_formats', _('Extra book formats')),
+              ('extra_files', _('Unknown files in book')),
+              ('failed_folders', _('Folders raising exception'))
+             ]
+
+    def __init__(self, library_path, db):
+        if isbytestring(library_path):
+            library_path = library_path.decode(filesystem_encoding)
+        self.src_library_path = os.path.abspath(library_path)
+        self.db = db
+
+        self.all_authors = frozenset([x[1] for x in db.all_authors()])
+        self.all_ids = frozenset([id for id in db.all_ids()])
+        self.all_dbpaths = frozenset(self.dbpath(id) for id in self.all_ids)
+
+        # Matches folder names ending in " (<digits>)"; the digits are the id
+        self.db_id_regexp = re.compile(r'^.* \((\d+)\)$')
+        # Digits are legal in extensions (e.g. fb2), so they are not "bad"
+        self.bad_ext_pat = re.compile(r'[^a-z0-9]+')
+
+        self.dirs = []
+        self.book_dirs = []
+
+        self.potential_authors = {}
+        self.invalid_authors = []
+        self.extra_authors = []
+
+        self.invalid_titles = []
+        self.extra_titles = []
+
+        self.unknown_book_files = []
+        self.missing_formats = []
+        self.extra_formats = []
+        self.extra_files = []
+
+        self.failed_folders = []
+        self.books = []
+        self.conflicting_custom_cols = {}
+        self.failed_restores = []
+        self.mismatched_dirs = []
+        self.successes = 0
+        self.tb = None
+
+    def dbpath(self, id):
+        return self.db.path(id, index_is_id=True)
+
+    @property
+    def errors_occurred(self):
+        return self.failed_folders or self.mismatched_dirs or \
+                self.conflicting_custom_cols or self.failed_restores
+
+    @property
+    def report(self):
+        ans = ''
+        # failed_folders holds (path, traceback, extra) triples, only the
+        # first two fields are reported
+        failures = [(x[0], x[1]) for x in self.failed_folders] + \
+                [(x['dirpath'], tb) for x, tb in self.failed_restores]
+        if failures:
+            ans += 'Failed to restore the books in the following folders:\n'
+            for dirpath, tb in failures:
+                ans += '\t' + dirpath + ' with error:\n'
+                ans += '\n'.join('\t\t'+x for x in tb.splitlines())
+                ans += '\n\n'
+
+        if self.conflicting_custom_cols:
+            ans += '\n\n'
+            ans += 'The following custom columns were not fully restored:\n'
+            for x in self.conflicting_custom_cols:
+                ans += '\t#'+x+'\n'
+
+        if self.mismatched_dirs:
+            ans += '\n\n'
+            ans += 'The following folders were ignored:\n'
+            for x in self.mismatched_dirs:
+                ans += '\t'+x+'\n'
+
+
+        return ans
+
+
+    def scan_library(self):
+        '''
+        Walk the author/title folders of the library and record every
+        inconsistency with the database in the per-check lists.
+        '''
+        lib = self.src_library_path
+        for auth_dir in os.listdir(lib):
+            auth_path = os.path.join(lib, auth_dir)
+            # First check: author must be a directory
+            if not os.path.isdir(auth_path):
+                self.invalid_authors.append((auth_dir, auth_dir, []))
+                continue
+
+            self.potential_authors[auth_dir] = {}
+
+            # Look for titles in the author directories
+            found_titles = False
+            for title_dir in os.listdir(auth_path):
+                title_path = os.path.join(auth_path, title_dir)
+                db_path = os.path.join(auth_dir, title_dir)
+                m = self.db_id_regexp.search(title_dir)
+                # Second check: title must have an ID and must be a directory
+                if m is None or not os.path.isdir(title_path):
+                    self.invalid_titles.append((auth_dir, db_path, [title_dir]))
+                    continue
+
+                id = m.group(1)
+                # Third check: the id must be in the DB and the paths must match
+                if int(id) not in self.all_ids or \
+                        db_path not in self.all_dbpaths:
+                    self.extra_titles.append((title_dir, db_path, []))
+                    continue
+
+                # Record the book to check its formats
+                self.book_dirs.append((db_path, title_dir, id))
+                found_titles = True
+
+            # Fourth check: author directories that contain no titles
+            if not found_titles:
+                self.extra_authors.append((auth_dir, auth_dir, []))
+
+        for x in self.book_dirs:
+            try:
+                self.process_book(lib, x)
+            except Exception:
+                traceback.print_exc()
+                # Sort-of check: exception processing directory. Report the
+                # folder of the book that failed, not the last one scanned
+                self.failed_folders.append((os.path.join(lib, x[0]),
+                    traceback.format_exc(), []))
+
+    def is_ebook_file(self, filename):
+        ext = os.path.splitext(filename)[1]
+        if not ext:
+            return False
+        ext = ext[1:].lower()
+        if ext not in EBOOK_EXTENSIONS or \
+                self.bad_ext_pat.search(ext) is not None:
+            return False
+        return True
+
+    def process_book(self, lib, book_info):
+        '''
+        Compare the files in one book folder with the formats the database
+        lists for it. book_info is a (db_path, title_dir, book_id) tuple.
+        '''
+        (db_path, title_dir, book_id) = book_info
+        filenames = frozenset(os.listdir(os.path.join(lib, db_path)))
+        book_id = int(book_id)
+        formats = frozenset(filter(self.is_ebook_file, filenames))
+
+        unknowns = frozenset(filenames-formats-NORMALS)
+        # Check: any books that aren't formats or normally there?
+        if unknowns:
+            self.extra_files.append((title_dir, db_path, unknowns))
+
+        book_formats = frozenset([x[0]+'.'+x[1].lower() for x in
+                            self.db.format_files(book_id, index_is_id=True)])
+
+        # Check: any book formats that should be there?
+        missing = book_formats - formats
+        if missing:
+            self.missing_formats.append((title_dir, db_path, missing))
+
+        # Check: any book formats that shouldn't be there?
+        extra = formats - book_formats
+        if extra:
+            self.extra_formats.append((title_dir, db_path, extra))
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 4de8c3d552..e679780b46 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -850,6 +850,16 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             return set([])
         return set([f[0] for f in formats])
 
+    def format_files(self, index, index_is_id=False):
+        ''' Return the (name, format) rows stored for the book as a list, [] if the query fails '''
+        id = index if index_is_id else self.id(index)
+        try:
+            formats = self.conn.get('SELECT name,format FROM data WHERE book=?', (id,))
+            formats = map(lambda x:(x[0], x[1]), formats)
+            return formats
+        except:
+            return []
+
     def formats(self, index, index_is_id=False, verify_formats=True):
         ''' Return available formats as a comma separated list or None if there are no available formats '''
         id = index if index_is_id else self.id(index)