From 48dd69ba9cc291de86178d7b9b15e50ceb60dd0a Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Fri, 1 Oct 2010 14:41:26 +0100
Subject: [PATCH] Check library function
---
src/calibre/gui2/dialogs/check_library.py | 89 +++++++++++
src/calibre/gui2/preferences/misc.py | 7 +
src/calibre/gui2/preferences/misc.ui | 7 +
src/calibre/library/check_library.py | 179 ++++++++++++++++++++++
src/calibre/library/database2.py | 9 ++
5 files changed, 291 insertions(+)
create mode 100644 src/calibre/gui2/dialogs/check_library.py
create mode 100644 src/calibre/library/check_library.py
diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py
new file mode 100644
index 0000000000..8eeeda117d
--- /dev/null
+++ b/src/calibre/gui2/dialogs/check_library.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+__license__ = 'GPL v3'
+
+from PyQt4.Qt import QDialog, QVBoxLayout, QTreeWidget, QPushButton, \
+ QDialogButtonBox, QApplication, QTreeWidgetItem
+
+from calibre.library.check_library import CheckLibrary
+
+class Item(QTreeWidgetItem):
+    '''Node type used for every row of the check results tree; it adds nothing to QTreeWidgetItem.'''
+
+class CheckLibraryDialog(QDialog):
+    '''
+    Dialog that runs a CheckLibrary scan of the library behind ``db`` and
+    shows the problems found in a three column tree, with one top level
+    node per check.
+    '''
+
+    def __init__(self, parent, db):
+        '''
+        Build the widgets and run a first check straight away.
+
+        :param parent: parent widget, handed to QDialog
+        :param db: database of the library to check; ``db.library_path`` is
+                   read and the object itself is passed on to CheckLibrary
+        '''
+        QDialog.__init__(self, parent)
+        self.db = db
+        # Plain text form of the last results, used by copy_to_clipboard()
+        self.text_results = ''
+
+        self._layout = QVBoxLayout(self)
+        self.setLayout(self._layout)
+        self.log = QTreeWidget(self)
+        self._layout.addWidget(self.log)
+        self.setWindowTitle(_('Check Library'))
+
+        self.check = QPushButton(_('Run the check'))
+        self.check.setDefault(False)
+        self.check.clicked.connect(self.run_the_check)
+        self.copy = QPushButton(_('Copy to clipboard'))
+        self.copy.setDefault(False)
+        self.copy.clicked.connect(self.copy_to_clipboard)
+        self.ok = QPushButton('&OK')
+        self.ok.setDefault(True)
+        self.ok.clicked.connect(self.accept)
+        self.bbox = QDialogButtonBox(self)
+        self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole)
+        self.bbox.addButton(self.check, QDialogButtonBox.ActionRole)
+        self.bbox.addButton(self.ok, QDialogButtonBox.AcceptRole)
+
+        self._layout.addWidget(self.bbox)
+        self.resize(750, 500)
+        self.bbox.setEnabled(True)
+
+        self.run_the_check()
+
+    def run_the_check(self):
+        '''
+        Scan the library again and rebuild both the tree and the plain text
+        copy of the results kept in ``self.text_results``.
+        '''
+        checker = CheckLibrary(self.db.library_path, self.db)
+        checker.scan_library()
+
+        plaintext = []
+        tree = self.log
+        tree.clear()
+        tree.setColumnCount(3)
+        tree.setHeaderLabels([_('Name'), _('Path from library'), _('Additional Information')])
+        for attr, heading in checker.checks:
+            self._add_check_results(tree, checker, attr, heading, plaintext)
+
+        tree.setColumnWidth(0, 200)
+        tree.setColumnWidth(1, 400)
+
+        self.text_results = '\n'.join(plaintext)
+
+    def _add_check_results(self, tree, checker, attr, heading, plaintext):
+        '''
+        Add one top level node for a check, with a child row per problem, and
+        append a comma separated line per problem to ``plaintext``.
+
+        Nothing is added when ``checker`` has no attribute called ``attr``.
+        '''
+        problems = getattr(checker, attr, None)
+        if problems is None:
+            return
+
+        top = Item([heading])
+        for name, path, extra in problems:
+            # extra is often a set, so sort it to get the same text on every run
+            extra_text = ', '.join(sorted(extra))
+            row = Item()
+            row.setText(0, name)
+            row.setText(1, path)
+            row.setText(2, extra_text)
+            top.addChild(row)
+            plaintext.append(','.join([heading, name, path, extra_text]))
+        tree.addTopLevelItem(top)
+
+    def copy_to_clipboard(self):
+        '''Put the plain text form of the last results on the system clipboard.'''
+        QApplication.clipboard().setText(self.text_results)
+
+
+if __name__ == '__main__':
+    # Manual test: show the dialog for the default library. The dialog needs
+    # both a parent and a database, so open one here instead of calling the
+    # constructor without arguments (which raises TypeError).
+    from calibre.library import db
+    app = QApplication([])
+    d = CheckLibraryDialog(None, db())
+    d.exec_()
diff --git a/src/calibre/gui2/preferences/misc.py b/src/calibre/gui2/preferences/misc.py
index 582d110c6c..c9dc25caff 100644
--- a/src/calibre/gui2/preferences/misc.py
+++ b/src/calibre/gui2/preferences/misc.py
@@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import QProgressDialog, QThread, Qt, pyqtSignal
+from calibre.gui2.dialogs.check_library import CheckLibraryDialog
from calibre.gui2.preferences import ConfigWidgetBase, test_widget
from calibre.gui2.preferences.misc_ui import Ui_Form
from calibre.gui2 import error_dialog, config, warning_dialog, \
@@ -89,6 +90,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.device_detection_button.clicked.connect(self.debug_device_detection)
self.compact_button.clicked.connect(self.compact)
self.button_all_books_dirty.clicked.connect(self.mark_dirty)
+ self.button_check_library.clicked.connect(self.check_library)
self.button_open_config_dir.clicked.connect(self.open_config_dir)
self.button_osx_symlinks.clicked.connect(self.create_symlinks)
self.button_osx_symlinks.setVisible(isosx)
@@ -100,6 +102,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
_('Metadata will be backed up while calibre is running, at the '
'rate of 30 books per minute.'), show=True)
+    def check_library(self):
+        '''Open the Check Library dialog for the database behind the library view and wait until it is closed.'''
+        library_db = self.gui.library_view.model().db
+        dialog = CheckLibraryDialog(self.gui.parent(), library_db)
+        dialog.exec_()
+
def debug_device_detection(self, *args):
from calibre.gui2.preferences.device_debug import DebugDevice
d = DebugDevice(self)
diff --git a/src/calibre/gui2/preferences/misc.ui b/src/calibre/gui2/preferences/misc.ui
index adf2a15c16..dd0ca15840 100644
--- a/src/calibre/gui2/preferences/misc.ui
+++ b/src/calibre/gui2/preferences/misc.ui
@@ -131,6 +131,13 @@
+ -
+
+
+ Check the library folders for potential problems
+
+
+
-
diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py
new file mode 100644
index 0000000000..d746018c85
--- /dev/null
+++ b/src/calibre/library/check_library.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import re, os, traceback
+
+from calibre import isbytestring
+from calibre.constants import filesystem_encoding
+from calibre.ebooks import BOOK_EXTENSIONS
+
+# Extensions (lower case, without the dot) that is_ebook_file() accepts as book formats
+EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS)
+
+# File names that process_book() never reports as unknown files in a book folder
+NORMALS = frozenset(['metadata.opf', 'cover.jpg'])
+
+class CheckLibrary(object):
+    '''
+    Compare the folders and files found under a library folder with what the
+    database says should be there.
+
+    Call scan_library() once. The problems found are then available as lists
+    of ``(name, path, extra)`` tuples in the attributes named in ``checks``.
+    '''
+
+    # (name of the attribute holding the results, heading shown to the user)
+    checks = [('invalid_titles', _('Invalid titles')),
+              ('extra_titles', _('Extra titles')),
+              ('invalid_authors', _('Invalid authors')),
+              ('extra_authors', _('Extra authors')),
+              ('missing_formats', _('Missing book formats')),
+              ('extra_formats', _('Extra book formats')),
+              ('extra_files', _('Unknown files in book')),
+              ('failed_folders', _('Folders raising exception'))
+              ]
+
+    def __init__(self, library_path, db):
+        '''
+        :param library_path: root folder of the library, as bytes or unicode
+        :param db: database object; all_authors(), all_ids(), path() and
+                   format_files() are called on it
+        '''
+        if isbytestring(library_path):
+            library_path = library_path.decode(filesystem_encoding)
+        self.src_library_path = os.path.abspath(library_path)
+        self.db = db
+
+        self.all_authors = frozenset([x[1] for x in db.all_authors()])
+        self.all_ids = frozenset(db.all_ids())
+        self.all_dbpaths = frozenset(self.dbpath(book_id) for book_id in self.all_ids)
+
+        # Title folders must end in ' (<digits>)'; the digits are the book id
+        self.db_id_regexp = re.compile(r'^.* \((\d+)\)$')
+        # Matches characters that cannot be part of a format extension. Digits
+        # have to be allowed, otherwise formats such as fb2 are never accepted
+        self.bad_ext_pat = re.compile(r'[^a-z0-9_]+')
+
+        self.dirs = []
+        self.book_dirs = []
+
+        self.potential_authors = {}
+        self.invalid_authors = []
+        self.extra_authors = []
+
+        self.invalid_titles = []
+        self.extra_titles = []
+
+        self.unknown_book_files = []
+        self.missing_formats = []
+        self.extra_formats = []
+        self.extra_files = []
+
+        self.failed_folders = []
+        self.books = []
+        self.conflicting_custom_cols = {}
+        self.failed_restores = []
+        self.mismatched_dirs = []
+        self.successes = 0
+        self.tb = None
+
+    def dbpath(self, id):
+        '''Return the path the database has recorded for the book with this id.'''
+        return self.db.path(id, index_is_id=True)
+
+    @property
+    def errors_occurred(self):
+        '''Truthy when any failure list or dict is non-empty.'''
+        return self.failed_folders or self.mismatched_dirs or \
+                self.conflicting_custom_cols or self.failed_restores
+
+    @property
+    def report(self):
+        '''Plain text summary of the failures recorded so far ('' if none).'''
+        parts = []
+        # failed_folders holds (path, traceback, extra) tuples, while
+        # failed_restores holds (book dict, traceback) pairs
+        failures = [(x[0], x[1]) for x in self.failed_folders] + \
+                [(x['dirpath'], tb) for x, tb in self.failed_restores]
+        if failures:
+            parts.append('Failed to restore the books in the following folders:\n')
+            for dirpath, tb in failures:
+                parts.append('\t' + dirpath + ' with error:\n')
+                parts.append('\n'.join('\t\t'+x for x in tb.splitlines()))
+                parts.append('\n\n')
+
+        if self.conflicting_custom_cols:
+            parts.append('\n\n')
+            parts.append('The following custom columns were not fully restored:\n')
+            for x in self.conflicting_custom_cols:
+                parts.append('\t#'+x+'\n')
+
+        if self.mismatched_dirs:
+            parts.append('\n\n')
+            parts.append('The following folders were ignored:\n')
+            for x in self.mismatched_dirs:
+                parts.append('\t'+x+'\n')
+
+        return ''.join(parts)
+
+    def scan_library(self):
+        '''
+        Walk the author folders and the title folders inside them, recording
+        every problem found, then check the files of each book that matched
+        the database.
+        '''
+        lib = self.src_library_path
+        for auth_dir in os.listdir(lib):
+            # calibre keeps its database file in the library folder itself,
+            # so it must not be reported as a misplaced author
+            if auth_dir == 'metadata.db':
+                continue
+
+            auth_path = os.path.join(lib, auth_dir)
+            # First check: author must be a directory
+            if not os.path.isdir(auth_path):
+                self.invalid_authors.append((auth_dir, auth_dir, []))
+                continue
+
+            self.potential_authors[auth_dir] = {}
+
+            # Look for titles in the author directories
+            found_titles = False
+            for title_dir in os.listdir(auth_path):
+                title_path = os.path.join(auth_path, title_dir)
+                db_path = os.path.join(auth_dir, title_dir)
+                m = self.db_id_regexp.search(title_dir)
+                # Second check: title must have an ID and must be a directory
+                if m is None or not os.path.isdir(title_path):
+                    self.invalid_titles.append((auth_dir, db_path, [title_dir]))
+                    continue
+
+                book_id = m.group(1)
+                # Third check: the id must be in the DB and the paths must match
+                if int(book_id) not in self.all_ids or \
+                        db_path not in self.all_dbpaths:
+                    self.extra_titles.append((title_dir, db_path, []))
+                    continue
+
+                # Record the book to check its formats
+                self.book_dirs.append((db_path, title_dir, book_id))
+                found_titles = True
+
+            # Fourth check: author directories that contain no titles
+            if not found_titles:
+                self.extra_authors.append((auth_dir, auth_dir, []))
+
+        for book in self.book_dirs:
+            try:
+                self.process_book(lib, book)
+            except Exception:
+                traceback.print_exc()
+                # Sort-of check: exception processing directory. Record the
+                # folder of the book that failed, not the last folder scanned
+                self.failed_folders.append(
+                    (os.path.join(lib, book[0]), traceback.format_exc(), []))
+
+    def is_ebook_file(self, filename):
+        '''Return True if the extension of ``filename`` is a known book format.'''
+        ext = os.path.splitext(filename)[1]
+        if not ext:
+            return False
+        # Drop the leading dot
+        ext = ext[1:].lower()
+        if ext not in EBOOK_EXTENSIONS or \
+                self.bad_ext_pat.search(ext) is not None:
+            return False
+        return True
+
+    def process_book(self, lib, book_info):
+        '''
+        Compare the files in one book folder with the formats the database
+        lists for that book, recording unknown, missing and extra files.
+
+        :param lib: path of the library folder
+        :param book_info: ``(db_path, title_dir, book_id)`` as stored in
+                          ``book_dirs`` by scan_library()
+        '''
+        db_path, title_dir, book_id = book_info
+        filenames = frozenset(os.listdir(os.path.join(lib, db_path)))
+        book_id = int(book_id)
+        formats = frozenset(f for f in filenames if self.is_ebook_file(f))
+
+        unknowns = filenames - formats - NORMALS
+        # Check: any books that aren't formats or normally there?
+        if unknowns:
+            self.extra_files.append((title_dir, db_path, unknowns))
+
+        # File names the database expects, built as '<name>.<format in lower case>'
+        book_formats = frozenset([name + '.' + fmt.lower() for name, fmt in
+                            self.db.format_files(book_id, index_is_id=True)])
+
+        # Check: any book formats that should be there?
+        missing = book_formats - formats
+        if missing:
+            self.missing_formats.append((title_dir, db_path, missing))
+
+        # Check: any book formats that shouldn't be there?
+        extra = formats - book_formats
+        if extra:
+            self.extra_formats.append((title_dir, db_path, extra))
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 4de8c3d552..e679780b46 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -850,6 +850,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return set([])
return set([f[0] for f in formats])
+    def format_files(self, index, index_is_id=False):
+        '''
+        Return a list of ``(name, format)`` tuples read from the ``data``
+        table for one book, or an empty list if the query fails.
+
+        ``index`` is used as the book id when ``index_is_id`` is True,
+        otherwise it is first converted with ``self.id()``.
+        '''
+        book_id = index if index_is_id else self.id(index)
+        try:
+            rows = self.conn.get('SELECT name,format FROM data WHERE book=?', (book_id,))
+            return [(row[0], row[1]) for row in rows]
+        except:
+            return []
+
+
def formats(self, index, index_is_id=False, verify_formats=True):
''' Return available formats as a comma separated list or None if there are no available formats '''
id = index if index_is_id else self.id(index)