class CheckLibraryDialog(QDialog):
    '''
    Dialog that runs :class:`CheckLibrary` over the database's library folder
    and displays the problems found in a three column tree, with one top
    level entry per check listed in ``CHECKS``.

    The check is run once when the dialog is created and again every time
    the "Run the check" button is pressed. The last results are also kept as
    comma separated text in ``self.text_results`` so that they can be copied
    to the clipboard.
    '''

    def __init__(self, parent, db):
        QDialog.__init__(self, parent)
        self.db = db

        self._layout = QVBoxLayout(self)
        self.setLayout(self._layout)
        self.log = QTreeWidget(self)
        self._layout.addWidget(self.log)
        self.setWindowTitle(_('Check Library'))

        def make_button(label, is_default, slot):
            # Create a push button, set its default state and wire up its slot
            button = QPushButton(label)
            button.setDefault(is_default)
            button.clicked.connect(slot)
            return button

        self.check = make_button(_('Run the check'), False,
                self.run_the_check)
        self.copy = make_button(_('Copy to clipboard'), False,
                self.copy_to_clipboard)
        self.ok = make_button('&OK', True, self.accept)

        self.bbox = QDialogButtonBox(self)
        placement = (
            (self.copy, QDialogButtonBox.ActionRole),
            (self.check, QDialogButtonBox.ActionRole),
            (self.ok, QDialogButtonBox.AcceptRole),
        )
        for button, role in placement:
            self.bbox.addButton(button, role)

        self._layout.addWidget(self.bbox)
        self.resize(750, 500)
        self.bbox.setEnabled(True)

        # Populate the tree immediately
        self.run_the_check()

    def run_the_check(self):
        '''
        Scan the library, rebuild the results tree from scratch and refresh
        ``self.text_results``.
        '''
        checker = CheckLibrary(self.db.library_path, self.db)
        checker.scan_library()

        tree = self.log
        tree.clear()
        tree.setColumnCount(3)
        tree.setHeaderLabels([_('Name'), _('Path from library'),
            _('Additional Information')])

        csv_lines = []
        for check in CHECKS:
            attr_name, heading = check[0], check[1]
            problems = getattr(checker, attr_name, None)
            if problems is None:
                # The checker has no results list for this check
                continue

            section = Item([heading])
            for problem in problems:
                details = ', '.join(problem[2])
                row = Item()
                row.setText(0, problem[0])
                row.setText(1, problem[1])
                row.setText(2, details)
                section.addChild(row)
                csv_lines.append(
                    ','.join([heading, problem[0], problem[1], details]))
            tree.addTopLevelItem(section)

        tree.setColumnWidth(0, 200)
        tree.setColumnWidth(1, 400)

        self.text_results = '\n'.join(csv_lines)

    def copy_to_clipboard(self):
        '''Put the text form of the last results on the system clipboard.'''
        QApplication.clipboard().setText(self.text_results)
def check_library(self):
    '''
    Show the modal Check Library dialog for the database behind the main
    library view.
    '''
    library_db = self.gui.library_view.model().db
    dialog = CheckLibraryDialog(self.gui.parent(), library_db)
    dialog.exec_()
class CheckLibrary(object):
    '''
    Compare the folder tree of a library with what the database says should
    be there.

    After :meth:`scan_library` has run, the problems found are available in
    the list attributes named in ``CHECKS`` (``invalid_titles``,
    ``extra_titles``, ``invalid_authors``, ``extra_authors``,
    ``missing_formats``, ``extra_formats``, ``extra_files`` and
    ``failed_folders``). Every entry is a 3-tuple of
    ``(name, path, additional strings)``.
    '''

    # Title folders are expected to end in " (<book id>)"
    db_id_regexp = re.compile(r'^.* \((\d+)\)$')
    # Matches anything that cannot be part of a book extension. Digits have
    # to be allowed, otherwise formats such as fb2 are never recognised.
    bad_ext_pat = re.compile(r'[^a-z0-9]+')

    def __init__(self, library_path, db):
        '''
        :param library_path: Path to the library folder (bytes or unicode).
        :param db: The database object for that library. It must provide
            ``is_case_sensitive``, ``all_authors()``, ``all_ids()``,
            ``path()`` and ``format_files()``.
        '''
        if isbytestring(library_path):
            library_path = library_path.decode(filesystem_encoding)
        self.src_library_path = os.path.abspath(library_path)
        self.db = db

        self.is_case_sensitive = db.is_case_sensitive

        self.all_authors = frozenset(x[1] for x in db.all_authors())
        self.all_ids = frozenset(db.all_ids())
        self.all_dbpaths = frozenset(
                self.dbpath(book_id) for book_id in self.all_ids)
        self.all_lc_dbpaths = frozenset(f.lower() for f in self.all_dbpaths)

        self.dirs = []
        self.book_dirs = []

        self.potential_authors = {}
        self.invalid_authors = []
        self.extra_authors = []

        self.invalid_titles = []
        self.extra_titles = []

        self.unknown_book_files = []
        self.missing_formats = []
        self.extra_formats = []
        self.extra_files = []

        # Must exist before scan_library() runs: its exception handler
        # appends to this list and it is one of the reports in CHECKS.
        self.failed_folders = []

    def dbpath(self, id):
        '''Return the path the database has recorded for the book ``id``.'''
        return self.db.path(id, index_is_id=True)

    @property
    def errors_occurred(self):
        '''True if processing any book folder raised an exception.'''
        # failed_folders is the only error list this class maintains
        return bool(self.failed_folders)

    def scan_library(self):
        '''
        Walk the ``author/title (id)`` folders of the library, recording
        every problem found in the corresponding result list, then check the
        files of each book folder that matched a database entry.
        '''
        lib = self.src_library_path
        for auth_dir in os.listdir(lib):
            auth_path = os.path.join(lib, auth_dir)
            # First check: author must be a directory
            if not os.path.isdir(auth_path):
                self.invalid_authors.append((auth_dir, auth_dir, []))
                continue

            self.potential_authors[auth_dir] = {}

            # Look for titles in the author directories
            found_titles = False
            for title_dir in os.listdir(auth_path):
                title_path = os.path.join(auth_path, title_dir)
                db_path = os.path.join(auth_dir, title_dir)
                m = self.db_id_regexp.search(title_dir)
                # Second check: title must have an ID and must be a directory
                if m is None or not os.path.isdir(title_path):
                    self.invalid_titles.append(
                            (auth_dir, db_path, [title_dir]))
                    continue

                book_id = m.group(1)
                # Third check: the id must be in the DB and the paths must
                # match (ignoring case on case insensitive libraries)
                if self.is_case_sensitive:
                    path_known = db_path in self.all_dbpaths
                else:
                    path_known = db_path.lower() in self.all_lc_dbpaths
                if int(book_id) not in self.all_ids or not path_known:
                    self.extra_titles.append((title_dir, db_path, []))
                    continue

                # Record the book to check its formats
                self.book_dirs.append((db_path, title_dir, book_id))
                found_titles = True

            # Fourth check: author directories that contain no titles
            if not found_titles:
                self.extra_authors.append((auth_dir, auth_dir, []))

        for book_info in self.book_dirs:
            try:
                self.process_book(lib, book_info)
            except Exception:
                traceback.print_exc()
                # Sort-of check: exception processing directory. Report the
                # folder of the book that actually failed, not whichever
                # folder the scan above happened to visit last.
                self.failed_folders.append((os.path.join(lib, book_info[0]),
                    traceback.format_exc(), []))

    def is_ebook_file(self, filename):
        '''
        Return True if the extension of ``filename`` (compared in lower
        case) is a known book extension.
        '''
        ext = os.path.splitext(filename)[1]
        if not ext:
            return False
        ext = ext[1:].lower()
        return ext in EBOOK_EXTENSIONS and \
                self.bad_ext_pat.search(ext) is None

    def process_book(self, lib, book_info):
        '''
        Compare the files in one book folder with the formats the database
        lists for that book.

        :param lib: Absolute path of the library.
        :param book_info: ``(db_path, title_dir, book_id)`` as stored in
            ``self.book_dirs``.
        '''
        (db_path, title_dir, book_id) = book_info
        filenames = frozenset(os.listdir(os.path.join(lib, db_path)))
        book_id = int(book_id)
        formats = frozenset(f for f in filenames if self.is_ebook_file(f))
        book_formats = frozenset(x[0]+'.'+x[1].lower() for x in
                self.db.format_files(book_id, index_is_id=True))

        if self.is_case_sensitive:
            unknowns = filenames - formats - NORMALS
            missing = book_formats - formats
            extra = formats - book_formats
        else:
            def lc_map(fnames, fset):
                # Translate lower cased names back to their original spelling
                m = {}
                for f in fnames:
                    m[f.lower()] = f
                return [m[f] for f in fset]

            filenames_lc = frozenset(f.lower() for f in filenames)
            formats_lc = frozenset(f.lower() for f in formats)
            book_formats_lc = frozenset(f.lower() for f in book_formats)

            unknowns = lc_map(filenames, filenames_lc - formats_lc - NORMALS)
            missing = lc_map(book_formats, book_formats_lc - formats_lc)
            extra = lc_map(formats, formats_lc - book_formats_lc)

        # Check: any books that aren't formats or normally there?
        if unknowns:
            self.extra_files.append((title_dir, db_path, unknowns))

        # Check: any book formats that should be there?
        if missing:
            self.missing_formats.append((title_dir, db_path, missing))

        # Check: any book formats that shouldn't be there?
        if extra:
            self.extra_formats.append((title_dir, db_path, extra))
Reports are {0} +''').format(', '.join([c[0] for c in CHECKS]))) + + parser.add_option('-c', '--csv', default=False, action='store_true', + help=_('Output in CSV')) + + parser.add_option('-r', '--report', default=None, dest='report', + help=_("Comma-separated list of reports.\n" + "Default: all")) + return parser + +def command_check_library(args, dbpath): + from calibre.library.check_library import CheckLibrary, CHECKS + parser = check_library_option_parser() + opts, args = parser.parse_args(args) + if len(args) != 0: + parser.print_help() + return 1 + + if opts.library_path is not None: + dbpath = opts.library_path + + if isbytestring(dbpath): + dbpath = dbpath.decode(preferred_encoding) + + if opts.report is None: + checks = CHECKS + else: + checks = [] + for r in opts.report.split(','): + found = False + for c in CHECKS: + if c[0] == r: + checks.append(c) + found = True + break + if not found: + print _('Unknown report check'), r + return 1 + + def print_one(checker, check): + attr = check[0] + list = getattr(checker, attr, None) + if list is None: + return + if opts.csv: + for i in list: + print check[1] + ',' + i[0] + ',' + i[1] + ',' + '|'.join(i[2]) + else: + print check[1] + for i in list: + print ' %-30.30s - %-30.30s - %s'%(i[0], i[1], ', '.join(i[2])) + + db = LibraryDatabase2(dbpath) + checker = CheckLibrary(dbpath, db) + checker.scan_library() + for check in checks: + print_one(checker, check) + COMMANDS = ('list', 'add', 'remove', 'add_format', 'remove_format', 'show_metadata', 'set_metadata', 'export', 'catalog', 'saved_searches', 'add_custom_column', 'custom_columns', - 'remove_custom_column', 'set_custom', 'restore_database') + 'remove_custom_column', 'set_custom', 'restore_database', + 'check_library') def restore_database_option_parser(): parser = get_parser(_( diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 4de8c3d552..e679780b46 100644 --- a/src/calibre/library/database2.py +++ 
def format_files(self, index, index_is_id=False):
    '''
    Return the ``(name, format)`` pairs stored in the ``data`` table for a
    book.

    :param index: The book id if ``index_is_id`` is True, otherwise a value
        that is converted to a book id with ``self.id()``.
    :param index_is_id: Whether ``index`` already is a book id.
    :return: A list of ``(name, format)`` tuples, or an empty list if the
        lookup fails.
    '''
    book_id = index if index_is_id else self.id(index)
    try:
        rows = self.conn.get('SELECT name,format FROM data WHERE book=?',
                (book_id,))
        # Build the list inside the try block: callers always get a real
        # list and errors raised while reading the rows are handled as well.
        return [(row[0], row[1]) for row in rows]
    except Exception:
        # Best effort lookup: a failed query is reported as "no formats"
        return []