From 12b603473a0bdf9788b4a293d6eb035be9a2bdc1 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 1 Oct 2010 08:43:25 +0100 Subject: [PATCH 1/7] Add PRC to folder device formats. --- src/calibre/devices/folder_device/driver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/folder_device/driver.py b/src/calibre/devices/folder_device/driver.py index 5919d6d2fb..a3d591049d 100644 --- a/src/calibre/devices/folder_device/driver.py +++ b/src/calibre/devices/folder_device/driver.py @@ -16,7 +16,8 @@ class FOLDER_DEVICE_FOR_CONFIG(USBMS): description = _('Use an arbitrary folder as a device.') author = 'John Schember/Charles Haley' supported_platforms = ['windows', 'osx', 'linux'] - FORMATS = ['epub', 'fb2', 'mobi', 'azw', 'lrf', 'tcr', 'pmlz', 'lit', 'rtf', 'rb', 'pdf', 'oeb', 'txt', 'pdb'] + FORMATS = ['epub', 'fb2', 'mobi', 'azw', 'lrf', 'tcr', 'pmlz', 'lit', + 'rtf', 'rb', 'pdf', 'oeb', 'txt', 'pdb', 'prc'] VENDOR_ID = 0xffff PRODUCT_ID = 0xffff BCD = 0xffff From 48dd69ba9cc291de86178d7b9b15e50ceb60dd0a Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 1 Oct 2010 14:41:26 +0100 Subject: [PATCH 2/7] Check library function --- src/calibre/gui2/dialogs/check_library.py | 89 +++++++++++ src/calibre/gui2/preferences/misc.py | 7 + src/calibre/gui2/preferences/misc.ui | 7 + src/calibre/library/check_library.py | 179 ++++++++++++++++++++++ src/calibre/library/database2.py | 9 ++ 5 files changed, 291 insertions(+) create mode 100644 src/calibre/gui2/dialogs/check_library.py create mode 100644 src/calibre/library/check_library.py diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py new file mode 100644 index 0000000000..8eeeda117d --- /dev/null +++ b/src/calibre/gui2/dialogs/check_library.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' +__license__ = 'GPL v3' + +from PyQt4.Qt import QDialog, QVBoxLayout, QTreeWidget, QPushButton, \ + QDialogButtonBox, QApplication, QTreeWidgetItem + +from calibre.library.check_library import CheckLibrary + +class Item(QTreeWidgetItem): + pass + +class CheckLibraryDialog(QDialog): + + def __init__(self, parent, db): + QDialog.__init__(self, parent) + self.db = db + + self._layout = QVBoxLayout(self) + self.setLayout(self._layout) + self.log = QTreeWidget(self) + self._layout.addWidget(self.log) + self.setWindowTitle(_('Check Library')) + + self.check = QPushButton(_('Run the check')) + self.check.setDefault(False) + self.check.clicked.connect(self.run_the_check) + self.copy = QPushButton(_('Copy to clipboard')) + self.copy.setDefault(False) + self.copy.clicked.connect(self.copy_to_clipboard) + self.ok = QPushButton('&OK') + self.ok.setDefault(True) + self.ok.clicked.connect(self.accept) + self.bbox = QDialogButtonBox(self) + self.bbox.addButton(self.copy, QDialogButtonBox.ActionRole) + self.bbox.addButton(self.check, QDialogButtonBox.ActionRole) + self.bbox.addButton(self.ok, QDialogButtonBox.AcceptRole) + + self._layout.addWidget(self.bbox) + self.resize(750, 500) + self.bbox.setEnabled(True) + + self.run_the_check() + + def run_the_check(self): + checker = CheckLibrary(self.db.library_path, self.db) + checker.scan_library() + + plaintext = [] + + def builder(tree, checker, check): + attr = check[0] + list = getattr(checker, attr, None) + if list is None: + return + + h = check[1] + tl = Item([h]) + for problem in list: + it = Item() + it.setText(0, problem[0]) + it.setText(1, problem[1]) + p = ', '.join(problem[2]) + it.setText(2, p) + tl.addChild(it) + plaintext.append(','.join([h, problem[0], problem[1], p])) + tree.addTopLevelItem(tl) + + t = self.log + t.clear() + t.setColumnCount(3); + t.setHeaderLabels([_('Name'), _('Path from library'), _('Additional Information')]) + for check in checker.checks: + builder(t, checker, check) + + t.setColumnWidth(0, 200) + t.setColumnWidth(1, 400) + + self.text_results = '\n'.join(plaintext) + + def copy_to_clipboard(self): + QApplication.clipboard().setText(self.text_results) + + +if __name__ == '__main__': + app = QApplication([]) + d = CheckLibraryDialog() + d.exec_() diff --git a/src/calibre/gui2/preferences/misc.py b/src/calibre/gui2/preferences/misc.py index 582d110c6c..c9dc25caff 100644 --- a/src/calibre/gui2/preferences/misc.py +++ b/src/calibre/gui2/preferences/misc.py @@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en' from PyQt4.Qt import QProgressDialog, QThread, Qt, pyqtSignal +from calibre.gui2.dialogs.check_library import CheckLibraryDialog from calibre.gui2.preferences import ConfigWidgetBase, test_widget from calibre.gui2.preferences.misc_ui import Ui_Form from calibre.gui2 import error_dialog, config, warning_dialog, \ @@ -89,6 +90,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): self.device_detection_button.clicked.connect(self.debug_device_detection) self.compact_button.clicked.connect(self.compact) self.button_all_books_dirty.clicked.connect(self.mark_dirty) + self.button_check_library.clicked.connect(self.check_library) self.button_open_config_dir.clicked.connect(self.open_config_dir) self.button_osx_symlinks.clicked.connect(self.create_symlinks) self.button_osx_symlinks.setVisible(isosx) @@ -100,6 +102,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): _('Metadata will be backed up while calibre is running, at the ' 'rate of 30 books per minute.'), show=True) + def check_library(self): + db = self.gui.library_view.model().db + d = CheckLibraryDialog(self.gui.parent(), db) + d.exec_() + def debug_device_detection(self, *args): from calibre.gui2.preferences.device_debug import DebugDevice d = DebugDevice(self) diff --git a/src/calibre/gui2/preferences/misc.ui b/src/calibre/gui2/preferences/misc.ui index adf2a15c16..dd0ca15840 100644 --- a/src/calibre/gui2/preferences/misc.ui +++ b/src/calibre/gui2/preferences/misc.ui @@ -131,6 +131,13 @@ + + + + Check the library folders for potential problems + + + diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py new file mode 100644 index 0000000000..d746018c85 --- /dev/null +++ b/src/calibre/library/check_library.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import re, os, traceback + +from calibre import isbytestring +from calibre.constants import filesystem_encoding +from calibre.ebooks import BOOK_EXTENSIONS + +EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS) + +NORMALS = frozenset(['metadata.opf', 'cover.jpg']) + +class CheckLibrary(object): + + checks = [('invalid_titles', _('Invalid titles')), + ('extra_titles', _('Extra titles')), + ('invalid_authors', _('Invalid authors')), + ('extra_authors', _('Extra authors')), + ('missing_formats', _('Missing book formats')), + ('extra_formats', _('Extra book formats')), + ('extra_files', _('Unknown files in book')), + ('failed_folders', _('Folders raising exception')) + ] + + def __init__(self, library_path, db): + if isbytestring(library_path): + library_path = library_path.decode(filesystem_encoding) + self.src_library_path = os.path.abspath(library_path) + self.db = db + + self.all_authors = frozenset([x[1] for x in db.all_authors()]) + self.all_ids = frozenset([id for id in db.all_ids()]) + self.all_dbpaths = frozenset(self.dbpath(id) for id in self.all_ids) + + self.db_id_regexp = re.compile(r'^.* \((\d+)\)$') + self.bad_ext_pat = re.compile(r'[^a-z]+') + + self.dirs = [] + self.book_dirs = [] + + self.potential_authors = {} + self.invalid_authors = [] + self.extra_authors = [] + + self.invalid_titles = [] + self.extra_titles = [] + + self.unknown_book_files = [] + self.missing_formats = [] + self.extra_formats = [] + self.extra_files = [] + + self.failed_folders = [] + self.books = [] + self.conflicting_custom_cols = {} + self.failed_restores = [] + self.mismatched_dirs = [] + self.successes = 0 + self.tb = None + + def dbpath(self, id): + return self.db.path(id, index_is_id=True) + + @property + def errors_occurred(self): + return self.failed_folders or self.mismatched_dirs or \ + self.conflicting_custom_cols or self.failed_restores + + @property + def report(self): + ans = '' + failures = list(self.failed_folders) + [(x['dirpath'], tb) for x, tb in + self.failed_restores] + if failures: + ans += 'Failed to restore the books in the following folders:\n' + for dirpath, tb in failures: + ans += '\t' + dirpath + ' with error:\n' + ans += '\n'.join('\t\t'+x for x in tb.splitlines()) + ans += '\n\n' + + if self.conflicting_custom_cols: + ans += '\n\n' + ans += 'The following custom columns were not fully restored:\n' + for x in self.conflicting_custom_cols: + ans += '\t#'+x+'\n' + + if self.mismatched_dirs: + ans += '\n\n' + ans += 'The following folders were ignored:\n' + for x in self.mismatched_dirs: + ans += '\t'+x+'\n' + + + return ans + + + def scan_library(self): + lib = self.src_library_path + for auth_dir in os.listdir(lib): + auth_path = os.path.join(lib, auth_dir) + # First check: author must be a directory + if not os.path.isdir(auth_path): + self.invalid_authors.append((auth_dir, auth_dir, [])) + continue + + self.potential_authors[auth_dir] = {} + + # Look for titles in the author directories + found_titles = False + for title_dir in os.listdir(auth_path): + title_path = os.path.join(auth_path, title_dir) + db_path = os.path.join(auth_dir, title_dir) + m = self.db_id_regexp.search(title_dir) + # Second check: title must have an ID and must be a directory + if m is None or not os.path.isdir(title_path): + self.invalid_titles.append((auth_dir, db_path, [title_dir])) + continue + + id = m.group(1) + # Third check: the id must be in the DB and the paths must match + if int(id) not in self.all_ids or \ + db_path not in self.all_dbpaths: + self.extra_titles.append((title_dir, db_path, [])) + continue + + # Record the book to check its formats + self.book_dirs.append((db_path, title_dir, id)) + found_titles = True + + # Fourth check: author directories that contain no titles + if not found_titles: + self.extra_authors.append((auth_dir, auth_dir, [])) + + for x in self.book_dirs: + try: + self.process_book(lib, x) + except: + traceback.print_exc() + # Sort-of check: exception processing directory + self.failed_folders.append((title_path, traceback.format_exc(), [])) + + def is_ebook_file(self, filename): + ext = os.path.splitext(filename)[1] + if not ext: + return False + ext = ext[1:].lower() + if ext not in EBOOK_EXTENSIONS or \ + self.bad_ext_pat.search(ext) is not None: + return False + return True + + def process_book(self, lib, book_info): + (db_path, title_dir, book_id) = book_info + filenames = frozenset(os.listdir(os.path.join(lib, db_path))) + book_id = int(book_id) + formats = frozenset(filter(self.is_ebook_file, filenames)) + + unknowns = frozenset(filenames-formats-NORMALS) + # Check: any books that aren't formats or normally there? + if unknowns: + self.extra_files.append((title_dir, db_path, unknowns)) + + book_formats = frozenset([x[0]+'.'+x[1].lower() for x in + self.db.format_files(book_id, index_is_id=True)]) + + # Check: any book formats that should be there? + missing = book_formats - formats + if missing: + self.missing_formats.append((title_dir, db_path, missing)) + + # Check: any book formats that shouldn't be there? + extra = formats - book_formats + if extra: + self.extra_formats.append((title_dir, db_path, extra)) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 4de8c3d552..e679780b46 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -850,6 +850,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): return set([]) return set([f[0] for f in formats]) + def format_files(self, index, index_is_id=False): + id = index if index_is_id else self.id(index) + try: + formats = self.conn.get('SELECT name,format FROM data WHERE book=?', (id,)) + formats = map(lambda x:(x[0], x[1]), formats) + return formats + except: + return [] + def formats(self, index, index_is_id=False, verify_formats=True): ''' Return available formats as a comma separated list or None if there are no available formats ''' id = index if index_is_id else self.id(index) From 81ad156e53ad465be6d3ef9182b153e46be49ba5 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 1 Oct 2010 14:58:39 +0100 Subject: [PATCH 3/7] Command line form of CheckLibrary --- src/calibre/library/cli.py | 48 +++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 19bd56bf55..05c24f1ce8 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -874,11 +874,57 @@ def command_saved_searches(args, dbpath): return 0 +def check_library_option_parser(): + from calibre.library.custom_columns import CustomColumns + parser = get_parser(_('''\ +%prog check_library [options] + +Perform some checks on the filesystem representing a library. +''').format(', '.join(CustomColumns.CUSTOM_DATA_TYPES))) + + parser.add_option('-c', '--csv', default=False, action='store_true', + help=_('Output in CSV')) + return parser + +def command_check_library(args, dbpath): + from calibre.library.check_library import CheckLibrary + parser = check_library_option_parser() + opts, args = parser.parse_args(args) + if len(args) != 0: + parser.print_help() + return 1 + + if opts.library_path is not None: + dbpath = opts.library_path + + if isbytestring(dbpath): + dbpath = dbpath.decode(preferred_encoding) + + def print_one(checker, check): + attr = check[0] + list = getattr(checker, attr, None) + if list is None: + return + if opts.csv: + for i in list: + print check[1] + ',' + i[0] + ',' + i[1] + ',' + '|'.join(i[2]) + else: + print check[1] + for i in list: + print ' %-30.30s - %-30.30s - %s'%(i[0], i[1], ', '.join(i[2])) + + db = LibraryDatabase2(dbpath) + checker = CheckLibrary(dbpath, db) + checker.scan_library() + for check in checker.checks: + print_one(checker, check) + COMMANDS = ('list', 'add', 'remove', 'add_format', 'remove_format', 'show_metadata', 'set_metadata', 'export', 'catalog', 'saved_searches', 'add_custom_column', 'custom_columns', - 'remove_custom_column', 'set_custom', 'restore_database') + 'remove_custom_column', 'set_custom', 'restore_database', + 'check_library') def restore_database_option_parser(): parser = get_parser(_( From 06bcd520b4a1eb9df89f482c35922a30a5941bf9 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 1 Oct 2010 15:35:17 +0100 Subject: [PATCH 4/7] Make report types optional in CLI --- src/calibre/gui2/dialogs/check_library.py | 4 +- src/calibre/library/check_library.py | 73 ++++++++--------------- src/calibre/library/cli.py | 29 +++++++-- 3 files changed, 50 insertions(+), 56 deletions(-) diff --git a/src/calibre/gui2/dialogs/check_library.py b/src/calibre/gui2/dialogs/check_library.py index 8eeeda117d..177e12db49 100644 --- a/src/calibre/gui2/dialogs/check_library.py +++ b/src/calibre/gui2/dialogs/check_library.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' from PyQt4.Qt import QDialog, QVBoxLayout, QTreeWidget, QPushButton, \ QDialogButtonBox, QApplication, QTreeWidgetItem -from calibre.library.check_library import CheckLibrary +from calibre.library.check_library import CheckLibrary, CHECKS class Item(QTreeWidgetItem): pass @@ -71,7 +71,7 @@ class CheckLibraryDialog(QDialog): t.clear() t.setColumnCount(3); t.setHeaderLabels([_('Name'), _('Path from library'), _('Additional Information')]) - for check in checker.checks: + for check in CHECKS: builder(t, checker, check) t.setColumnWidth(0, 200) diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py index d746018c85..ec45b49f0a 100644 --- a/src/calibre/library/check_library.py +++ b/src/calibre/library/check_library.py @@ -15,17 +15,18 @@ EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS) NORMALS = frozenset(['metadata.opf', 'cover.jpg']) -class CheckLibrary(object): +CHECKS = [('invalid_titles', _('Invalid titles')), + ('extra_titles', _('Extra titles')), + ('invalid_authors', _('Invalid authors')), + ('extra_authors', _('Extra authors')), + ('missing_formats', _('Missing book formats')), + ('extra_formats', _('Extra book formats')), + ('extra_files', _('Unknown files in book')), + ('failed_folders', _('Folders raising exception')) + ] - checks = [('invalid_titles', _('Invalid titles')), - ('extra_titles', _('Extra titles')), - ('invalid_authors', _('Invalid authors')), - ('extra_authors', _('Extra authors')), - ('missing_formats', _('Missing book formats')), - ('extra_formats', _('Extra book formats')), - ('extra_files', _('Unknown files in book')), - ('failed_folders', _('Folders raising exception')) - ] + +class CheckLibrary(object): def __init__(self, library_path, db): if isbytestring(library_path): @@ -35,7 +36,10 @@ class CheckLibrary(object): self.all_authors = frozenset([x[1] for x in db.all_authors()]) self.all_ids = frozenset([id for id in db.all_ids()]) + self.is_case_sensitive = db.is_case_sensitive self.all_dbpaths = frozenset(self.dbpath(id) for id in self.all_ids) + self.all_lc_dbpaths = frozenset(self.dbpath(id).lower() + for id in self.all_ids) self.db_id_regexp = re.compile(r'^.* \((\d+)\)$') self.bad_ext_pat = re.compile(r'[^a-z]+') @@ -55,13 +59,6 @@ class CheckLibrary(object): self.extra_formats = [] self.extra_files = [] - self.failed_folders = [] - self.books = [] - self.conflicting_custom_cols = {} - self.failed_restores = [] - self.mismatched_dirs = [] - self.successes = 0 - self.tb = None def dbpath(self, id): return self.db.path(id, index_is_id=True) @@ -71,34 +68,6 @@ class CheckLibrary(object): return self.failed_folders or self.mismatched_dirs or \ self.conflicting_custom_cols or self.failed_restores - @property - def report(self): - ans = '' - failures = list(self.failed_folders) + [(x['dirpath'], tb) for x, tb in - self.failed_restores] - if failures: - ans += 'Failed to restore the books in the following folders:\n' - for dirpath, tb in failures: - ans += '\t' + dirpath + ' with error:\n' - ans += '\n'.join('\t\t'+x for x in tb.splitlines()) - ans += '\n\n' - - if self.conflicting_custom_cols: - ans += '\n\n' - ans += 'The following custom columns were not fully restored:\n' - for x in self.conflicting_custom_cols: - ans += '\t#'+x+'\n' - - if self.mismatched_dirs: - ans += '\n\n' - ans += 'The following folders were ignored:\n' - for x in self.mismatched_dirs: - ans += '\t'+x+'\n' - - - return ans - - def scan_library(self): lib = self.src_library_path for auth_dir in os.listdir(lib): @@ -123,10 +92,16 @@ class CheckLibrary(object): id = m.group(1) # Third check: the id must be in the DB and the paths must match - if int(id) not in self.all_ids or \ - db_path not in self.all_dbpaths: - self.extra_titles.append((title_dir, db_path, [])) - continue + if self.is_case_sensitive: + if int(id) not in self.all_ids or \ + db_path not in self.all_dbpaths: + self.extra_titles.append((title_dir, db_path, [])) + continue + else: + if int(id) not in self.all_ids or \ + db_path.lower() not in self.all_lc_dbpaths: + self.extra_titles.append((title_dir, db_path, [])) + continue # Record the book to check its formats self.book_dirs.append((db_path, title_dir, id)) diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 05c24f1ce8..d504e670e0 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -875,19 +875,23 @@ def command_saved_searches(args, dbpath): return 0 def check_library_option_parser(): - from calibre.library.custom_columns import CustomColumns + from calibre.library.check_library import CHECKS parser = get_parser(_('''\ %prog check_library [options] -Perform some checks on the filesystem representing a library. -''').format(', '.join(CustomColumns.CUSTOM_DATA_TYPES))) +Perform some checks on the filesystem representing a library. Reports are {0} +''').format(', '.join([c[0] for c in CHECKS]))) parser.add_option('-c', '--csv', default=False, action='store_true', help=_('Output in CSV')) + + parser.add_option('-r', '--report', default=None, dest='report', + help=_("Comma-separated list of reports.\n" + "Default: all")) return parser def command_check_library(args, dbpath): - from calibre.library.check_library import CheckLibrary + from calibre.library.check_library import CheckLibrary, CHECKS parser = check_library_option_parser() opts, args = parser.parse_args(args) if len(args) != 0: @@ -900,6 +904,21 @@ def command_check_library(args, dbpath): if isbytestring(dbpath): dbpath = dbpath.decode(preferred_encoding) + if opts.report is None: + checks = CHECKS + else: + checks = [] + for r in opts.report.split(','): + found = False + for c in CHECKS: + if c[0] == r: + checks.append(c) + found = True + break + if not found: + print _('Unknown report check'), r + return 1 + def print_one(checker, check): attr = check[0] list = getattr(checker, attr, None) @@ -916,7 +935,7 @@ def command_check_library(args, dbpath): db = LibraryDatabase2(dbpath) checker = CheckLibrary(dbpath, db) checker.scan_library() - for check in checker.checks: + for check in checks: print_one(checker, check) From 266552431047307f751b46f46a1ce1c4dbb07ab0 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 1 Oct 2010 15:57:07 +0100 Subject: [PATCH 5/7] Make case sensitivity work --- src/calibre/library/check_library.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py index ec45b49f0a..65cfaaaa39 100644 --- a/src/calibre/library/check_library.py +++ b/src/calibre/library/check_library.py @@ -132,23 +132,35 @@ class CheckLibrary(object): def process_book(self, lib, book_info): (db_path, title_dir, book_id) = book_info filenames = frozenset(os.listdir(os.path.join(lib, db_path))) + filenames_lc = frozenset(f.lower() for f in filenames) book_id = int(book_id) formats = frozenset(filter(self.is_ebook_file, filenames)) + formats_lc = frozenset(f.lower() for f in formats) - unknowns = frozenset(filenames-formats-NORMALS) # Check: any books that aren't formats or normally there? + if self.is_case_sensitive: + unknowns = frozenset(filenames-formats-NORMALS) + else: + unknowns = frozenset(filenames_lc-formats_lc-NORMALS) if unknowns: self.extra_files.append((title_dir, db_path, unknowns)) book_formats = frozenset([x[0]+'.'+x[1].lower() for x in self.db.format_files(book_id, index_is_id=True)]) + book_formats_lc = frozenset(f.lower() for f in book_formats) # Check: any book formats that should be there? - missing = book_formats - formats + if self.is_case_sensitive: + missing = book_formats - formats + else: + missing = book_formats_lc - formats_lc if missing: self.missing_formats.append((title_dir, db_path, missing)) # Check: any book formats that shouldn't be there? - extra = formats - book_formats + if self.is_case_sensitive: + extra = formats - book_formats + else: + extra = formats_lc - book_formats_lc if extra: self.extra_formats.append((title_dir, db_path, extra)) From fc2dcd2020f9bccc0a5ca6f4e8101a5bb5b69903 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 1 Oct 2010 16:47:57 +0100 Subject: [PATCH 6/7] More case-sensitive work --- src/calibre/library/check_library.py | 63 +++++++++++++++++----------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py index 65cfaaaa39..6b188afec2 100644 --- a/src/calibre/library/check_library.py +++ b/src/calibre/library/check_library.py @@ -34,12 +34,12 @@ class CheckLibrary(object): self.src_library_path = os.path.abspath(library_path) self.db = db + self.is_case_sensitive = db.is_case_sensitive + self.all_authors = frozenset([x[1] for x in db.all_authors()]) self.all_ids = frozenset([id for id in db.all_ids()]) - self.is_case_sensitive = db.is_case_sensitive self.all_dbpaths = frozenset(self.dbpath(id) for id in self.all_ids) - self.all_lc_dbpaths = frozenset(self.dbpath(id).lower() - for id in self.all_ids) + self.all_lc_dbpaths = frozenset([f.lower() for f in self.all_dbpaths]) self.db_id_regexp = re.compile(r'^.* \((\d+)\)$') self.bad_ext_pat = re.compile(r'[^a-z]+') @@ -132,35 +132,50 @@ class CheckLibrary(object): def process_book(self, lib, book_info): (db_path, title_dir, book_id) = book_info filenames = frozenset(os.listdir(os.path.join(lib, db_path))) - filenames_lc = frozenset(f.lower() for f in filenames) book_id = int(book_id) formats = frozenset(filter(self.is_ebook_file, filenames)) - formats_lc = frozenset(f.lower() for f in formats) - - # Check: any books that aren't formats or normally there? - if self.is_case_sensitive: - unknowns = frozenset(filenames-formats-NORMALS) - else: - unknowns = frozenset(filenames_lc-formats_lc-NORMALS) - if unknowns: - self.extra_files.append((title_dir, db_path, unknowns)) - book_formats = frozenset([x[0]+'.'+x[1].lower() for x in self.db.format_files(book_id, index_is_id=True)]) - book_formats_lc = frozenset(f.lower() for f in book_formats) - # Check: any book formats that should be there? if self.is_case_sensitive: + unknowns = frozenset(filenames-formats-NORMALS) + # Check: any books that aren't formats or normally there? + if unknowns: + self.extra_files.append((title_dir, db_path, unknowns)) + + # Check: any book formats that should be there? missing = book_formats - formats - else: - missing = book_formats_lc - formats_lc - if missing: - self.missing_formats.append((title_dir, db_path, missing)) + if missing: + self.missing_formats.append((title_dir, db_path, missing)) - # Check: any book formats that shouldn't be there? - if self.is_case_sensitive: + # Check: any book formats that shouldn't be there? extra = formats - book_formats + if extra: + self.extra_formats.append((title_dir, db_path, extra)) else: + def lc_map(fnames, fset): + m = {} + for f in fnames: + m[f.lower()] = f + return [m[f] for f in fset] + + filenames_lc = frozenset([f.lower() for f in filenames]) + formats_lc = frozenset([f.lower() for f in formats]) + unknowns = frozenset(filenames_lc-formats_lc-NORMALS) + # Check: any books that aren't formats or normally there? + if unknowns: + self.extra_files.append((title_dir, db_path, + lc_map(filenames, unknowns))) + + book_formats_lc = frozenset([f.lower() for f in book_formats]) + # Check: any book formats that should be there? + missing = book_formats_lc - formats_lc + if missing: + self.missing_formats.append((title_dir, db_path, + lc_map(book_formats, missing))) + + # Check: any book formats that shouldn't be there? extra = formats_lc - book_formats_lc - if extra: - self.extra_formats.append((title_dir, db_path, extra)) + if extra: + self.extra_formats.append((title_dir, db_path, + lc_map(formats, extra))) From a7a37695fa7366e48595336e69b0d96b49d6a846 Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Fri, 1 Oct 2010 16:50:59 +0100 Subject: [PATCH 7/7] Fix typo in message --- src/calibre/library/check_library.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/library/check_library.py b/src/calibre/library/check_library.py index 6b188afec2..f78912544b 100644 --- a/src/calibre/library/check_library.py +++ b/src/calibre/library/check_library.py @@ -21,7 +21,7 @@ CHECKS = [('invalid_titles', _('Invalid titles')), ('extra_authors', _('Extra authors')), ('missing_formats', _('Missing book formats')), ('extra_formats', _('Extra book formats')), - ('extra_files', _('Unknown files in book')), + ('extra_files', _('Unknown files in books')), ('failed_folders', _('Folders raising exception')) ]