Start of new check library function

This commit is contained in:
Kovid Goyal 2010-10-01 10:03:00 -06:00
commit 4e91f3c018
7 changed files with 361 additions and 2 deletions

View File

@ -16,7 +16,8 @@ class FOLDER_DEVICE_FOR_CONFIG(USBMS):
description = _('Use an arbitrary folder as a device.')
author = 'John Schember/Charles Haley'
supported_platforms = ['windows', 'osx', 'linux']
FORMATS = ['epub', 'fb2', 'mobi', 'azw', 'lrf', 'tcr', 'pmlz', 'lit', 'rtf', 'rb', 'pdf', 'oeb', 'txt', 'pdb']
FORMATS = ['epub', 'fb2', 'mobi', 'azw', 'lrf', 'tcr', 'pmlz', 'lit',
'rtf', 'rb', 'pdf', 'oeb', 'txt', 'pdb', 'prc']
VENDOR_ID = 0xffff
PRODUCT_ID = 0xffff
BCD = 0xffff

View File

@ -0,0 +1,89 @@
#!/usr/bin/env python
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__license__ = 'GPL v3'
from PyQt4.Qt import QDialog, QVBoxLayout, QTreeWidget, QPushButton, \
QDialogButtonBox, QApplication, QTreeWidgetItem
from calibre.library.check_library import CheckLibrary, CHECKS
class Item(QTreeWidgetItem):
    '''Row type used in the Check Library results tree.

    Adds nothing to QTreeWidgetItem; it only gives the dialog its own item
    class to instantiate.
    '''
class CheckLibraryDialog(QDialog):
    '''
    Dialog that runs a CheckLibrary scan over ``db.library_path`` and shows
    the problems found, grouped per check, in a three column tree.

    The same results are kept as comma separated plain text in
    ``self.text_results`` so they can be copied to the clipboard.
    '''

    def __init__(self, parent, db):
        '''
        :param parent: Parent widget handed to QDialog.
        :param db: Library database; must provide ``library_path`` and
                   whatever CheckLibrary needs from it.
        '''
        QDialog.__init__(self, parent)
        self.db = db

        self._layout = QVBoxLayout(self)
        self.setLayout(self._layout)

        self.log = QTreeWidget(self)
        self._layout.addWidget(self.log)
        self.setWindowTitle(_('Check Library'))

        # (attribute name, caption, is default button, slot)
        button_specs = (
            ('check', _('Run the check'), False, self.run_the_check),
            ('copy', _('Copy to clipboard'), False, self.copy_to_clipboard),
            ('ok', '&OK', True, self.accept),
        )
        for attr_name, caption, is_default, slot in button_specs:
            button = QPushButton(caption)
            button.setDefault(is_default)
            button.clicked.connect(slot)
            setattr(self, attr_name, button)

        self.bbox = QDialogButtonBox(self)
        for button, role in (
                (self.copy, QDialogButtonBox.ActionRole),
                (self.check, QDialogButtonBox.ActionRole),
                (self.ok, QDialogButtonBox.AcceptRole)):
            self.bbox.addButton(button, role)
        self._layout.addWidget(self.bbox)

        self.resize(750, 500)
        self.bbox.setEnabled(True)

        # Populate the tree immediately so the dialog opens with results
        self.run_the_check()

    def run_the_check(self):
        '''Scan the library and rebuild both the tree and the text report.'''
        checker = CheckLibrary(self.db.library_path, self.db)
        checker.scan_library()

        report_lines = []
        tree = self.log
        tree.clear()
        tree.setColumnCount(3)
        tree.setHeaderLabels([_('Name'), _('Path from library'),
                              _('Additional Information')])

        for check in CHECKS:
            # check is (attribute name on the checker, translated heading)
            problems = getattr(checker, check[0], None)
            if problems is None:
                continue
            heading = check[1]
            group = Item([heading])
            for problem in problems:
                details = ', '.join(problem[2])
                row = Item()
                for column, text in enumerate((problem[0], problem[1], details)):
                    row.setText(column, text)
                group.addChild(row)
                report_lines.append(
                        ','.join([heading, problem[0], problem[1], details]))
            tree.addTopLevelItem(group)

        tree.setColumnWidth(0, 200)
        tree.setColumnWidth(1, 400)
        self.text_results = '\n'.join(report_lines)

    def copy_to_clipboard(self):
        '''Put the plain text report of the last run on the clipboard.'''
        QApplication.clipboard().setText(self.text_results)
if __name__ == '__main__':
    # Manual test harness. CheckLibraryDialog.__init__ requires (parent, db);
    # calling it with no arguments raises TypeError before anything is shown.
    # NOTE(review): assumes calibre.library.db() opens the default library --
    # confirm before relying on this.
    from calibre.library import db
    app = QApplication([])
    d = CheckLibraryDialog(None, db())
    d.exec_()

View File

@ -7,6 +7,7 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import QProgressDialog, QThread, Qt, pyqtSignal
from calibre.gui2.dialogs.check_library import CheckLibraryDialog
from calibre.gui2.preferences import ConfigWidgetBase, test_widget
from calibre.gui2.preferences.misc_ui import Ui_Form
from calibre.gui2 import error_dialog, config, warning_dialog, \
@ -89,6 +90,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
self.device_detection_button.clicked.connect(self.debug_device_detection)
self.compact_button.clicked.connect(self.compact)
self.button_all_books_dirty.clicked.connect(self.mark_dirty)
self.button_check_library.clicked.connect(self.check_library)
self.button_open_config_dir.clicked.connect(self.open_config_dir)
self.button_osx_symlinks.clicked.connect(self.create_symlinks)
self.button_osx_symlinks.setVisible(isosx)
@ -100,6 +102,11 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
_('Metadata will be backed up while calibre is running, at the '
'rate of 30 books per minute.'), show=True)
def check_library(self):
    '''Show the Check Library dialog for the database behind the library view.'''
    library_db = self.gui.library_view.model().db
    dialog_parent = self.gui.parent()
    dialog = CheckLibraryDialog(dialog_parent, library_db)
    dialog.exec_()
def debug_device_detection(self, *args):
from calibre.gui2.preferences.device_debug import DebugDevice
d = DebugDevice(self)

View File

@ -131,6 +131,13 @@
</property>
</widget>
</item>
<item row="11" column="0" colspan="2">
<widget class="QPushButton" name="button_check_library">
<property name="text">
<string>Check the library folders for potential problems</string>
</property>
</widget>
</item>
<item row="20" column="0">
<spacer name="verticalSpacer_9">
<property name="orientation">

View File

@ -0,0 +1,181 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, os, traceback
from calibre import isbytestring
from calibre.constants import filesystem_encoding
from calibre.ebooks import BOOK_EXTENSIONS
# Extensions (lower case, without the leading dot) that is_ebook_file()
# accepts as book formats
EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS)

# File names inside a book folder that process_book() never reports as unknown
NORMALS = frozenset(('metadata.opf', 'cover.jpg'))

# Each entry is (name of the result attribute on CheckLibrary, translated
# heading shown to the user)
CHECKS = [
    ('invalid_titles', _('Invalid titles')),
    ('extra_titles', _('Extra titles')),
    ('invalid_authors', _('Invalid authors')),
    ('extra_authors', _('Extra authors')),
    ('missing_formats', _('Missing book formats')),
    ('extra_formats', _('Extra book formats')),
    ('extra_files', _('Unknown files in books')),
    ('failed_folders', _('Folders raising exception')),
]
class CheckLibrary(object):
    '''
    Compare the folder tree of a library on disk with what the database
    says should be there.

    The expected layout is ``<library>/<author folder>/<title folder (id)>``.
    After :meth:`scan_library` has run, the result lists (``invalid_titles``,
    ``extra_titles``, ``invalid_authors``, ``extra_authors``,
    ``missing_formats``, ``extra_formats``, ``extra_files`` and
    ``failed_folders``) hold one 3-tuple per problem:
    ``(name, path or message, iterable of extra information strings)``.
    '''

    def __init__(self, library_path, db):
        '''
        :param library_path: Root folder of the library, bytes or unicode.
        :param db: Database object; must provide ``is_case_sensitive``,
                   ``all_authors()``, ``all_ids()``, ``path()`` and
                   ``format_files()``.
        '''
        if isbytestring(library_path):
            library_path = library_path.decode(filesystem_encoding)
        self.src_library_path = os.path.abspath(library_path)
        self.db = db
        self.is_case_sensitive = db.is_case_sensitive

        self.all_authors = frozenset([x[1] for x in db.all_authors()])
        self.all_ids = frozenset(db.all_ids())
        self.all_dbpaths = frozenset(
                self.dbpath(book_id) for book_id in self.all_ids)
        # Lower-cased copy used when the filesystem is case insensitive
        self.all_lc_dbpaths = frozenset([f.lower() for f in self.all_dbpaths])

        # A title folder name must end in " (<numeric id>)"
        self.db_id_regexp = re.compile(r'^.* \((\d+)\)$')
        self.bad_ext_pat = re.compile(r'[^a-z]+')

        self.dirs = []
        self.book_dirs = []

        self.potential_authors = {}
        self.invalid_authors = []
        self.extra_authors = []

        self.invalid_titles = []
        self.extra_titles = []

        self.unknown_book_files = []
        self.missing_formats = []
        self.extra_formats = []
        self.extra_files = []

        # Must exist before scan_library() runs: its exception handler
        # appends to this list and it is one of the reports named in CHECKS
        self.failed_folders = []

    def dbpath(self, id):
        '''Return the path the database stores for the book with this id.'''
        return self.db.path(id, index_is_id=True)

    @property
    def errors_occurred(self):
        '''True if processing any book folder raised an exception.'''
        # Only failed_folders exists on this class; the other attributes the
        # property used to read are never defined here
        return bool(self.failed_folders)

    def scan_library(self):
        '''
        Walk the library folder, fill the author/title result lists, then
        check the files of every book folder that matched the database.
        '''
        lib = self.src_library_path
        for auth_dir in os.listdir(lib):
            auth_path = os.path.join(lib, auth_dir)
            # First check: author must be a directory
            if not os.path.isdir(auth_path):
                self.invalid_authors.append((auth_dir, auth_dir, []))
                continue

            self.potential_authors[auth_dir] = {}

            # Look for titles in the author directories
            found_titles = False
            for title_dir in os.listdir(auth_path):
                title_path = os.path.join(auth_path, title_dir)
                db_path = os.path.join(auth_dir, title_dir)
                m = self.db_id_regexp.search(title_dir)
                # Second check: title must have an ID and must be a directory
                if m is None or not os.path.isdir(title_path):
                    self.invalid_titles.append((auth_dir, db_path, [title_dir]))
                    continue

                book_id = m.group(1)
                # Third check: the id must be in the DB and the paths must match
                if self.is_case_sensitive:
                    path_known = db_path in self.all_dbpaths
                else:
                    path_known = db_path.lower() in self.all_lc_dbpaths
                if int(book_id) not in self.all_ids or not path_known:
                    self.extra_titles.append((title_dir, db_path, []))
                    continue

                # Record the book to check its formats
                self.book_dirs.append((db_path, title_dir, book_id))
                found_titles = True

            # Fourth check: author directories that contain no titles
            if not found_titles:
                self.extra_authors.append((auth_dir, auth_dir, []))

        for book_info in self.book_dirs:
            try:
                self.process_book(lib, book_info)
            except Exception:
                traceback.print_exc()
                # Sort-of check: exception processing directory. Report the
                # folder that actually failed, not the last one scanned above
                failed_path = os.path.join(lib, book_info[0])
                self.failed_folders.append(
                        (failed_path, traceback.format_exc(), []))

    def is_ebook_file(self, filename):
        '''Return True if the extension of filename is a known book format.'''
        ext = os.path.splitext(filename)[1]
        if not ext:
            return False
        ext = ext[1:].lower()
        return ext in EBOOK_EXTENSIONS and \
                self.bad_ext_pat.search(ext) is None

    def process_book(self, lib, book_info):
        '''
        Compare the files in one book folder with the formats the database
        lists for that book.

        :param lib: Absolute path of the library root.
        :param book_info: ``(db_path, title_dir, book_id)`` as recorded by
                          scan_library(); book_id is the id as a string.
        '''
        (db_path, title_dir, book_id) = book_info
        filenames = frozenset(os.listdir(os.path.join(lib, db_path)))
        book_id = int(book_id)
        formats = frozenset(filter(self.is_ebook_file, filenames))
        # File names the database expects: "<name>.<format in lower case>"
        book_formats = frozenset([x[0]+'.'+x[1].lower() for x in
                            self.db.format_files(book_id, index_is_id=True)])

        if self.is_case_sensitive:
            unknowns = frozenset(filenames-formats-NORMALS)
            # Check: any files that aren't formats or normally there?
            if unknowns:
                self.extra_files.append((title_dir, db_path, unknowns))

            # Check: any book formats that should be there?
            missing = book_formats - formats
            if missing:
                self.missing_formats.append((title_dir, db_path, missing))

            # Check: any book formats that shouldn't be there?
            extra = formats - book_formats
            if extra:
                self.extra_formats.append((title_dir, db_path, extra))
        else:
            def lc_map(fnames, fset):
                # Translate lower-cased names back to their original spelling
                m = {}
                for f in fnames:
                    m[f.lower()] = f
                return [m[f] for f in fset]

            filenames_lc = frozenset([f.lower() for f in filenames])
            formats_lc = frozenset([f.lower() for f in formats])
            unknowns = frozenset(filenames_lc-formats_lc-NORMALS)
            # Check: any files that aren't formats or normally there?
            if unknowns:
                self.extra_files.append((title_dir, db_path,
                                         lc_map(filenames, unknowns)))

            book_formats_lc = frozenset([f.lower() for f in book_formats])
            # Check: any book formats that should be there?
            missing = book_formats_lc - formats_lc
            if missing:
                self.missing_formats.append((title_dir, db_path,
                                             lc_map(book_formats, missing)))

            # Check: any book formats that shouldn't be there?
            extra = formats_lc - book_formats_lc
            if extra:
                self.extra_formats.append((title_dir, db_path,
                                           lc_map(formats, extra)))

View File

@ -874,11 +874,76 @@ def command_saved_searches(args, dbpath):
return 0
def check_library_option_parser():
    '''
    Build the option parser for the check_library command.

    The usage text lists the report names taken from CHECKS. Options added
    here are ``--csv`` and ``--report``.
    '''
    from calibre.library.check_library import CHECKS
    report_names = ', '.join([c[0] for c in CHECKS])
    usage = _('''\
%prog check_library [options]
Perform some checks on the filesystem representing a library. Reports are {0}
''').format(report_names)
    parser = get_parser(usage)

    parser.add_option('-c', '--csv', default=False, action='store_true',
            help=_('Output in CSV'))

    report_help = _("Comma-separated list of reports.\n"
                    "Default: all")
    parser.add_option('-r', '--report', default=None, dest='report',
            help=report_help)
    return parser
def command_check_library(args, dbpath):
from calibre.library.check_library import CheckLibrary, CHECKS
parser = check_library_option_parser()
opts, args = parser.parse_args(args)
if len(args) != 0:
parser.print_help()
return 1
if opts.library_path is not None:
dbpath = opts.library_path
if isbytestring(dbpath):
dbpath = dbpath.decode(preferred_encoding)
if opts.report is None:
checks = CHECKS
else:
checks = []
for r in opts.report.split(','):
found = False
for c in CHECKS:
if c[0] == r:
checks.append(c)
found = True
break
if not found:
print _('Unknown report check'), r
return 1
def print_one(checker, check):
attr = check[0]
list = getattr(checker, attr, None)
if list is None:
return
if opts.csv:
for i in list:
print check[1] + ',' + i[0] + ',' + i[1] + ',' + '|'.join(i[2])
else:
print check[1]
for i in list:
print ' %-30.30s - %-30.30s - %s'%(i[0], i[1], ', '.join(i[2]))
db = LibraryDatabase2(dbpath)
checker = CheckLibrary(dbpath, db)
checker.scan_library()
for check in checks:
print_one(checker, check)
COMMANDS = ('list', 'add', 'remove', 'add_format', 'remove_format',
'show_metadata', 'set_metadata', 'export', 'catalog',
'saved_searches', 'add_custom_column', 'custom_columns',
'remove_custom_column', 'set_custom', 'restore_database')
'remove_custom_column', 'set_custom', 'restore_database',
'check_library')
def restore_database_option_parser():
parser = get_parser(_(

View File

@ -850,6 +850,15 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return set([])
return set([f[0] for f in formats])
def format_files(self, index, index_is_id=False):
    '''
    Return a list of ``(name, format)`` tuples, one per row of the ``data``
    table for the book.

    :param index: Book id if index_is_id is True, otherwise a row index that
                  is translated with ``self.id()``.
    :return: The list, or an empty list if the query fails.
    '''
    book_id = index if index_is_id else self.id(index)
    try:
        rows = self.conn.get(
                'SELECT name,format FROM data WHERE book=?', (book_id,))
        return [(row[0], row[1]) for row in rows]
    except Exception:
        # Best effort: a failed lookup means "no formats", but do not
        # swallow KeyboardInterrupt/SystemExit the way a bare except would
        return []
def formats(self, index, index_is_id=False, verify_formats=True):
''' Return available formats as a comma separated list or None if there are no available formats '''
id = index if index_is_id else self.id(index)