diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py
index 0ebc9679b7..2c4dfb8395 100644
--- a/src/calibre/db/backend.py
+++ b/src/calibre/db/backend.py
@@ -8,7 +8,7 @@ __copyright__ = '2011, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
 # Imports {{{
-import os, shutil, uuid, json, glob, time, cPickle
+import os, shutil, uuid, json, glob, time, cPickle, hashlib
 from functools import partial
 
 import apsw
@@ -17,7 +17,9 @@ from calibre import isbytestring, force_unicode, prints
 from calibre.constants import (iswindows, filesystem_encoding,
         preferred_encoding)
 from calibre.ptempfile import PersistentTemporaryFile
+from calibre.db import SPOOL_SIZE
 from calibre.db.schema_upgrades import SchemaUpgrade
+from calibre.db.errors import NoSuchFormat
 from calibre.library.field_metadata import FieldMetadata
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
 from calibre.utils.icu import sort_key
@@ -926,6 +928,19 @@ class DB(object):
             shutil.copyfile(candidates[0], fmt_path)
             return fmt_path
 
+    def format_hash(self, book_id, fmt, fname, path):
+        ''' Return the SHA-256 hex digest of the format file, raising
+        NoSuchFormat if format_abspath() cannot locate it. '''
+        path = self.format_abspath(book_id, fmt, fname, path)
+        if path is None:
+            raise NoSuchFormat('Record %d has no fmt: %s'%(book_id, fmt))
+        sha = hashlib.sha256()
+        with lopen(path, 'rb') as f:
+            # Iterate until read() returns an empty string, i.e. end of file
+            for raw in iter(partial(f.read, SPOOL_SIZE), b''):
+                sha.update(raw)
+        return sha.hexdigest()
+
     def format_metadata(self, book_id, fmt, fname, path):
         path = self.format_abspath(book_id, fmt, fname, path)
         ans = {}
diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py
index 119e166c49..c615f62bf7 100644
--- a/src/calibre/db/cache.py
+++ b/src/calibre/db/cache.py
@@ -408,7 +408,22 @@ class Cache(object):
         return {aid:af.author_data(aid) for aid in author_ids if aid in af.table.id_map}
 
     @read_api
-    def format_metadata(self, book_id, fmt, allow_cache=True):
+    def format_hash(self, book_id, fmt):
+        ''' Return the hex digest computed by the backend for the specified
+        format. Raises NoSuchFormat if the book has no such format. '''
+        fmt = (fmt or '').upper()
+        try:
+            name = self.fields['formats'].format_fname(book_id, fmt)
+            path = self._field_for('path', book_id).replace('/', os.sep)
+        except Exception:
+            raise NoSuchFormat('Record %d has no fmt: %s'%(book_id, fmt))
+        return self.backend.format_hash(book_id, fmt, name, path)
+
+    @api
+    def format_metadata(self, book_id, fmt, allow_cache=True, update_db=False):
+        ''' Return the metadata dict for the format file, or an empty dict if
+        it is unavailable. If update_db is True and the dict has a size, the
+        formats table and the size field are updated from it. '''
         if not fmt:
             return {}
         fmt = fmt.upper()
@@ -416,18 +431,31 @@ class Cache(object):
             x = self.format_metadata_cache[book_id].get(fmt, None)
             if x is not None:
                 return x
-        try:
-            name = self.fields['formats'].format_fname(book_id, fmt)
-            path = self._field_for('path', book_id).replace('/', os.sep)
-        except:
-            return {}
+        with self.read_lock:
+            try:
+                name = self.fields['formats'].format_fname(book_id, fmt)
+                path = self._field_for('path', book_id).replace('/', os.sep)
+            except Exception:
+                return {}
+
+            ans = {}
+            if path and name:
+                ans = self.backend.format_metadata(book_id, fmt, name, path)
+                self.format_metadata_cache[book_id][fmt] = ans
+        if update_db and 'size' in ans:
+            with self.write_lock:
+                max_size = self.fields['formats'].table.update_fmt(book_id, fmt, name, ans['size'], self.backend)
+                self.fields['size'].table.update_sizes({book_id: max_size})
 
-        ans = {}
-        if path and name:
-            ans = self.backend.format_metadata(book_id, fmt, name, path)
-            self.format_metadata_cache[book_id][fmt] = ans
         return ans
 
+    @read_api
+    def format_files(self, book_id):
+        ''' Return a dict mapping each format of the book to its format_fname() value. '''
+        field = self.fields['formats']
+        fmts = field.table.book_col_map.get(book_id, ())
+        return {fmt:field.format_fname(book_id, fmt) for fmt in fmts}
+
     @read_api
     def pref(self, name, default=None):
         return self.backend.prefs.get(name, default)
@@ -524,6 +552,7 @@ class Cache(object):
         the path is different from the current path (taking case sensitivity
         into account).
         '''
+        fmt = (fmt or '').upper()
         try:
             name = self.fields['formats'].format_fname(book_id, fmt)
             path = self._field_for('path', book_id).replace('/', os.sep)
@@ -544,6 +573,7 @@ class Cache(object):
         Apart from the viewer, I don't believe any of the others do any file
         I/O with the results of this call.
         '''
+        fmt = (fmt or '').upper()
         try:
             name = self.fields['formats'].format_fname(book_id, fmt)
             path = self._field_for('path', book_id).replace('/', os.sep)
@@ -555,6 +585,7 @@ class Cache(object):
     @read_api
     def has_format(self, book_id, fmt):
         'Return True iff the format exists on disk'
+        fmt = (fmt or '').upper()
         try:
             name = self.fields['formats'].format_fname(book_id, fmt)
             path = self._field_for('path', book_id).replace('/', os.sep)
@@ -601,6 +632,7 @@ class Cache(object):
                                   this means that repeated calls yield the same
                                   temp file (which is re-created each time)
         '''
+        fmt = (fmt or '').upper()
         ext = ('.'+fmt.lower()) if fmt else ''
         if as_path:
             if preserve_filename:
diff --git a/src/calibre/db/legacy.py b/src/calibre/db/legacy.py
index 580fcc2ae6..0ec44e9670 100644
--- a/src/calibre/db/legacy.py
+++ b/src/calibre/db/legacy.py
@@ -17,6 +17,7 @@ from calibre.db.adding import (
     import_book_directory, recursive_import, add_catalog, add_news)
 from calibre.db.backend import DB
 from calibre.db.cache import Cache
+from calibre.db.errors import NoSuchFormat
 from calibre.db.categories import CATEGORY_SORTS
 from calibre.db.view import View
 from calibre.db.write import clean_identifier
@@ -436,6 +437,43 @@ class LibraryDatabase(object):
     def has_id(self, book_id):
         return book_id in self.new_api.all_book_ids()
 
+    def format(self, index, fmt, index_is_id=False, as_file=False, mode='r+b', as_path=False, preserve_filename=False):
+        book_id = index if index_is_id else self.id(index)
+        return self.new_api.format(book_id, fmt, as_file=as_file, as_path=as_path, preserve_filename=preserve_filename)
+
+    def format_abspath(self, index, fmt, index_is_id=False):
+        book_id = index if index_is_id else self.id(index)
+        return self.new_api.format_abspath(book_id, fmt)
+
+    def format_path(self, index, fmt, index_is_id=False):
+        book_id = index if index_is_id else self.id(index)
+        ans = self.new_api.format_abspath(book_id, fmt)
+        if ans is None:
+            raise NoSuchFormat('Record %d has no format: %s'%(book_id, fmt))
+        return ans
+
+    def format_files(self, index, index_is_id=False):
+        book_id = index if index_is_id else self.id(index)
+        return [(v, k) for k, v in self.new_api.format_files(book_id).iteritems()]
+
+    def format_metadata(self, book_id, fmt, allow_cache=True, update_db=False, commit=False):
+        return self.new_api.format_metadata(book_id, fmt, allow_cache=allow_cache, update_db=update_db)
+
+    def format_last_modified(self, book_id, fmt):
+        m = self.format_metadata(book_id, fmt)
+        if m:
+            return m['mtime']
+
+    def formats(self, index, index_is_id=False, verify_formats=True):
+        book_id = index if index_is_id else self.id(index)
+        ans = self.new_api.formats(book_id, verify_formats=verify_formats)
+        if ans:
+            return ','.join(ans)
+
+    def has_format(self, index, fmt, index_is_id=False):
+        book_id = index if index_is_id else self.id(index)
+        return self.new_api.has_format(book_id, fmt)
+
     # Private interface {{{
     def __iter__(self):
         for row in self.data.iterall():
@@ -463,6 +501,7 @@ for prop in ('author_sort', 'authors', 'comment', 'comments', 'publisher',
         return func
     setattr(LibraryDatabase, prop, MT(getter(prop)))
 
+LibraryDatabase.format_hash = MT(lambda self, book_id, fmt:self.new_api.format_hash(book_id, fmt))
 LibraryDatabase.index = MT(lambda self, book_id, cache=False:self.data.id_to_index(book_id))
 LibraryDatabase.has_cover = MT(lambda self, book_id:self.new_api.field_for('cover', book_id))
 LibraryDatabase.get_tags = MT(lambda self, book_id:set(self.new_api.field_for('tags', book_id)))
diff --git a/src/calibre/db/tests/legacy.py b/src/calibre/db/tests/legacy.py
index 8d07e0cff2..1cae34fd04 100644
--- a/src/calibre/db/tests/legacy.py
+++ b/src/calibre/db/tests/legacy.py
@@ -11,6 +11,7 @@ from io import BytesIO
 from repr import repr
 from functools import partial
 from tempfile import NamedTemporaryFile
+from operator import itemgetter
 
 from calibre.db.tests.base import BaseTest
 
@@ -159,6 +160,11 @@ class LegacyTest(BaseTest):
 
         for meth, args in {
             'get_next_series_num_for': [('A Series One',)],
+            'format':[(1, 'FMT1', True), (2, 'FMT1', True), (0, 'xxxxxx')],
+            'has_format':[(1, 'FMT1', True), (2, 'FMT1', True), (0, 'xxxxxx')],
+            '@format_files':[(0,),(1,),(2,)],
+            'formats':[(0,),(1,),(2,)],
+            'format_hash':[(1, 'FMT1'),(1, 'FMT2'), (2, 'FMT1')],
             'author_sort_from_authors': [(['Author One', 'Author Two', 'Unknown'],)],
             'has_book':[(Metadata('title one'),), (Metadata('xxxx1111'),)],
             'has_id':[(1,), (2,), (3,), (9999,)],
@@ -330,6 +336,7 @@ class LegacyTest(BaseTest):
             'author_id', # replaced by get_author_id
             'books_for_author', # broken
             'books_in_old_database', # unused
+            'migrate_old', # no longer supported
 
             # Internal API
             'clean_user_categories', 'cleanup_tags', 'books_list_filter', 'conn', 'connect', 'construct_file_name',
@@ -337,6 +344,7 @@ class LegacyTest(BaseTest):
             'run_import_plugins', 'vacuum', 'set_path', 'row', 'row_factory', 'rows', 'rmtree', 'series_index_pat',
             'import_old_database', 'dirtied_lock', 'dirtied_cache', 'dirty_queue_length', 'dirty_books_referencing',
             'windows_check_if_files_in_use', 'get_metadata_for_dump', 'get_a_dirtied_book', 'dirtied_sequence',
+            'format_filename_cache', 'format_metadata_cache', 'filter', 'create_version1',
         }
         SKIP_ARGSPEC = {
             '__init__',
@@ -411,6 +419,9 @@ class LegacyTest(BaseTest):
         ndb = self.init_legacy(self.cloned_library)
         db = self.init_old(self.cloned_library)
         run_funcs(self, db, ndb, (
+            ('+format_metadata', 1, 'FMT1', itemgetter('size')),
+            ('+format_metadata', 1, 'FMT2', itemgetter('size')),
+            ('+format_metadata', 2, 'FMT1', itemgetter('size')),
             ('get_tags', 0), ('get_tags', 1), ('get_tags', 2),
             ('is_tag_used', 'News'), ('is_tag_used', 'xchkjgfh'),
             ('bulk_modify_tags', (1,), ['t1'], ['News']),