mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Speed up evaluation of composite columns
Use a ProxyMetadata object that lazily evaluates its fields on demand, thereby avoiding the overhead of get_metadata() on every composite field evaluation.
This commit is contained in:
parent
f37de3d33c
commit
70f1dbb832
@ -23,7 +23,7 @@ from calibre.db.fields import create_field
|
||||
from calibre.db.search import Search
|
||||
from calibre.db.tables import VirtualTable
|
||||
from calibre.db.write import get_series_values
|
||||
from calibre.db.lazy import FormatMetadata, FormatsList
|
||||
from calibre.db.lazy import FormatMetadata, FormatsList, ProxyMetadata
|
||||
from calibre.ebooks import check_ebook_format
|
||||
from calibre.ebooks.metadata import string_to_authors, author_to_author_sort, get_title_sort_pat
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
@ -338,7 +338,7 @@ class Cache(object):
|
||||
def fast_field_for(self, field_obj, book_id, default_value=None):
|
||||
' Same as field_for, except that it avoids the extra lookup to get the field object '
|
||||
if field_obj.is_composite:
|
||||
return field_obj.get_value_with_cache(book_id, partial(self._get_metadata, get_user_categories=False))
|
||||
return field_obj.get_value_with_cache(book_id, self._get_proxy_metadata)
|
||||
try:
|
||||
return field_obj.for_book(book_id, default_value=default_value)
|
||||
except (KeyError, IndexError):
|
||||
@ -358,8 +358,7 @@ class Cache(object):
|
||||
return default_value
|
||||
|
||||
if mi is None:
|
||||
return f.get_value_with_cache(book_id, partial(self._get_metadata,
|
||||
get_user_categories=False))
|
||||
return f.get_value_with_cache(book_id, self._get_proxy_metadata)
|
||||
else:
|
||||
return f.render_composite(book_id, mi)
|
||||
|
||||
@ -534,6 +533,10 @@ class Cache(object):
|
||||
|
||||
return mi
|
||||
|
||||
@read_api
def get_proxy_metadata(self, book_id):
    ' Return a ProxyMetadata object for book_id, bound to this cache '
    proxy = ProxyMetadata(self, book_id)
    return proxy
|
||||
|
||||
@api
|
||||
def cover(self, book_id,
|
||||
as_file=False, as_image=False, as_path=False):
|
||||
@ -781,7 +784,7 @@ class Cache(object):
|
||||
'''
|
||||
all_book_ids = frozenset(self._all_book_ids() if ids_to_sort is None
|
||||
else ids_to_sort)
|
||||
get_metadata = partial(self._get_metadata, get_user_categories=False)
|
||||
get_metadata = self._get_proxy_metadata
|
||||
lang_map = self.fields['languages'].book_value_map
|
||||
|
||||
fm = {'title':'sort', 'authors':'author_sort'}
|
||||
@ -1189,7 +1192,7 @@ class Cache(object):
|
||||
sf = self.fields[field]
|
||||
if series:
|
||||
q = icu_lower(series)
|
||||
for val, book_ids in sf.iter_searchable_values(self._get_metadata, frozenset(self._all_book_ids())):
|
||||
for val, book_ids in sf.iter_searchable_values(self._get_proxy_metadata, frozenset(self._all_book_ids())):
|
||||
if q == icu_lower(val):
|
||||
books = book_ids
|
||||
break
|
||||
@ -1499,7 +1502,7 @@ class Cache(object):
|
||||
f = self.fields[category]
|
||||
if hasattr(f, 'get_books_for_val'):
|
||||
# Composite field
|
||||
return f.get_books_for_val(item_id_or_composite_value, self._get_metadata, self._all_book_ids())
|
||||
return f.get_books_for_val(item_id_or_composite_value, self._get_proxy_metadata, self._all_book_ids())
|
||||
return self._books_for_field(f.name, int(item_id_or_composite_value))
|
||||
|
||||
@read_api
|
||||
|
@ -10,14 +10,19 @@ __docformat__ = 'restructuredtext en'
|
||||
import weakref
|
||||
from functools import wraps
|
||||
from collections import MutableMapping, MutableSequence
|
||||
from copy import deepcopy
|
||||
|
||||
from calibre.ebooks.metadata.book.base import Metadata, SIMPLE_GET, TOP_LEVEL_IDENTIFIERS, NULL_VALUES
|
||||
from calibre.ebooks.metadata.book.formatter import SafeFormat
|
||||
from calibre.utils.date import utcnow
|
||||
|
||||
# Lazy format metadata retrieval {{{
|
||||
'''
|
||||
Avoid doing stats on all files in a book when getting metadata for that book.
|
||||
Speeds up calibre startup with large libraries/libraries on a network share,
|
||||
with a composite custom column.
|
||||
'''
|
||||
|
||||
# Lazy format metadata retrieval {{{
|
||||
def resolved(f):
|
||||
@wraps(f)
|
||||
def wrapper(self, *args, **kwargs):
|
||||
@ -97,3 +102,232 @@ class FormatsList(MutableBase, MutableSequence):
|
||||
|
||||
# }}}
|
||||
|
||||
# Lazy metadata getters {{{
|
||||
ga = object.__getattribute__
|
||||
sa = object.__setattr__
|
||||
|
||||
def simple_getter(field, default_value=None):
    '''
    Build a getter for ``field``.

    The returned function takes ``(dbref, book_id, cache)``. It answers from
    ``cache[field]`` when present; otherwise it calls ``dbref()`` to obtain
    the db, reads the value with ``field_for`` (passing ``default_value``),
    stores it in ``cache`` and returns it.
    '''
    def func(dbref, book_id, cache):
        if field not in cache:
            cache[field] = dbref().field_for(
                field, book_id, default_value=default_value)
        return cache[field]
    return func
|
||||
|
||||
def pp_getter(field, postprocess, default_value=None):
    '''
    Build a getter for ``field`` whose db value is passed through
    ``postprocess`` before being cached.

    The returned function takes ``(dbref, book_id, cache)`` and stores the
    post-processed value under ``cache[field]``, so ``postprocess`` runs at
    most once per cache.
    '''
    def func(dbref, book_id, cache):
        if field not in cache:
            raw = dbref().field_for(field, book_id, default_value=default_value)
            cache[field] = postprocess(raw)
        return cache[field]
    return func
|
||||
|
||||
def adata_getter(field):
    '''
    Build a getter that maps author name to author sort or author link.

    ``field`` selects the mapped value: ``'author_sort_map'`` yields the
    ``'sort'`` entry of each author record, anything else yields ``'link'``.
    The author ids and their records are fetched once, under the db read
    lock, and kept in ``cache['adata']`` so that both maps share one lookup.
    '''
    def func(dbref, book_id, cache):
        if 'adata' not in cache:
            db = dbref()
            with db.read_lock:
                ids = db._field_ids_for('authors', book_id)
                records = db._author_data(ids)
            cache['adata'] = (ids, records)
        author_ids, adata = cache['adata']
        attr = 'link' if field != 'author_sort_map' else 'sort'
        return dict((adata[aid]['name'], adata[aid][attr]) for aid in author_ids)
    return func
|
||||
|
||||
def dt_getter(field):
    '''
    Build a getter for a date field.

    Works like a simple cached getter, except that the default handed to
    ``field_for`` is the current time (``utcnow()``), computed only when the
    value is not already in the cache.
    '''
    def func(dbref, book_id, cache):
        if field in cache:
            return cache[field]
        value = dbref().field_for(field, book_id, default_value=utcnow())
        cache[field] = value
        return value
    return func
|
||||
|
||||
def item_getter(field, default_value=None, key=0):
    '''
    Build a getter that returns a single item (``key``) out of the container
    stored in the db for ``field``.

    The container itself (not the item) is what gets cached under
    ``cache[field]``, so it can be shared with other getters for the same
    field. The item is therefore looked up on every call, including cache
    hits; returning the cached container directly would hand the caller the
    whole dict/sequence instead of the requested item.

    ``default_value`` is passed to ``field_for`` and is also returned when the
    container has no entry for ``key`` or is not subscriptable (e.g. None).
    '''
    def func(dbref, book_id, cache):
        try:
            container = cache[field]
        except KeyError:
            db = dbref()
            container = cache[field] = db.field_for(
                field, book_id, default_value=default_value)
        try:
            return container[key]
        except (IndexError, KeyError, TypeError):
            # TypeError: the container is None (nothing stored for the book)
            return default_value
    return func
|
||||
|
||||
def fmt_getter(field):
    '''
    Build a getter for ``'formats'`` or ``'format_metadata'``.

    The format metadata map (format name -> metadata, only for formats whose
    metadata is non-empty) is built from ``db.formats()`` and
    ``db.format_metadata()`` on first use and stored in
    ``cache['format_metadata']``, so later accesses of either field do not
    query the db again.

    For ``'formats'`` the getter returns the list of format names, or None
    when there are none; otherwise it returns the map itself.
    '''
    def func(dbref, book_id, cache):
        try:
            format_metadata = cache['format_metadata']
        except KeyError:
            db = dbref()
            format_metadata = {}
            for fmt in db.formats(book_id, verify_formats=False):
                m = db.format_metadata(book_id, fmt)
                if m:
                    format_metadata[fmt] = m
            # Remember the result, like every other getter does
            cache['format_metadata'] = format_metadata
        if field == 'formats':
            return list(format_metadata) or None
        return format_metadata
    return func
|
||||
|
||||
def approx_fmts_getter(dbref, book_id, cache):
    '''
    Return the db's ``formats`` field for the book as a list, caching it
    under ``cache['formats']``.
    '''
    if 'formats' in cache:
        return cache['formats']
    fmts = list(dbref().field_for('formats', book_id))
    cache['formats'] = fmts
    return fmts
|
||||
|
||||
def series_index_getter(field='series'):
    '''
    Build a getter for the index that accompanies the series-like ``field``.

    The series value is resolved first, through the standard ``getters``
    table or, on KeyError, through ``custom_getter``. If it is empty the
    getter returns None. Otherwise the index is read from
    ``cache[field + '_index']``, falling back to the db with a default of 1.0.
    '''
    index_key = field + '_index'

    def func(dbref, book_id, cache):
        try:
            series = getters[field](dbref, book_id, cache)
        except KeyError:
            series = custom_getter(field, dbref, book_id, cache)
        if not series:
            return None
        if index_key not in cache:
            cache[index_key] = dbref().field_for(
                index_key, book_id, default_value=1.0)
        return cache[index_key]
    return func
|
||||
|
||||
def has_cover_getter(dbref, book_id, cache):
    '''
    Return the translated string 'Yes' when the db's ``cover`` field is
    truthy for the book and the empty string otherwise, caching the answer
    under ``cache['has_cover']``.
    '''
    if 'has_cover' in cache:
        return cache['has_cover']
    if dbref().field_for('cover', book_id, default_value=False):
        answer = _('Yes')
    else:
        answer = ''
    cache['has_cover'] = answer
    return answer
|
||||
|
||||
def fmt_custom(x):
    ' Convert tuples to lists; return any other value unchanged '
    if isinstance(x, tuple):
        return list(x)
    return x


def custom_getter(field, dbref, book_id, cache):
    '''
    Return the value of the custom column ``field`` for the book, read from
    ``cache`` when present, otherwise fetched from the db, normalised with
    ``fmt_custom`` and cached.
    '''
    if field not in cache:
        cache[field] = fmt_custom(dbref().field_for(field, book_id))
    return cache[field]
|
||||
|
||||
def composite_getter(mi, field, metadata, book_id, cache, formatter, template_cache):
    '''
    Return the rendered value of the composite column ``field``.

    On a cache miss the column's template
    (``metadata['display']['composite_template']``) is evaluated against
    ``mi`` with ``formatter.safe_format``; the stripped result is cached
    under ``cache[field]``. ``book_id`` is accepted but not used here.
    '''
    if field in cache:
        return cache[field]
    rendered = formatter.safe_format(
        metadata['display']['composite_template'], mi, _('TEMPLATE ERROR'), mi,
        column_name=field, template_cache=template_cache)
    value = cache[field] = rendered.strip()
    return value
|
||||
|
||||
# Attribute name -> getter(dbref, book_id, cache) for the standard metadata
# fields. ProxyMetadata consults this table first on every attribute access.
getters = {
    # The book id is already known, no db access needed
    'id': lambda dbref, book_id, cache: book_id,
    'application_id': lambda dbref, book_id, cache: book_id,

    # Plain db values with a fallback
    'title': simple_getter('title', _('Unknown')),
    'title_sort': simple_getter('sort', _('Unknown')),
    'author_sort': simple_getter('author_sort', _('Unknown')),
    'uuid': simple_getter('uuid', 'dummy'),
    'book_size': simple_getter('size', 0),
    'ondevice_col': simple_getter('ondevice', ''),

    # Multi-valued fields, converted to lists
    'authors': pp_getter('authors', list, (_('Unknown'),)),
    'tags': pp_getter('tags', list, (_('Unknown'),)),
    'languages': pp_getter('languages', list),

    # Derived values
    'language': item_getter('languages', default_value=NULL_VALUES['language']),
    'series_index': series_index_getter(),
    'db_approx_formats': approx_fmts_getter,
    'has_cover': has_cover_getter,
}

for field in ('comments', 'publisher', 'identifiers', 'series', 'rating'):
    getters[field] = simple_getter(field)

for field in ('author_sort_map', 'author_link_map'):
    getters[field] = adata_getter(field)

for field in ('timestamp', 'pubdate', 'last_modified'):
    getters[field] = dt_getter(field)

# Each top level identifier is a single entry of the identifiers dict
for field in TOP_LEVEL_IDENTIFIERS:
    getters[field] = item_getter('identifiers', key=field)

for field in ('formats', 'format_metadata'):
    getters[field] = fmt_getter(field)
|
||||
# }}}
|
||||
|
||||
class ProxyMetadata(Metadata):

    '''
    A Metadata object that evaluates its fields lazily, on demand, reading
    them from the db the first time they are accessed and remembering them in
    a per-instance cache. Used to avoid the overhead of building a full
    Metadata object when evaluating composite columns.

    All internal state is read and written via object.__getattribute__ /
    object.__setattr__ (``ga``/``sa``), since the overridden attribute hooks
    below redirect ordinary attribute access to the field cache.
    '''

    def __init__(self, db, book_id):
        sa(self, 'template_cache', db.formatter_template_cache)
        sa(self, 'formatter', SafeFormat())
        # Only a weak reference to the db is kept
        sa(self, '_db', weakref.ref(db))
        sa(self, '_book_id', book_id)
        # Fields that are never looked up in the db start out with fixed values
        sa(self, '_cache', {'user_categories':{}, 'cover_data':(None,None), 'device_collections':[]})
        sa(self, '_user_metadata', db.field_metadata)

    def __getattribute__(self, field):
        # 1. Standard fields that have a dedicated lazy getter
        getter = getters.get(field, None)
        if getter is not None:
            return getter(ga(self, '_db'), ga(self, '_book_id'), ga(self, '_cache'))
        # 2. Simple fields are served from the cache only (None if never set)
        if field in SIMPLE_GET:
            return ga(self, '_cache').get(field, None)
        # 3. Real attributes and methods of the object
        try:
            return ga(self, field)
        except AttributeError:
            pass
        # 4. Fields described by the field metadata
        um = ga(self, '_user_metadata')
        d = um.get(field, None)
        if d is not None:
            dt = d['datatype']
            if dt != 'composite':
                if field.endswith('_index') and dt == 'float':
                    # Strip the '_index' suffix to get the series field name
                    return series_index_getter(field[:-6])(ga(self, '_db'), ga(self, '_book_id'), ga(self, '_cache'))
                return custom_getter(field, ga(self, '_db'), ga(self, '_book_id'), ga(self, '_cache'))
            return composite_getter(self, field, d, ga(self, '_book_id'), ga(self, '_cache'), ga(self, 'formatter'), ga(self, 'template_cache'))

        # 5. Anything that was explicitly set on this object
        try:
            return ga(self, '_cache')[field]
        except KeyError:
            raise AttributeError('Metadata object has no attribute named: %r' % field)

    def __setattr__(self, field, val, extra=None):
        ''' Store val in the cache as the value of field. If extra is not
        None it is stored as the value of field + '_index', which is where
        get_extra() reads it back from. '''
        cache = ga(self, '_cache')
        cache[field] = val
        if extra is not None:
            cache[field + '_index'] = extra

    def get_user_metadata(self, field, make_copy=False):
        ''' Return the field metadata entry for field, or None if there is no
        such entry. If make_copy is True a deep copy is returned. '''
        um = ga(self, '_user_metadata')
        try:
            ans = um[field]
        except KeyError:
            return None
        if make_copy:
            ans = deepcopy(ans)
        return ans

    def get_extra(self, field, default=None):
        ''' Return the value of field + '_index', or default if that
        attribute cannot be read. Raises AttributeError if the field metadata
        has no entry for field + '_index'. '''
        um = ga(self, '_user_metadata')
        if field + '_index' in um:
            try:
                return getattr(self, field + '_index')
            except AttributeError:
                return default
        raise AttributeError(
            'Metadata object has no attribute named: '+ repr(field))

    def custom_field_keys(self):
        ' Return an iterator over the keys of the custom fields '
        um = ga(self, '_user_metadata')
        return iter(um.custom_field_keys())
|
||||
|
||||
|
@ -460,3 +460,29 @@ class ReadingTest(BaseTest):
|
||||
test(True, {3}, 'Unknown', 'unknown')
|
||||
# }}}
|
||||
|
||||
def test_proxy_metadata(self):  # {{{
    ' Test the ProxyMetadata object used for composite columns '
    from calibre.ebooks.metadata.book.base import STANDARD_METADATA_FIELDS
    cache = self.init_cache()

    def as_is(x):
        return x

    def as_set(x):
        # The order of the formats is not significant
        return x if x is None else set(x)

    for book_id in cache.all_book_ids():
        mi = cache.get_metadata(book_id, get_user_categories=False)
        pmi = cache.get_proxy_metadata(book_id)
        self.assertSetEqual(set(mi.custom_field_keys()), set(pmi.custom_field_keys()))

        for field in STANDARD_METADATA_FIELDS | {'#series_index'}:
            f = as_set if field == 'formats' else as_is
            self.assertEqual(f(getattr(mi, field)), f(getattr(pmi, field)),
                             'Standard field: %s not the same for book %s' % (field, book_id))
            self.assertEqual(mi.format_field(field), pmi.format_field(field),
                             'Standard field format: %s not the same for book %s' % (field, book_id))

        for field, meta in cache.field_metadata.custom_iteritems():
            if meta['datatype'] != 'composite':
                # Compare the raw values. Do not reuse the normaliser from
                # the loop above: which one it ends up bound to depends on
                # the iteration order of a set.
                self.assertEqual(getattr(mi, field), getattr(pmi, field),
                                 'Custom field: %s not the same for book %s' % (field, book_id))
                self.assertEqual(mi.format_field(field), pmi.format_field(field),
                                 'Custom field format: %s not the same for book %s' % (field, book_id))

# }}}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user