From 90f973e7ac999bd7d143b5152cd9fcbe2e9c1f3a Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Thu, 6 Jan 2011 13:30:40 +0000 Subject: [PATCH 01/12] Improve performance of get_metadata. Up to 4 times faster when connecting to a device --- src/calibre/ebooks/metadata/book/base.py | 14 +++-- src/calibre/gui2/device.py | 20 +++--- src/calibre/library/custom_columns.py | 4 +- src/calibre/library/database2.py | 78 +++++++++++++----------- src/calibre/library/field_metadata.py | 15 +++++ 5 files changed, 79 insertions(+), 52 deletions(-) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 17f2c6705c..799bdef8e6 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -324,14 +324,16 @@ class Metadata(object): if metadata is None: traceback.print_stack() return - metadata = copy.deepcopy(metadata) - if '#value#' not in metadata: - if metadata['datatype'] == 'text' and metadata['is_multiple']: - metadata['#value#'] = [] + m = {} + for k in metadata: + m[k] = copy.copy(metadata[k]) + if '#value#' not in m: + if m['datatype'] == 'text' and m['is_multiple']: + m['#value#'] = [] else: - metadata['#value#'] = None + m['#value#'] = None _data = object.__getattribute__(self, '_data') - _data['user_metadata'][field] = metadata + _data['user_metadata'][field] = m def template_to_attribute(self, other, ops): ''' diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 6d289a3e5c..944ce03305 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -637,7 +637,7 @@ class DeviceMixin(object): # {{{ self.device_manager.mount_device(kls=FOLDER_DEVICE, kind='folder', path=dir) def connect_to_bambook(self): - self.device_manager.mount_device(kls=BAMBOOKWifi, kind='bambook', + self.device_manager.mount_device(kls=BAMBOOKWifi, kind='bambook', path=BAMBOOK.settings().extra_customization) def connect_to_itunes(self): @@ -1266,8 +1266,8 @@ class 
DeviceMixin(object): # {{{ # Force a reset if the caches are not initialized if reset or not hasattr(self, 'db_book_title_cache'): # Build a cache (map) of the library, so the search isn't On**2 - self.db_book_title_cache = {} - self.db_book_uuid_cache = {} + db_book_title_cache = {} + db_book_uuid_cache = {} # It might be possible to get here without having initialized the # library view. In this case, simply give up try: @@ -1278,8 +1278,8 @@ class DeviceMixin(object): # {{{ for id in db.data.iterallids(): mi = db.get_metadata(id, index_is_id=True) title = clean_string(mi.title) - if title not in self.db_book_title_cache: - self.db_book_title_cache[title] = \ + if title not in db_book_title_cache: + db_book_title_cache[title] = \ {'authors':{}, 'author_sort':{}, 'db_ids':{}} # If there are multiple books in the library with the same title # and author, then remember the last one. That is OK, because as @@ -1287,12 +1287,14 @@ class DeviceMixin(object): # {{{ # as another. if mi.authors: authors = clean_string(authors_to_string(mi.authors)) - self.db_book_title_cache[title]['authors'][authors] = mi + db_book_title_cache[title]['authors'][authors] = mi if mi.author_sort: aus = clean_string(mi.author_sort) - self.db_book_title_cache[title]['author_sort'][aus] = mi - self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi - self.db_book_uuid_cache[mi.uuid] = mi + db_book_title_cache[title]['author_sort'][aus] = mi + db_book_title_cache[title]['db_ids'][mi.application_id] = mi + db_book_uuid_cache[mi.uuid] = mi + self.db_book_title_cache = db_book_title_cache + self.db_book_uuid_cache = db_book_uuid_cache # Now iterate through all the books on the device, setting the # in_library field. 
If the UUID matches a book in the library, then diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py index ba218c3ecc..d925f7c91d 100644 --- a/src/calibre/library/custom_columns.py +++ b/src/calibre/library/custom_columns.py @@ -195,8 +195,8 @@ class CustomColumns(object): data = self.custom_column_num_map[num] row = self.data._data[idx] if index_is_id else self.data[idx] ans = row[self.FIELD_MAP[data['num']]] - if data['is_multiple'] and data['datatype'] == 'text': - ans = ans.split('|') if ans else [] + if ans and data['is_multiple'] and data['datatype'] == 'text': + ans = ans.split('|') if data['display'].get('sort_alpha', False): ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower())) return ans diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 611aa1cc89..96f21b88ee 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -256,7 +256,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): 'pubdate', 'flags', 'uuid', - 'has_cover' + 'has_cover', + '''(SELECT group_concat(authors.name || ':::' || authors.sort, ':#:') + FROM authors, books_authors_link as bl + WHERE bl.book=books.id and authors.id=bl.author + ORDER BY bl.id) au_map''', + '(SELECT group_concat(format) FROM data WHERE book=books.id) formats' ] lines = [] for col in columns: @@ -275,7 +280,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): 'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8, 'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15, - 'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20} + 'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20, + 'au_map':21, 'formats':22} for k,v in self.FIELD_MAP.iteritems(): self.field_metadata.set_field_record_index(k, v, prefer_custom=False) @@ -687,9 +693,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): Convenience 
method to return metadata as a :class:`Metadata` object. Note that the list of formats is not verified. ''' + row = self.data._data[idx] if index_is_id else self.data[idx] + fm = self.FIELD_MAP + self.gm_count += 1 - mi = self.data.get(idx, self.FIELD_MAP['all_metadata'], - row_is_id = index_is_id) + mi = row[self.FIELD_MAP['all_metadata']] if mi is not None: if get_cover: # Always get the cover, because the value can be wrong if the @@ -699,49 +707,47 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.gm_missed += 1 mi = Metadata(None) - self.data.set(idx, self.FIELD_MAP['all_metadata'], mi, - row_is_id = index_is_id) + self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id) - aut_list = self.authors_with_sort_strings(idx, index_is_id=index_is_id) + aut_list = row[fm['au_map']] + aut_list = [p.split(':::') for p in aut_list.split(':#:')] aum = [] aus = {} for (author, author_sort) in aut_list: aum.append(author) aus[author] = author_sort - mi.title = self.title(idx, index_is_id=index_is_id) + mi.title = row[fm['title']] mi.authors = aum - mi.author_sort = self.author_sort(idx, index_is_id=index_is_id) + mi.author_sort = row[fm['author_sort']] mi.author_sort_map = aus - mi.comments = self.comments(idx, index_is_id=index_is_id) - mi.publisher = self.publisher(idx, index_is_id=index_is_id) - mi.timestamp = self.timestamp(idx, index_is_id=index_is_id) - mi.pubdate = self.pubdate(idx, index_is_id=index_is_id) - mi.uuid = self.uuid(idx, index_is_id=index_is_id) - mi.title_sort = self.title_sort(idx, index_is_id=index_is_id) - mi.formats = self.formats(idx, index_is_id=index_is_id, - verify_formats=False) - if hasattr(mi.formats, 'split'): - mi.formats = mi.formats.split(',') + mi.comments = row[fm['comments']] + mi.publisher = row[fm['publisher']] + mi.timestamp = row[fm['timestamp']] + mi.pubdate = row[fm['pubdate']] + mi.uuid = row[fm['uuid']] + mi.title_sort = row[fm['sort']] + formats = row[fm['formats']] + if hasattr(formats, 
'split'): + mi.formats = formats.split(',') else: mi.formats = None - tags = self.tags(idx, index_is_id=index_is_id) + tags = row[fm['tags']] if tags: mi.tags = [i.strip() for i in tags.split(',')] - mi.series = self.series(idx, index_is_id=index_is_id) + mi.series = row[fm['series']] if mi.series: - mi.series_index = self.series_index(idx, index_is_id=index_is_id) - mi.rating = self.rating(idx, index_is_id=index_is_id) - mi.isbn = self.isbn(idx, index_is_id=index_is_id) + mi.series_index = row[fm['series_index']] + mi.rating = row[fm['rating']] + mi.isbn = row[fm['isbn']] id = idx if index_is_id else self.id(idx) mi.application_id = id mi.id = id - for key,meta in self.field_metadata.iteritems(): - if meta['is_custom']: - mi.set_user_metadata(key, meta) - mi.set(key, val=self.get_custom(idx, label=meta['label'], - index_is_id=index_is_id), - extra=self.get_custom_extra(idx, label=meta['label'], - index_is_id=index_is_id)) + for key,meta in self.field_metadata.custom_iteritems(): + mi.set_user_metadata(key, meta) + mi.set(key, val=self.get_custom(idx, label=meta['label'], + index_is_id=index_is_id), + extra=self.get_custom_extra(idx, label=meta['label'], + index_is_id=index_is_id)) if get_cover: mi.cover = self.cover(id, index_is_id=True, as_path=True) return mi @@ -878,10 +884,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def formats(self, index, index_is_id=False, verify_formats=True): ''' Return available formats as a comma separated list or None if there are no available formats ''' id = index if index_is_id else self.id(index) - try: - formats = self.conn.get('SELECT format FROM data WHERE book=?', (id,)) - formats = map(lambda x:x[0], formats) - except: + formats = self.data.get(id, self.FIELD_MAP['formats'], row_is_id = True) + if not formats: return None if not verify_formats: return ','.join(formats) @@ -1607,6 +1611,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ','.join([a.replace(',', '|') for 
a in authors]), row_is_id=True) self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True) + aum = self.authors_with_sort_strings(id, index_is_id=True) + self.data.set(id, self.FIELD_MAP['au_map'], + ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum]), + row_is_id=True) def set_authors(self, id, authors, notify=True, commit=True): ''' diff --git a/src/calibre/library/field_metadata.py b/src/calibre/library/field_metadata.py index 1be6604d5d..676eb13d2b 100644 --- a/src/calibre/library/field_metadata.py +++ b/src/calibre/library/field_metadata.py @@ -180,6 +180,15 @@ class FieldMetadata(dict): 'search_terms':['author_sort'], 'is_custom':False, 'is_category':False}), + ('au_map', {'table':None, + 'column':None, + 'datatype':'text', + 'is_multiple':',', + 'kind':'field', + 'name':None, + 'search_terms':[], + 'is_custom':False, + 'is_category':False}), ('comments', {'table':None, 'column':None, 'datatype':'text', @@ -400,6 +409,12 @@ class FieldMetadata(dict): for key in self._tb_cats: yield (key, self._tb_cats[key]) + def custom_iteritems(self): + for key in self._tb_cats: + fm = self._tb_cats[key] + if fm['is_custom']: + yield (key, self._tb_cats[key]) + def items(self): return list(self.iteritems()) From ae759ebd109d1d4040a54ee676bd203070bc8f1f Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Thu, 6 Jan 2011 19:35:04 +0000 Subject: [PATCH 02/12] Fix ticket #8214 (Detect 'senseless' user input) --- src/calibre/gui2/preferences/save_template.py | 8 ++++++-- src/calibre/utils/formatter.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/preferences/save_template.py b/src/calibre/gui2/preferences/save_template.py index a7f57536d5..4c00a14c0f 100644 --- a/src/calibre/gui2/preferences/save_template.py +++ b/src/calibre/gui2/preferences/save_template.py @@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en' from PyQt4.Qt import QWidget, pyqtSignal -from calibre.gui2 import error_dialog +from 
calibre.gui2 import error_dialog, question_dialog from calibre.gui2.preferences.save_template_ui import Ui_Form from calibre.library.save_to_disk import FORMAT_ARG_DESCS, preprocess_template from calibre.utils.formatter import validation_formatter @@ -52,7 +52,11 @@ class SaveTemplate(QWidget, Ui_Form): ''' tmpl = preprocess_template(self.opt_template.text()) try: - validation_formatter.validate(tmpl) + t = validation_formatter.validate(tmpl) + if t.find(validation_formatter._validation_string) < 0: + return question_dialog(self, _('Constant template'), + _('The template contains no {fields}, so all ' + 'books will have the same name. Is this OK?')) except Exception, err: error_dialog(self, _('Invalid template'), '
'+_('The template %s is invalid:')%tmpl + \
diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py
index f4e687b419..23763a25bf 100644
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@@ -453,7 +453,7 @@ class TemplateFormatter(string.Formatter):
class ValidateFormatter(TemplateFormatter):
'''
- Provides a format function that substitutes '' for any missing value
+ Provides a formatter that substitutes the validation string for every value
'''
def get_value(self, key, args, kwargs):
return self._validation_string
From 2cdbc5ea1195038fc55ab1f3fd1c10fba7a5545d Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Thu, 6 Jan 2011 21:58:07 +0000
Subject: [PATCH 03/12] Use sortconcat instead of group_concat for the au_map
meta2 field
---
src/calibre/library/database2.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 96f21b88ee..0f3a1a72fa 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -257,7 +257,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'flags',
'uuid',
'has_cover',
- '''(SELECT group_concat(authors.name || ':::' || authors.sort, ':#:')
+ '''(SELECT sortconcat(bl.id, authors.name || ':::' || REPLACE(authors.sort, ',','|'))
FROM authors, books_authors_link as bl
WHERE bl.book=books.id and authors.id=bl.author
ORDER BY bl.id) au_map''',
@@ -710,12 +710,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)
aut_list = row[fm['au_map']]
- aut_list = [p.split(':::') for p in aut_list.split(':#:')]
+ aut_list = [p.split(':::') for p in aut_list.split(',')]
aum = []
aus = {}
for (author, author_sort) in aut_list:
aum.append(author)
- aus[author] = author_sort
+ aus[author] = author_sort.replace('|', ',')
mi.title = row[fm['title']]
mi.authors = aum
mi.author_sort = row[fm['author_sort']]
From cb55c9a4f9fc0bc1cbe71afb2082a657100730d0 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Fri, 7 Jan 2011 08:12:05 +0000
Subject: [PATCH 04/12] Add another motorola droid...
---
src/calibre/devices/android/driver.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index c2db8ddd77..b7e2f0fd2e 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -27,8 +27,9 @@ class ANDROID(USBMS):
0x040d : { 0x8510 : [0x0001], 0x0851 : [0x1] },
# Motorola
- 0x22b8 : { 0x41d9 : [0x216], 0x2d61: [0x100], 0x2d67 : [0x100],
- 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216] },
+ 0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
+ 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
+ 0x4286 : [0x216] },
# Sony Ericsson
0xfce : { 0xd12e : [0x0100]},
From 9e82bd6f238145788ec8a58288f1f9dfafb9365e Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Fri, 7 Jan 2011 08:25:21 +0000
Subject: [PATCH 05/12] Fix problem with formatter interpreting a missing format
letter as ERROR instead of 's'.
---
src/calibre/utils/formatter.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py
index 23763a25bf..46b52b9ce5 100644
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@@ -316,8 +316,6 @@ class TemplateFormatter(string.Formatter):
except:
raise ValueError(
_('format: type {0} requires a decimal (float) value, got {1}').format(typ, val))
- else:
- raise ValueError(_('format: unknown format type letter {0}').format(typ))
return unicode(('{0:'+fmt+'}').format(val))
def _explode_format_string(self, fmt):
From 21392dc27caa3f450047b4aaa127ba5ca9448cde Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Fri, 7 Jan 2011 10:45:06 +0000
Subject: [PATCH 06/12] 1) Add a special aggregate class to build the au_map.
2) remove the extra format column (should never have been added)
---
src/calibre/library/database2.py | 15 +++++----------
src/calibre/library/sqlite.py | 18 ++++++++++++++++++
2 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 0f3a1a72fa..8fef5d36bc 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -257,11 +257,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
'flags',
'uuid',
'has_cover',
- '''(SELECT sortconcat(bl.id, authors.name || ':::' || REPLACE(authors.sort, ',','|'))
- FROM authors, books_authors_link as bl
- WHERE bl.book=books.id and authors.id=bl.author
- ORDER BY bl.id) au_map''',
- '(SELECT group_concat(format) FROM data WHERE book=books.id) formats'
+ ('au_map', 'authors', 'author', 'aum_sortconcat(link.id, authors.name, authors.sort)')
]
lines = []
for col in columns:
@@ -278,10 +274,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
- 'publisher':9, 'series_index':10,
- 'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15,
- 'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20,
- 'au_map':21, 'formats':22}
+ 'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
+ 'formats':13, 'isbn':14, 'path':15, 'lccn':16, 'pubdate':17,
+ 'flags':18, 'uuid':19, 'cover':20, 'au_map':21}
for k,v in self.FIELD_MAP.iteritems():
self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
@@ -710,7 +705,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)
aut_list = row[fm['au_map']]
- aut_list = [p.split(':::') for p in aut_list.split(',')]
+ aut_list = [p.split(':::') for p in aut_list.split(':#:')]
aum = []
aus = {}
for (author, author_sort) in aut_list:
diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py
index 0458ada27b..75856dd0f6 100644
--- a/src/calibre/library/sqlite.py
+++ b/src/calibre/library/sqlite.py
@@ -87,6 +87,23 @@ class SortedConcatenate(object):
class SafeSortedConcatenate(SortedConcatenate):
sep = '|'
+class AumSortedConcatenate(object):
+ '''String concatenation aggregator for the author sort map'''
+ def __init__(self):
+ self.ans = {}
+
+ def step(self, ndx, author, sort):
+ if author is not None:
+ self.ans[ndx] = author + ':::' + sort
+
+ def finalize(self):
+ keys = self.ans.keys()
+ if len(keys) == 0:
+ return None
+ if len(keys) == 1:
+ return self.ans[keys[0]]
+ return ':#:'.join([self.ans[v] for v in sorted(keys)])
+
class Connection(sqlite.Connection):
def get(self, *args, **kw):
@@ -155,6 +172,7 @@ class DBThread(Thread):
c_ext_loaded = load_c_extensions(self.conn)
self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
self.conn.create_aggregate('concat', 1, Concatenate)
+ self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
if not c_ext_loaded:
self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)
From e1a5bef8967c18c773b506b9c29a9a8849cb8b37 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Fri, 7 Jan 2011 12:41:48 +0000
Subject: [PATCH 07/12] Fix scrolling to column zero when clicking on a column
to sort.
---
src/calibre/gui2/library/views.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/calibre/gui2/library/views.py b/src/calibre/gui2/library/views.py
index c1dd5b3766..e1e9cf4456 100644
--- a/src/calibre/gui2/library/views.py
+++ b/src/calibre/gui2/library/views.py
@@ -612,7 +612,7 @@ class BooksView(QTableView): # {{{
if row > -1:
h = self.horizontalHeader()
for i in range(h.count()):
- if not h.isSectionHidden(i):
+ if not h.isSectionHidden(i) and h.sectionViewportPosition(i) >= 0:
self.scrollTo(self.model().index(row, i))
break
From 910a3f1accbb8692a8dbbf75614a5d826d515493 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Fri, 7 Jan 2011 13:22:58 +0000
Subject: [PATCH 08/12] Add the list_item function to the template processor,
to avoid using complicated regexp to carve them out. Add it to the
documentation.
---
src/calibre/manual/template_lang.rst | 1 +
src/calibre/utils/formatter.py | 15 +++++++++++++--
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/src/calibre/manual/template_lang.rst b/src/calibre/manual/template_lang.rst
index 1bf08c11f9..f64a413d3e 100644
--- a/src/calibre/manual/template_lang.rst
+++ b/src/calibre/manual/template_lang.rst
@@ -121,6 +121,7 @@ The functions available are:
* ``contains(pattern, text if match, text if not match`` -- checks if field contains matches for the regular expression `pattern`. Returns `text if match` if matches are found, otherwise it returns `text if no match`.
* ``count(separator)`` -- interprets the value as a list of items separated by `separator`, returning the number of items in the list. Most lists use a comma as the separator, but authors uses an ampersand. Examples: `{tags:count(,)}`, `{authors:count(&)}`
* ``ifempty(text)`` -- if the field is not empty, return the value of the field. Otherwise return `text`.
+ * ``list_item(index, separator)`` -- interpret the value as a list of items separated by `separator`, returning the `index`th item. The first item is number zero. The last item can be returned using `list_item(-1,separator)`. If the item is not in the list, then the empty value is returned. The separator has the same meaning as in the `count` function.
* ``lookup(pattern, field, pattern, field, ..., else_field)`` -- like switch, except the arguments are field (metadata) names, not text. The value of the appropriate field will be fetched and used. Note that because composite columns are fields, you can use this function in one composite field to use the value of some other composite field. This is extremely useful when constructing variable save paths (more later).
* ``re(pattern, replacement)`` -- return the field after applying the regular expression. All instances of `pattern` are replaced with `replacement`. As in all of |app|, these are python-compatible regular expressions.
* ``shorten(left chars, middle text, right chars)`` -- Return a shortened version of the field, consisting of `left chars` characters from the beginning of the field, followed by `middle text`, followed by `right chars` characters from the end of the string. `Left chars` and `right chars` must be integers. For example, assume the title of the book is `Ancient English Laws in the Times of Ivanhoe`, and you want it to fit in a space of at most 15 characters. If you use ``{title:shorten(9,-,5)}``, the result will be `Ancient E-nhoe`. If the field's length is less than ``left chars`` + ``right chars`` + the length of ``middle text``, then the field will be used intact. For example, the title `The Dome` would not be changed.
diff --git a/src/calibre/utils/formatter.py b/src/calibre/utils/formatter.py
index 46b52b9ce5..2e4f843c3d 100644
--- a/src/calibre/utils/formatter.py
+++ b/src/calibre/utils/formatter.py
@@ -281,19 +281,30 @@ class TemplateFormatter(string.Formatter):
def _count(self, val, sep):
return unicode(len(val.split(sep)))
+ def _list_item(self, val, index, sep):
+ if not val:
+ return ''
+ index = int(index)
+ val = val.split(sep)
+ try:
+ return val[index]
+ except:
+ return ''
+
functions = {
'uppercase' : (0, lambda s,x: x.upper()),
'lowercase' : (0, lambda s,x: x.lower()),
'titlecase' : (0, lambda s,x: titlecase(x)),
'capitalize' : (0, lambda s,x: capitalize(x)),
'contains' : (3, _contains),
+ 'count' : (1, _count),
'ifempty' : (1, _ifempty),
+ 'list_item' : (2, _list_item),
'lookup' : (-1, _lookup),
're' : (2, _re),
'shorten' : (3, _shorten),
'switch' : (-1, _switch),
- 'test' : (2, _test),
- 'count' : (1, _count),
+ 'test' : (2, _test)
}
def _do_format(self, val, fmt):
From b0a3912867f44e68db7c00d3bb2cb8149c4f1884 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Fri, 7 Jan 2011 15:25:56 +0000
Subject: [PATCH 09/12] Optimized search, doing and/or/not shortcutting
---
src/calibre/library/caches.py | 23 ++++---
src/calibre/utils/search_query_parser.py | 78 ++++++++++++++++++------
2 files changed, 72 insertions(+), 29 deletions(-)
diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py
index 980c9f1fa9..d56111b30a 100644
--- a/src/calibre/library/caches.py
+++ b/src/calibre/library/caches.py
@@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{
self.search_restriction = ''
self.field_metadata = field_metadata
self.all_search_locations = field_metadata.get_search_terms()
- SearchQueryParser.__init__(self, self.all_search_locations)
+ SearchQueryParser.__init__(self, self.all_search_locations, optimize=True)
self.build_date_relop_dict()
self.build_numeric_relop_dict()
@@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{
'<=':[2, relop_le]
}
- def get_dates_matches(self, location, query):
+ def get_dates_matches(self, location, query, candidates):
matches = set([])
if len(query) < 2:
return matches
@@ -274,13 +274,13 @@ class ResultCache(SearchQueryParser): # {{{
loc = self.field_metadata[location]['rec_index']
if query == 'false':
- for item in self._data:
+ for item in [self._data[id] for id in candidates]:
if item is None: continue
if item[loc] is None or item[loc] <= UNDEFINED_DATE:
matches.add(item[0])
return matches
if query == 'true':
- for item in self._data:
+ for item in [self._data[id] for id in candidates]:
if item is None: continue
if item[loc] is not None and item[loc] > UNDEFINED_DATE:
matches.add(item[0])
@@ -319,7 +319,7 @@ class ResultCache(SearchQueryParser): # {{{
field_count = query.count('-') + 1
else:
field_count = query.count('/') + 1
- for item in self._data:
+ for item in [self._data[id] for id in candidates]:
if item is None or item[loc] is None: continue
if relop(item[loc], qd, field_count):
matches.add(item[0])
@@ -335,7 +335,7 @@ class ResultCache(SearchQueryParser): # {{{
'<=':[2, lambda r, q: r <= q]
}
- def get_numeric_matches(self, location, query, val_func = None):
+ def get_numeric_matches(self, location, query, candidates, val_func = None):
matches = set([])
if len(query) == 0:
return matches
@@ -381,7 +381,7 @@ class ResultCache(SearchQueryParser): # {{{
except:
return matches
- for item in self._data:
+ for item in [self._data[id] for id in candidates]:
if item is None:
continue
v = val_func(item)
@@ -393,8 +393,13 @@ class ResultCache(SearchQueryParser): # {{{
matches.add(item[0])
return matches
- def get_matches(self, location, query, allow_recursion=True):
+ def get_matches(self, location, query, allow_recursion=True, candidates=None):
matches = set([])
+ if candidates is None:
+ candidates = self.universal_set()
+ if len(candidates) == 0:
+ return matches
+
if query and query.strip():
# get metadata key associated with the search term. Eliminates
# dealing with plurals and other aliases
@@ -476,7 +481,7 @@ class ResultCache(SearchQueryParser): # {{{
else:
q = query
- for item in self._data:
+ for item in [self._data[id] for id in candidates]:
if item is None: continue
if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak
diff --git a/src/calibre/utils/search_query_parser.py b/src/calibre/utils/search_query_parser.py
index db7c7bde5f..4f2c899bce 100644
--- a/src/calibre/utils/search_query_parser.py
+++ b/src/calibre/utils/search_query_parser.py
@@ -118,8 +118,9 @@ class SearchQueryParser(object):
failed.append(test[0])
return failed
- def __init__(self, locations, test=False):
+ def __init__(self, locations, test=False, optimize=False):
self._tests_failed = False
+ self.optimize = optimize
# Define a token
standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
locations)
@@ -182,38 +183,50 @@ class SearchQueryParser(object):
# empty the list of searches used for recursion testing
self.recurse_level = 0
self.searches_seen = set([])
- return self._parse(query)
+ candidates = self.universal_set()
+ return self._parse(query, candidates)
# this parse is used internally because it doesn't clear the
# recursive search test list. However, we permit seeing the
# same search a few times because the search might appear within
# another search.
- def _parse(self, query):
+ def _parse(self, query, candidates):
self.recurse_level += 1
res = self._parser.parseString(query)[0]
- t = self.evaluate(res)
+ t = self.evaluate(res, candidates)
self.recurse_level -= 1
return t
def method(self, group_name):
return getattr(self, 'evaluate_'+group_name)
- def evaluate(self, parse_result):
- return self.method(parse_result.getName())(parse_result)
+ def evaluate(self, parse_result, candidates):
+ return self.method(parse_result.getName())(parse_result, candidates)
- def evaluate_and(self, argument):
- return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+ def evaluate_and(self, argument, candidates):
+ # RHS checks only those items matched by LHS
+ # returns result of RHS check: RHmatches(LHmatches(c))
+ # return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+ l = self.evaluate(argument[0], candidates)
+ return l.intersection(self.evaluate(argument[1], l))
- def evaluate_or(self, argument):
- return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+ def evaluate_or(self, argument, candidates):
+ # RHS checks only those elements not matched by LHS
+ # returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c))
+ # return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+ l = self.evaluate(argument[0], candidates)
+ return l.union(self.evaluate(argument[1], candidates.difference(l)))
- def evaluate_not(self, argument):
- return self.universal_set().difference(self.evaluate(argument[0]))
+ def evaluate_not(self, argument, candidates):
+ # unary op checks only candidates. Result: list of items matching
+ # returns: c - matches(c)
+ # return self.universal_set().difference(self.evaluate(argument[0]))
+ return candidates.difference(self.evaluate(argument[0], candidates))
- def evaluate_parenthesis(self, argument):
- return self.evaluate(argument[0])
+ def evaluate_parenthesis(self, argument, candidates):
+ return self.evaluate(argument[0], candidates)
- def evaluate_token(self, argument):
+ def evaluate_token(self, argument, candidates):
location = argument[0]
query = argument[1]
if location.lower() == 'search':
@@ -224,17 +237,26 @@ class SearchQueryParser(object):
raise ParseException(query, len(query), 'undefined saved search', self)
if self.recurse_level > 5:
self.searches_seen.add(query)
- return self._parse(saved_searches().lookup(query))
+ return self._parse(saved_searches().lookup(query), candidates)
except: # convert all exceptions (e.g., missing key) to a parse error
raise ParseException(query, len(query), 'undefined saved search', self)
- return self.get_matches(location, query)
+ return self._get_matches(location, query, candidates)
+
+ def _get_matches(self, location, query, candidates):
+ if self.optimize:
+ return self.get_matches(location, query, candidates=candidates)
+ else:
+ return self.get_matches(location, query)
def get_matches(self, location, query):
'''
Should return the set of matches for :param:'location` and :param:`query`.
+ If you set the optimized parameter in __init__, this method must accept
+ a named parameter 'candidates'
:param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`.
:param:`query` is a string literal.
+ :param: optional named parameter candidates, a set of items to check.
'''
return set([])
@@ -561,7 +583,7 @@ class Tester(SearchQueryParser):
def universal_set(self):
return self._universal_set
- def get_matches(self, location, query):
+ def get_matches(self, location, query, candidates=None):
location = location.lower()
if location in self.fields.keys():
getter = operator.itemgetter(self.fields[location])
@@ -573,8 +595,13 @@ class Tester(SearchQueryParser):
if not query:
return set([])
query = query.lower()
- return set(key for key, val in self.texts.items() \
- if query and query in getattr(getter(val), 'lower', lambda : '')())
+ if candidates:
+ return set(key for key, val in self.texts.items() \
+ if key in candidates and query and query
+ in getattr(getter(val), 'lower', lambda : '')())
+ else:
+ return set(key for key, val in self.texts.items() \
+ if query and query in getattr(getter(val), 'lower', lambda : '')())
@@ -592,6 +619,7 @@ class Tester(SearchQueryParser):
def main(args=sys.argv):
+ print 'testing unoptimized'
tester = Tester(['authors', 'author', 'series', 'formats', 'format',
'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
@@ -601,6 +629,16 @@ def main(args=sys.argv):
print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
return 1
+ print '\n\ntesting optimized'
+ tester = Tester(['authors', 'author', 'series', 'formats', 'format',
+ 'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
+ 'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
+ 'all', 'search'], test=True, optimize=True)
+ failed = tester.run_tests()
+ if tester._tests_failed or failed:
+ print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
+ return 1
+
return 0
if __name__ == '__main__':
From 8bcdb0fed79c7b0f0b9fbb80d9b3a5b0c683c5d0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal