Speed up and simplify tag browser partitioning

This commit is contained in:
Kovid Goyal 2012-07-29 09:25:10 +05:30
commit 3e35bdcd70
2 changed files with 13 additions and 64 deletions

View File

@ -506,16 +506,6 @@ compile_gpm_templates = True
# default_tweak_format = 'remember' # default_tweak_format = 'remember'
default_tweak_format = None default_tweak_format = None
#: Enable multi-character first-letters in the tag browser
# Some languages have letters that can be represented by multiple characters.
# For example, Czech has a 'character' "ch" that sorts between "h" and "i".
# If this tweak is True, then the tag browser will take these characters into
# consideration when partitioning by first letter.
# Examples:
# enable_multicharacters_in_tag_browser = True
# enable_multicharacters_in_tag_browser = False
enable_multicharacters_in_tag_browser = True
#: Do not preselect a completion when editing authors/tags/series/etc. #: Do not preselect a completion when editing authors/tags/series/etc.
# This means that you can make changes and press Enter and your changes will # This means that you can make changes and press Enter and your changes will
# not be overwritten by a matching completion. However, if you wish to use the # not be overwritten by a matching completion. However, if you wish to use the

View File

@ -9,7 +9,6 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import traceback, cPickle, copy import traceback, cPickle, copy
from itertools import repeat
from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt, from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
QMimeData, QModelIndex, pyqtSignal, QObject) QMimeData, QModelIndex, pyqtSignal, QObject)
@ -17,7 +16,7 @@ from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
from calibre.gui2 import NONE, gprefs, config, error_dialog from calibre.gui2 import NONE, gprefs, config, error_dialog
from calibre.library.database2 import Tag from calibre.library.database2 import Tag
from calibre.utils.config import tweaks from calibre.utils.config import tweaks
from calibre.utils.icu import sort_key, lower, strcmp, contractions from calibre.utils.icu import sort_key, lower, strcmp, collation_order
from calibre.library.field_metadata import TagsIcons, category_icon_map from calibre.library.field_metadata import TagsIcons, category_icon_map
from calibre.gui2.dialogs.confirm_delete import confirm from calibre.gui2.dialogs.confirm_delete import confirm
from calibre.utils.formatter import EvalFormatter from calibre.utils.formatter import EvalFormatter
@ -258,16 +257,6 @@ class TagsModel(QAbstractItemModel): # {{{
self.hidden_categories.add(cat) self.hidden_categories.add(cat)
db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories)) db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))
conts = contractions()
if len(conts) == 0 or not tweaks['enable_multicharacters_in_tag_browser']:
self.do_contraction = False
else:
self.do_contraction = True
nconts = set()
for s in conts:
nconts.add(icu_upper(s))
self.contraction_set = frozenset(nconts)
self.db = db self.db = db
self._run_rebuild() self._run_rebuild()
self.endResetModel() self.endResetModel()
@ -416,53 +405,23 @@ class TagsModel(QAbstractItemModel): # {{{
tt = key if in_uc else None tt = key if in_uc else None
if collapse_model == 'first letter': if collapse_model == 'first letter':
# Build a list of 'equal' first letters by looking for # Build a list of 'equal' first letters by noticing changes
# overlapping ranges. If a range overlaps another, then the # in ICU's 'ordinal' for the first letter. In this case, the
# letters are assumed to be equivalent. ICU collating is complex # first letter can actually be more than one letter long.
# beyond belief. This mechanism lets us determine the logical cl_list = [None] * len(data[key])
# first character from ICU's standpoint. last_ordnum = 0
chardict = {}
for idx,tag in enumerate(data[key]): for idx,tag in enumerate(data[key]):
if not tag.sort: if not tag.sort:
c = ' ' c = ' '
else: else:
if not self.do_contraction: c = tag.sort
c = icu_upper(tag.sort)[0] ordnum, ordlen = collation_order(c)
else: if last_ordnum != ordnum:
v = icu_upper(tag.sort) last_c = icu_upper(c[0:ordlen])
c = v[0] last_ordnum = ordnum
for s in self.contraction_set: cl_list[idx] = last_c
if len(s) > len(c) and v.startswith(s): top_level_component = 'z' + data[key][0].original_name
c = s
if c not in chardict:
chardict[c] = [idx, idx]
else:
chardict[c][1] = idx
# sort the ranges to facilitate detecting overlap
if len(chardict) == 1 and ' ' in chardict:
# The category could not be partitioned.
collapse_model = 'disable'
else:
ranges = sorted([(v[0], v[1], c) for c,v in chardict.items()])
# Create a list of 'first letters' to use for each item in
# the category. The list is generated using the ranges. Overlaps
# are filled with the character that first occurs.
cl_list = list(repeat(None, len(data[key])))
for t in ranges:
start = t[0]
c = t[2]
if cl_list[start] is None:
nc = c
else:
nc = cl_list[start]
for i in range(start, t[1]+1):
cl_list[i] = nc
if len(data[key]) > 0:
top_level_component = 'z' + data[key][0].original_name
else:
top_level_component = ''
last_idx = -collapse last_idx = -collapse
category_is_hierarchical = not ( category_is_hierarchical = not (
key in ['authors', 'publisher', 'news', 'formats', 'rating'] or key in ['authors', 'publisher', 'news', 'formats', 'rating'] or