Speed up and simplify tag browser partitioning

2025-06-23 15:30:45 -04:00 · 2012-07-29 09:25:10 +05:30 · 2012-07-29 09:25:10 +05:30 · 3e35bdcd70
commit 3e35bdcd70
parent 3245270e9e 64a3f0e0c7
2 changed files with 13 additions and 64 deletions
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -506,16 +506,6 @@ compile_gpm_templates = True
 #   default_tweak_format = 'remember'
 default_tweak_format = None
 #: Enable multi-character first-letters in the tag browser
 # Some languages have letters that can be represented by multiple characters.
 # For example, Czech has a 'character' "ch" that sorts between "h" and "i".
 # If this tweak is True, then the tag browser will take these characters into
 # consideration when partitioning by first letter.
 # Examples:
 #    enable_multicharacters_in_tag_browser = True
 #    enable_multicharacters_in_tag_browser = False
 enable_multicharacters_in_tag_browser = True
 #: Do not preselect a completion when editing authors/tags/series/etc.
 # This means that you can make changes and press Enter and your changes will
 # not be overwritten by a matching completion. However, if you wish to use the
--- a/src/calibre/gui2/tag_browser/model.py
+++ b/src/calibre/gui2/tag_browser/model.py
@ -9,7 +9,6 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import traceback, cPickle, copy
 from itertools import repeat
 from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
        QMimeData, QModelIndex, pyqtSignal, QObject)
@ -17,7 +16,7 @@ from PyQt4.Qt import (QAbstractItemModel, QIcon, QVariant, QFont, Qt,
 from calibre.gui2 import NONE, gprefs, config, error_dialog
 from calibre.library.database2 import Tag
 from calibre.utils.config import tweaks
-from calibre.utils.icu import sort_key, lower, strcmp, contractions
+from calibre.utils.icu import sort_key, lower, strcmp, collation_order
 from calibre.library.field_metadata import TagsIcons, category_icon_map
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.utils.formatter import EvalFormatter
@ -258,16 +257,6 @@ class TagsModel(QAbstractItemModel): # {{{
                self.hidden_categories.add(cat)
        db.prefs.set('tag_browser_hidden_categories', list(self.hidden_categories))
        conts = contractions()
        if len(conts) == 0 or not tweaks['enable_multicharacters_in_tag_browser']:
            self.do_contraction = False
        else:
            self.do_contraction = True
            nconts = set()
            for s in conts:
                nconts.add(icu_upper(s))
            self.contraction_set = frozenset(nconts)
        self.db = db
        self._run_rebuild()
        self.endResetModel()
@ -416,53 +405,23 @@ class TagsModel(QAbstractItemModel): # {{{
            tt = key if in_uc else None
            if collapse_model == 'first letter':
-                # Build a list of 'equal' first letters by looking for
+                # Build a list of 'equal' first letters by noticing changes
-                # overlapping ranges. If a range overlaps another, then the
+                # in ICU's 'ordinal' for the first letter. In this case, the
-                # letters are assumed to be equivalent. ICU collating is complex
+                # first letter can actually be more than one character long.
-                # beyond belief. This mechanism lets us determine the logical
+                cl_list = [None] * len(data[key])
-                # first character from ICU's standpoint.
+                last_ordnum = 0
                chardict = {}
                for idx,tag in enumerate(data[key]):
                    if not tag.sort:
                        c = ' '
                    else:
-                        if not self.do_contraction:
+                        c = tag.sort
-                            c = icu_upper(tag.sort)[0]
+                    ordnum, ordlen = collation_order(c)
-                        else:
+                    if last_ordnum != ordnum:
-                            v = icu_upper(tag.sort)
+                        last_c = icu_upper(c[0:ordlen])
-                            c = v[0]
+                        last_ordnum = ordnum
-                            for s in self.contraction_set:
+                    cl_list[idx] = last_c
-                                if len(s) > len(c) and v.startswith(s):
+            top_level_component = 'z' + data[key][0].original_name
                                    c = s
                    if c not in chardict:
                        chardict[c] = [idx, idx]
                    else:
                        chardict[c][1] = idx
                # sort the ranges to facilitate detecting overlap
                if len(chardict) == 1 and ' ' in chardict:
                    # The category could not be partitioned.
                    collapse_model = 'disable'
                else:
                    ranges = sorted([(v[0], v[1], c) for c,v in chardict.items()])
                    # Create a list of 'first letters' to use for each item in
                    # the category. The list is generated using the ranges. Overlaps
                    # are filled with the character that first occurs.
                    cl_list = list(repeat(None, len(data[key])))
                    for t in ranges:
                        start = t[0]
                        c = t[2]
                        if cl_list[start] is None:
                            nc = c
                        else:
                            nc = cl_list[start]
                        for i in range(start, t[1]+1):
                            cl_list[i] = nc
            if len(data[key]) > 0:
                top_level_component = 'z' + data[key][0].original_name
            else:
                top_level_component = ''
            last_idx = -collapse
            category_is_hierarchical = not (
                key in ['authors', 'publisher', 'news', 'formats', 'rating'] or