Edit book: Insert special character: When searching by name match prefixes in addition to whole words. So you can now type "horiz" to match "horizontal".

2025-07-09 03:04:10 -04:00 · 2018-05-01 14:57:17 +05:30 · 2018-05-01 14:57:17 +05:30 · b71584154a
commit b71584154a
parent b1ee6be746
2 changed files with 36 additions and 44 deletions
--- a/src/calibre/gui2/tweak_book/char_select.py
+++ b/src/calibre/gui2/tweak_book/char_select.py
@ -6,10 +6,9 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

-import unicodedata, re, os, cPickle, textwrap
+import re, textwrap
 from bisect import bisect
 from functools import partial
-from collections import defaultdict

 from PyQt5.Qt import (
    QAbstractItemModel, QModelIndex, Qt, pyqtSignal, QApplication,
@ -17,12 +16,12 @@ from PyQt5.Qt import (
    QStyledItemDelegate, QSplitter, QLabel, QSizePolicy, QIcon, QMimeData,
    QPushButton, QToolButton, QInputMethodEvent)

-from calibre.constants import plugins, cache_dir
+from calibre.constants import plugins
 from calibre.gui2.widgets2 import HistoryLineEdit2
 from calibre.gui2.tweak_book import tprefs
 from calibre.gui2.tweak_book.widgets import Dialog, BusyCursor
-from calibre.utils.icu import safe_chr as chr, icu_unicode_version
-from calibre.utils.unicode_names import character_name_from_code
+from calibre.utils.icu import safe_chr as chr
+from calibre.utils.unicode_names import character_name_from_code, points_for_word

 ROOT = QModelIndex()

@ -34,44 +33,9 @@ non_printing = {
    0x206e: 'nads', 0x206f: 'nods', 0x20: 'sp', 0x7f: 'del', 0x2e3a: '2m', 0x2e3b: '3m', 0xad: 'shy',
 }

+
 # Searching {{{
-
-
-def load_search_index():
-    topchar = 0x10ffff
-    ver = (1, topchar, icu_unicode_version or unicodedata.unidata_version)  # Increment this when you make any changes to the index
-    name_map = {}
-    path = os.path.join(cache_dir(), 'unicode-name-index.pickle')
-    if os.path.exists(path):
-        with open(path, 'rb') as f:
-            name_map = cPickle.load(f)
-        if name_map.pop('calibre-nm-version:', None) != ver:
-            name_map = {}
-    if not name_map:
-        name_map = defaultdict(set)
-        for x in xrange(1, topchar + 1):
-            for word in character_name_from_code(x).split():
-                name_map[word.lower()].add(x)
-        from calibre.ebooks.html_entities import html5_entities
-        for name, char in html5_entities.iteritems():
-            try:
-                name_map[name.lower()].add(ord(char))
-            except TypeError:
-                continue
-        name_map['nnbsp'].add(0x202F)
-        name_map['calibre-nm-version:'] = ver
-        cPickle.dump(dict(name_map), open(path, 'wb'), -1)
-        del name_map['calibre-nm-version:']
-    return name_map
-
-
-_index = None
-
-
 def search_for_chars(query, and_tokens=False):
-    global _index
-    if _index is None:
-        _index = load_search_index()
    ans = set()
    for token in query.split():
        token = token.lower()
@ -79,7 +43,7 @@ def search_for_chars(query, and_tokens=False):
        if m is not None:
            chars = {int(m.group(1), 16)}
        else:
-            chars = _index.get(token, None)
+            chars = points_for_word(token)
        if chars is not None:
            if and_tokens:
                ans &= chars
--- a/src/calibre/utils/unicode_names.py
+++ b/src/calibre/utils/unicode_names.py
@ -2,11 +2,39 @@
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>

-from __future__ import (absolute_import, division, print_function,
-                        unicode_literals)
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from collections import defaultdict

 from calibre.constants import plugins
+from calibre.utils.icu import ord_string


 def character_name_from_code(code):
    return plugins['unicode_names'][0].name_for_codepoint(code) or 'U+{:X}'.format(code)
+
+
+def html_entities():
+    ans = getattr(html_entities, 'ans', None)
+    if ans is None:
+        from calibre.ebooks.html_entities import html5_entities
+        ans = defaultdict(set)
+        for name, char in html5_entities.iteritems():
+            try:
+                ans[name.lower()].add(ord_string(char)[0])
+            except TypeError:
+                continue
+        ans['nnbsp'].add(0x202F)
+        ans = dict(ans)
+        html_entities.ans = ans
+    return ans
+
+
+def points_for_word(w):
+    w = w.lower()
+    ans = points_for_word.cache.get(w)
+    if ans is None:
+        ans = plugins['unicode_names'][0].codepoints_for_word(w.encode('utf-8')) | html_entities().get(w, set())
+        points_for_word.cache[w] = ans
+    return ans
+points_for_word.cache = {}  # noqa