mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Insert special char: Allow searching for non-BMP characters by using the ICU database of names rather than Python's outdated one.
This commit is contained in:
parent
b36c6211b0
commit
63cba4c884
@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import unicodedata, re, os, cPickle, sys, textwrap
|
import unicodedata, re, os, cPickle, textwrap
|
||||||
from bisect import bisect
|
from bisect import bisect
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
@ -22,7 +22,7 @@ from calibre.gui2 import NONE
|
|||||||
from calibre.gui2.widgets2 import HistoryLineEdit2
|
from calibre.gui2.widgets2 import HistoryLineEdit2
|
||||||
from calibre.gui2.tweak_book import tprefs
|
from calibre.gui2.tweak_book import tprefs
|
||||||
from calibre.gui2.tweak_book.widgets import Dialog
|
from calibre.gui2.tweak_book.widgets import Dialog
|
||||||
from calibre.utils.icu import safe_chr as chr
|
from calibre.utils.icu import safe_chr as chr, icu_unicode_version, character_name_from_code
|
||||||
|
|
||||||
ROOT = QModelIndex()
|
ROOT = QModelIndex()
|
||||||
|
|
||||||
@ -35,9 +35,10 @@ non_printing = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Searching {{{
|
# Searching {{{
|
||||||
|
|
||||||
def load_search_index():
|
def load_search_index():
|
||||||
topchar = sys.maxunicode
|
topchar = 0x10ffff
|
||||||
ver = (1, topchar, unicodedata.unidata_version) # Increment this when you make any changes to the index
|
ver = (1, topchar, icu_unicode_version or unicodedata.unidata_version) # Increment this when you make any changes to the index
|
||||||
name_map = {}
|
name_map = {}
|
||||||
path = os.path.join(cache_dir(), 'unicode-name-index.pickle')
|
path = os.path.join(cache_dir(), 'unicode-name-index.pickle')
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
@ -48,7 +49,7 @@ def load_search_index():
|
|||||||
if not name_map:
|
if not name_map:
|
||||||
name_map = defaultdict(set)
|
name_map = defaultdict(set)
|
||||||
for x in xrange(1, topchar + 1):
|
for x in xrange(1, topchar + 1):
|
||||||
for word in unicodedata.name(chr(x), '').split():
|
for word in character_name_from_code(x).split():
|
||||||
name_map[word.lower()].add(x)
|
name_map[word.lower()].add(x)
|
||||||
from calibre.ebooks.html_entities import html5_entities
|
from calibre.ebooks.html_entities import html5_entities
|
||||||
for name, char in html5_entities.iteritems():
|
for name, char in html5_entities.iteritems():
|
||||||
@ -465,7 +466,7 @@ class CategoryModel(QAbstractItemModel):
|
|||||||
category, subcategory = self.category_map[self.starts[ipos]]
|
category, subcategory = self.category_map[self.starts[ipos]]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
category = subcategory = _('Unknown')
|
category = subcategory = _('Unknown')
|
||||||
return category, subcategory, unicodedata.name(chr(char_code), _('Unknown'))
|
return category, subcategory, (character_name_from_code(char_code) or _('Unknown'))
|
||||||
|
|
||||||
class CategoryDelegate(QStyledItemDelegate):
|
class CategoryDelegate(QStyledItemDelegate):
|
||||||
|
|
||||||
|
@ -738,6 +738,29 @@ end:
|
|||||||
return (PyErr_Occurred()) ? NULL : Py_BuildValue("s#", name, sz);
|
return (PyErr_Occurred()) ? NULL : Py_BuildValue("s#", name, sz);
|
||||||
} // }}}
|
} // }}}
|
||||||
|
|
||||||
|
// character_name {{{
|
||||||
|
static PyObject *
|
||||||
|
icu_character_name_from_code(PyObject *self, PyObject *args) {
|
||||||
|
char name[512] = {0};
|
||||||
|
int32_t sz, alias = 0;
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
PyObject *palias = NULL;
|
||||||
|
UChar32 code = 0;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "I|O", &code, &palias)) return NULL;
|
||||||
|
|
||||||
|
if (palias != NULL && PyObject_IsTrue(palias)) alias = 1;
|
||||||
|
|
||||||
|
if (alias) {
|
||||||
|
sz = u_charName(code, U_CHAR_NAME_ALIAS, name, 511, &status);
|
||||||
|
} else {
|
||||||
|
sz = u_charName(code, U_UNICODE_CHAR_NAME, name, 511, &status);
|
||||||
|
}
|
||||||
|
if (U_FAILURE(status)) { PyErr_SetString(PyExc_ValueError, "Failed to get name for code"); goto end; }
|
||||||
|
end:
|
||||||
|
return (PyErr_Occurred()) ? NULL : Py_BuildValue("s#", name, sz);
|
||||||
|
} // }}}
|
||||||
|
|
||||||
// chr {{{
|
// chr {{{
|
||||||
static PyObject *
|
static PyObject *
|
||||||
icu_chr(PyObject *self, PyObject *args) {
|
icu_chr(PyObject *self, PyObject *args) {
|
||||||
@ -786,6 +809,10 @@ static PyMethodDef icu_methods[] = {
|
|||||||
"character_name(char, alias=False) -> Return name for the first character in char, which must be a unicode string."
|
"character_name(char, alias=False) -> Return name for the first character in char, which must be a unicode string."
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{"character_name_from_code", icu_character_name_from_code, METH_VARARGS,
|
||||||
|
"character_name_from_code(code, alias=False) -> Return the name for the specified unicode code point"
|
||||||
|
},
|
||||||
|
|
||||||
{"chr", icu_chr, METH_VARARGS,
|
{"chr", icu_chr, METH_VARARGS,
|
||||||
"chr(code) -> Return a python unicode string corresponding to the specified character code. The string can have length 1 or 2 (for non BMP codes on narrow python builds)."
|
"chr(code) -> Return a python unicode string corresponding to the specified character code. The string can have length 1 or 2 (for non BMP codes on narrow python builds)."
|
||||||
},
|
},
|
||||||
|
@ -140,6 +140,16 @@ def character_name(string):
|
|||||||
except (TypeError, ValueError, KeyError):
|
except (TypeError, ValueError, KeyError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def character_name_from_code(code):
    '''
    Return the name of the unicode code point ``code``, or an empty string
    when no name can be obtained.

    The lookup goes through ``_icu.character_name_from_code`` and decodes its
    result as UTF-8. If that raises AttributeError, the ``unicodedata`` module
    is used instead. A TypeError, ValueError or KeyError from either path
    results in an empty string.
    '''
    try:
        try:
            raw_name = _icu.character_name_from_code(code)
            return raw_name.decode('utf-8') or ''
        except AttributeError:
            # Fall back to the python database of names
            import unicodedata
            return unicodedata.name(py_safe_chr(code), '')
    except (TypeError, ValueError, KeyError):
        return ''
|
||||||
|
|
||||||
if sys.maxunicode >= 0x10ffff:
|
if sys.maxunicode >= 0x10ffff:
|
||||||
try:
|
try:
|
||||||
py_safe_chr = unichr
|
py_safe_chr = unichr
|
||||||
@ -212,6 +222,7 @@ def icu_collation_order(collator, a):
|
|||||||
load_icu()
|
load_icu()
|
||||||
load_collator()
|
load_collator()
|
||||||
_icu_not_ok = _icu is None or _collator is None
|
_icu_not_ok = _icu is None or _collator is None
|
||||||
|
icu_unicode_version = getattr(_icu, 'unicode_version', None)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
senc = sys.getdefaultencoding()
|
senc = sys.getdefaultencoding()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user