mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book: Fix names for some control characters not being displayed in the status bar
Uses the Unicode names database I created for kitty, which is much more comprehensive than the one in ICU.
This commit is contained in:
parent
88e9494e6b
commit
5d95d13935
@ -12,6 +12,12 @@
|
|||||||
"sources": "calibre/utils/monotonic.c",
|
"sources": "calibre/utils/monotonic.c",
|
||||||
"linux_libraries": "rt"
|
"linux_libraries": "rt"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "unicode_names",
|
||||||
|
"headers": "unicode_names/names.h unicode_names/data-types.h",
|
||||||
|
"sources": "unicode_names/unicode_names.c",
|
||||||
|
"optimize_level": 3
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "speedup",
|
"name": "speedup",
|
||||||
"sources": "calibre/utils/speedup.c",
|
"sources": "calibre/utils/speedup.c",
|
||||||
|
@ -169,6 +169,7 @@ class Plugins(collections.Mapping):
|
|||||||
'icu',
|
'icu',
|
||||||
'speedup',
|
'speedup',
|
||||||
'monotonic',
|
'monotonic',
|
||||||
|
'unicode_names',
|
||||||
'zlib2',
|
'zlib2',
|
||||||
'html',
|
'html',
|
||||||
'freetype',
|
'freetype',
|
||||||
|
@ -21,7 +21,8 @@ from calibre.constants import plugins, cache_dir
|
|||||||
from calibre.gui2.widgets2 import HistoryLineEdit2
|
from calibre.gui2.widgets2 import HistoryLineEdit2
|
||||||
from calibre.gui2.tweak_book import tprefs
|
from calibre.gui2.tweak_book import tprefs
|
||||||
from calibre.gui2.tweak_book.widgets import Dialog, BusyCursor
|
from calibre.gui2.tweak_book.widgets import Dialog, BusyCursor
|
||||||
from calibre.utils.icu import safe_chr as chr, icu_unicode_version, character_name_from_code
|
from calibre.utils.icu import safe_chr as chr, icu_unicode_version
|
||||||
|
from calibre.utils.unicode_names import character_name_from_code
|
||||||
|
|
||||||
ROOT = QModelIndex()
|
ROOT = QModelIndex()
|
||||||
|
|
||||||
@ -469,7 +470,7 @@ class CategoryModel(QAbstractItemModel):
|
|||||||
category, subcategory = self.category_map[self.starts[ipos]]
|
category, subcategory = self.category_map[self.starts[ipos]]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
category = subcategory = _('Unknown')
|
category = subcategory = _('Unknown')
|
||||||
return category, subcategory, (character_name_from_code(char_code) or _('Unknown'))
|
return category, subcategory, character_name_from_code(char_code)
|
||||||
|
|
||||||
|
|
||||||
class CategoryDelegate(QStyledItemDelegate):
|
class CategoryDelegate(QStyledItemDelegate):
|
||||||
|
@ -33,7 +33,8 @@ from calibre.gui2 import error_dialog, question_dialog, choose_save_file, open_u
|
|||||||
from calibre.gui2.tweak_book import current_container, tprefs, dictionaries
|
from calibre.gui2.tweak_book import current_container, tprefs, dictionaries
|
||||||
from calibre.gui2.tweak_book.widgets import Dialog
|
from calibre.gui2.tweak_book.widgets import Dialog
|
||||||
from calibre.gui2.progress_indicator import ProgressIndicator
|
from calibre.gui2.progress_indicator import ProgressIndicator
|
||||||
from calibre.utils.icu import primary_contains, numeric_sort_key, character_name_from_code
|
from calibre.utils.icu import primary_contains, numeric_sort_key
|
||||||
|
from calibre.utils.unicode_names import character_name_from_code
|
||||||
from calibre.utils.localization import calibre_langcode_to_name, canonicalize_lang
|
from calibre.utils.localization import calibre_langcode_to_name, canonicalize_lang
|
||||||
|
|
||||||
# Utils {{{
|
# Utils {{{
|
||||||
|
@ -45,7 +45,8 @@ from calibre.gui2.tweak_book.manage_fonts import ManageFonts
|
|||||||
from calibre.gui2.tweak_book.function_replace import DebugOutput
|
from calibre.gui2.tweak_book.function_replace import DebugOutput
|
||||||
from calibre.gui2.tweak_book.editor.widget import register_text_editor_actions
|
from calibre.gui2.tweak_book.editor.widget import register_text_editor_actions
|
||||||
from calibre.gui2.tweak_book.editor.insert_resource import InsertImage
|
from calibre.gui2.tweak_book.editor.insert_resource import InsertImage
|
||||||
from calibre.utils.icu import character_name, sort_key
|
from calibre.utils.icu import sort_key, ord_string
|
||||||
|
from calibre.utils.unicode_names import character_name_from_code
|
||||||
from calibre.utils.localization import localize_user_manual_link
|
from calibre.utils.localization import localize_user_manual_link
|
||||||
|
|
||||||
|
|
||||||
@ -219,7 +220,7 @@ class CursorPositionWidget(QWidget): # {{{
|
|||||||
self.la.setText('')
|
self.la.setText('')
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
name = character_name(character) if character and tprefs['editor_show_char_under_cursor'] else None
|
name = character_name_from_code(ord_string(character)[0]) if character and tprefs['editor_show_char_under_cursor'] else None
|
||||||
except Exception:
|
except Exception:
|
||||||
name = None
|
name = None
|
||||||
text = _('Line: {0} : {1}').format(line, col)
|
text = _('Line: {0} : {1}').format(line, col)
|
||||||
|
@ -145,7 +145,12 @@ class TestICU(unittest.TestCase):
|
|||||||
|
|
||||||
def test_character_name(self):
|
def test_character_name(self):
|
||||||
' Test character naming '
|
' Test character naming '
|
||||||
self.ae(icu.character_name('\U0001f431'), 'CAT FACE')
|
from calibre.utils.unicode_names import character_name_from_code
|
||||||
|
for q, e in {
|
||||||
|
'\U0001f431': 'CAT FACE'
|
||||||
|
}.items():
|
||||||
|
self.ae(icu.character_name(q), e)
|
||||||
|
self.ae(character_name_from_code(icu.ord_string(q)[0]), e)
|
||||||
|
|
||||||
def test_contractions(self):
|
def test_contractions(self):
|
||||||
' Test contractions '
|
' Test contractions '
|
||||||
|
12
src/calibre/utils/unicode_names.py
Normal file
12
src/calibre/utils/unicode_names.py
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
# License: GPLv3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
from __future__ import (absolute_import, division, print_function,
|
||||||
|
unicode_literals)
|
||||||
|
|
||||||
|
from calibre.constants import plugins
|
||||||
|
|
||||||
|
|
||||||
|
def character_name_from_code(code):
    ''' Return the name of the character with integer codepoint ``code``.

    The name is looked up via the ``unicode_names`` plugin; when the plugin
    yields nothing for the codepoint, ``U+<HEX>`` notation is returned. '''
    unicode_names = plugins['unicode_names'][0]
    name = unicode_names.name_for_codepoint(code)
    if name:
        return name
    return 'U+{:X}'.format(code)
|
20
src/unicode_names/data-types.h
Normal file
20
src/unicode_names/data-types.h
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2018 Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
*
|
||||||
|
* Distributed under terms of the GPL3 license.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Python.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
typedef uint32_t char_type;
|
||||||
|
typedef int bool;
|
||||||
|
#define false 0
|
||||||
|
#define true 1
|
||||||
|
#define EXPORTED CALIBRE_MODINIT_FUNC
|
||||||
|
#define START_ALLOW_CASE_RANGE
|
||||||
|
#define END_ALLOW_CASE_RANGE
|
||||||
|
#define UNUSED
|
||||||
|
#define PYNOARG PyObject *__a1 UNUSED, PyObject *__a2 UNUSED
|
||||||
|
#define arraysz(x) (sizeof(x)/sizeof(x[0]))
|
64658
src/unicode_names/names.h
Normal file
64658
src/unicode_names/names.h
Normal file
File diff suppressed because one or more lines are too long
121
src/unicode_names/unicode_names.c
Normal file
121
src/unicode_names/unicode_names.c
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
/*
|
||||||
|
* unicode_names.c
|
||||||
|
* Copyright (C) 2018 Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
*
|
||||||
|
* Distributed under terms of the GPL3 license.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "names.h"
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
all_words(PYNOARG) {
|
||||||
|
PyObject *ans = PyTuple_New(arraysz(all_words_map));
|
||||||
|
if (!ans) return NULL;
|
||||||
|
for (size_t i = 0; i < arraysz(all_words_map); i++) {
|
||||||
|
PyObject *w = PyUnicode_FromString(all_words_map[i]);
|
||||||
|
if (w == NULL) { Py_DECREF(ans); return NULL; }
|
||||||
|
PyTuple_SET_ITEM(ans, i, w);
|
||||||
|
}
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
add_matches(const word_trie *wt, char_type *codepoints, size_t *pos, const size_t sz) {
|
||||||
|
size_t num = mark_groups[wt->match_offset];
|
||||||
|
for (size_t i = wt->match_offset + 1; i < wt->match_offset + 1 + num && *pos < sz; i++, (*pos)++) {
|
||||||
|
codepoints[*pos] = mark_to_cp[mark_groups[i]];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
process_trie_node(const word_trie *wt, char_type *codepoints, size_t *pos, const size_t sz) {
|
||||||
|
if (wt->match_offset) add_matches(wt, codepoints, pos, sz);
|
||||||
|
size_t num_children = children_array[wt->children_offset];
|
||||||
|
if (!num_children) return;
|
||||||
|
for (size_t c = wt->children_offset + 1; c < wt->children_offset + 1 + num_children; c++) {
|
||||||
|
if (*pos > sz) return;
|
||||||
|
uint32_t x = children_array[c];
|
||||||
|
process_trie_node(&all_trie_nodes[x >> 8], codepoints, pos, sz);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline PyObject*
|
||||||
|
codepoints_for_word(const char *word, size_t len) {
|
||||||
|
const word_trie *wt = all_trie_nodes;
|
||||||
|
for (size_t i = 0; i < len; i++) {
|
||||||
|
unsigned char ch = word[i];
|
||||||
|
size_t num_children = children_array[wt->children_offset];
|
||||||
|
if (!num_children) return PyFrozenSet_New(NULL);
|
||||||
|
bool found = false;
|
||||||
|
for (size_t c = wt->children_offset + 1; c < wt->children_offset + 1 + num_children; c++) {
|
||||||
|
uint32_t x = children_array[c];
|
||||||
|
if ((x & 0xff) == ch) {
|
||||||
|
found = true;
|
||||||
|
wt = &all_trie_nodes[x >> 8];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!found) return PyFrozenSet_New(NULL);
|
||||||
|
}
|
||||||
|
static char_type codepoints[1024];
|
||||||
|
size_t cpos = 0;
|
||||||
|
process_trie_node(wt, codepoints, &cpos, arraysz(codepoints));
|
||||||
|
PyObject *ans = PyFrozenSet_New(NULL); if (ans == NULL) return NULL;
|
||||||
|
for (size_t i = 0; i < cpos; i++) {
|
||||||
|
PyObject *t = PyLong_FromUnsignedLong(codepoints[i]); if (t == NULL) { Py_DECREF(ans); return NULL; }
|
||||||
|
int ret = PySet_Add(ans, t); Py_DECREF(t); if (ret != 0) { Py_DECREF(ans); return NULL; }
|
||||||
|
}
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Python wrapper for codepoints_for_word(): parses a single string argument
 * and forwards it together with its strlen() length.
 * Returns NULL if argument parsing fails.
 */
static PyObject*
cfw(PyObject *self UNUSED, PyObject *args) {
    const char *word;
    if (PyArg_ParseTuple(args, "s", &word)) {
        return codepoints_for_word(word, strlen(word));
    }
    return NULL;
}
|
||||||
|
|
||||||
|
/*
 * Python wrapper for name_for_codepoint(): parses one unsigned integer
 * argument and returns the name as a unicode string, or None when
 * name_for_codepoint() returns NULL for that codepoint.
 * Returns NULL if argument parsing fails.
 */
static PyObject*
nfc(PyObject *self UNUSED, PyObject *args) {
    unsigned int cp;
    if (!PyArg_ParseTuple(args, "I", &cp)) return NULL;
    const char *name = name_for_codepoint(cp);
    if (name != NULL) return PyUnicode_FromString(name);
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
|
static PyMethodDef module_methods[] = {
|
||||||
|
{"all_words", (PyCFunction)all_words, METH_NOARGS, ""},
|
||||||
|
{"codepoints_for_word", (PyCFunction)cfw, METH_VARARGS, ""},
|
||||||
|
{"name_for_codepoint", (PyCFunction)nfc, METH_VARARGS, ""},
|
||||||
|
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||||
|
};
|
||||||
|
|
||||||
|
#if PY_VERSION_HEX >= 0x03000000
|
||||||
|
static struct PyModuleDef module = {
|
||||||
|
.m_base = PyModuleDef_HEAD_INIT,
|
||||||
|
.m_name = "unicode_names", /* name of module */
|
||||||
|
.m_doc = NULL,
|
||||||
|
.m_size = -1,
|
||||||
|
.m_methods = module_methods
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
EXPORTED PyMODINIT_FUNC
|
||||||
|
PyInit_unicode_names(void) {
|
||||||
|
PyObject *m;
|
||||||
|
|
||||||
|
m = PyModule_Create(&module);
|
||||||
|
if (m == NULL) return NULL;
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
EXPORTED
|
||||||
|
initunicode_names(void) {
|
||||||
|
PyObject *m;
|
||||||
|
m = Py_InitModule3("unicode_names", module_methods,
|
||||||
|
""
|
||||||
|
);
|
||||||
|
if (m == NULL) return;
|
||||||
|
}
|
||||||
|
#endif
|
Loading…
x
Reference in New Issue
Block a user