diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 3d858548a6..5eaf6f69b4 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -449,8 +449,15 @@ def my_unichr(num): except (ValueError, OverflowError): return '?' +XML_ENTITIES = { + '"' : '"', + "'" : ''', + '<' : '<', + '>' : '>', + '&' : '&' +} -def entity_to_unicode(match, exceptions=[], encoding='cp1252', +def entity_to_unicode(match, exceptions=(), encoding='cp1252', result_exceptions={}): ''' :param match: A match object such that '&'+match.group(1)';' is the entity. @@ -502,12 +509,7 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252', _ent_pat = re.compile(r'&(\S+?);') -xml_entity_to_unicode = partial(entity_to_unicode, result_exceptions={ - '"' : '"', - "'" : ''', - '<' : '<', - '>' : '>', - '&' : '&'}) +xml_entity_to_unicode = partial(entity_to_unicode, result_exceptions=XML_ENTITIES) def replace_entities(raw, encoding='cp1252'): diff --git a/src/calibre/ebooks/html_entities.c b/src/calibre/ebooks/html_entities.c index 8b1b9ac9bd..9dfb8c479c 100644 --- a/src/calibre/ebooks/html_entities.c +++ b/src/calibre/ebooks/html_entities.c @@ -142,7 +142,7 @@ replace(const char *input, size_t input_sz, char *output, int keep_xml_entities) } static PyObject* -replace_entities(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { +replace_all_entities(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { if (nargs < 1) { PyErr_SetString(PyExc_TypeError, "Must specify string tp process"); return NULL; } const char *input = NULL; Py_ssize_t input_sz = 0; int keep_xml_entities = false; @@ -168,8 +168,8 @@ replace_entities(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { } static PyMethodDef methods[] = { - {"replace_entities", (PyCFunction)replace_entities, METH_FASTCALL, - "Replace entities in the specified string" + {"replace_all_entities", (PyCFunction)replace_all_entities, METH_FASTCALL, + "Replace all entities in the specified string" }, {NULL, NULL, 0, NULL} }; diff --git a/src/calibre/ebooks/html_entities.py b/src/calibre/ebooks/html_entities.py index 8e60cac80b..239f09e2f5 100644 --- a/src/calibre/ebooks/html_entities.py +++ b/src/calibre/ebooks/html_entities.py @@ -2139,11 +2139,11 @@ def find_tests(): import unittest class TestHTMLEntityReplacement(unittest.TestCase): def test_html_entity_replacement(self): - from calibre_extensions.fast_html_entities import replace_entities + from calibre_extensions.fast_html_entities import replace_all_entities def t(inp, exp): - self.assertEqual(exp, replace_entities(inp), f'Failed for input: {inp!r}') + self.assertEqual(exp, replace_all_entities(inp), f'Failed for input: {inp!r}') def x(inp, exp): - self.assertEqual(exp, replace_entities(inp, True), f'Failed for input: {inp!r}') + self.assertEqual(exp, replace_all_entities(inp, True), f'Failed for input: {inp!r}') t('aӒb', 'aӒb') t('', '') t('a', 'a')