mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: Fix clean_xml_text implementation
Fixes #1863517 [Characters are dropped from title](https://bugs.launchpad.net/calibre/+bug/1863517)
This commit is contained in:
parent
a91ed25d9b
commit
1ba8e64468
@@ -8,15 +8,13 @@ from polyglot.builtins import codepoint_to_chr, map, range, filter
|
||||
from polyglot.html_entities import name2codepoint
|
||||
from calibre.constants import plugins, preferred_encoding
|
||||
|
||||
try:
|
||||
_ncxc = plugins['speedup'][0].clean_xml_chars
|
||||
except AttributeError:
|
||||
native_clean_xml_chars = None
|
||||
else:
|
||||
def native_clean_xml_chars(x):
|
||||
if isinstance(x, bytes):
|
||||
x = x.decode(preferred_encoding)
|
||||
return _ncxc(x)
|
||||
_ncxc = plugins['speedup'][0].clean_xml_chars
|
||||
|
||||
|
||||
def native_clean_xml_chars(x):
|
||||
if isinstance(x, bytes):
|
||||
x = x.decode(preferred_encoding)
|
||||
return _ncxc(x)
|
||||
|
||||
|
||||
def ascii_pat(for_binary=False):
|
||||
|
@@ -394,8 +394,11 @@ clean_xml_chars(PyObject *self, PyObject *text) {
|
||||
// based on https://en.wikipedia.org/wiki/Valid_characters_in_XML#Non-restricted_characters
|
||||
// python 3.3+ unicode strings never contain surrogate pairs, since if
|
||||
// they did, they would be represented as UTF-32
|
||||
if ((0x20 <= ch && ch <= 0xd7ff && ch != 0x7f) ||
|
||||
ch == 9 || ch == 10 || ch == 13 ||
|
||||
if ((0x20 <= ch && ch <= 0x7e) ||
|
||||
ch == 0x9 || ch == 0xa || ch == 0xd || ch == 0x85 ||
|
||||
(0x00A0 <= ch && ch <= 0xD7FF) ||
|
||||
(0xE000 <= ch && ch <= 0xFDCF) ||
|
||||
(0xFDF0 <= ch && ch <= 0xFFFD) ||
|
||||
(0xffff < ch && ch <= 0x10ffff)) {
|
||||
PyUnicode_WRITE(text_kind, result_text, target_i, ch);
|
||||
target_i += 1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user