mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: Fix clean_xml_text implementation
Fixes #1863517 [Characters are dropped from title](https://bugs.launchpad.net/calibre/+bug/1863517)
This commit is contained in:
parent
a91ed25d9b
commit
1ba8e64468
@ -8,12 +8,10 @@ from polyglot.builtins import codepoint_to_chr, map, range, filter
|
|||||||
from polyglot.html_entities import name2codepoint
|
from polyglot.html_entities import name2codepoint
|
||||||
from calibre.constants import plugins, preferred_encoding
|
from calibre.constants import plugins, preferred_encoding
|
||||||
|
|
||||||
try:
|
_ncxc = plugins['speedup'][0].clean_xml_chars
|
||||||
_ncxc = plugins['speedup'][0].clean_xml_chars
|
|
||||||
except AttributeError:
|
|
||||||
native_clean_xml_chars = None
|
def native_clean_xml_chars(x):
|
||||||
else:
|
|
||||||
def native_clean_xml_chars(x):
|
|
||||||
if isinstance(x, bytes):
|
if isinstance(x, bytes):
|
||||||
x = x.decode(preferred_encoding)
|
x = x.decode(preferred_encoding)
|
||||||
return _ncxc(x)
|
return _ncxc(x)
|
||||||
|
@ -394,8 +394,11 @@ clean_xml_chars(PyObject *self, PyObject *text) {
|
|||||||
// based on https://en.wikipedia.org/wiki/Valid_characters_in_XML#Non-restricted_characters
|
// based on https://en.wikipedia.org/wiki/Valid_characters_in_XML#Non-restricted_characters
|
||||||
// python 3.3+ unicode strings never contain surrogate pairs, since if
|
// python 3.3+ unicode strings never contain surrogate pairs, since if
|
||||||
// they did, they would be represented as UTF-32
|
// they did, they would be represented as UTF-32
|
||||||
if ((0x20 <= ch && ch <= 0xd7ff && ch != 0x7f) ||
|
if ((0x20 <= ch && ch <= 0x7e) ||
|
||||||
ch == 9 || ch == 10 || ch == 13 ||
|
ch == 0x9 || ch == 0xa || ch == 0xd || ch == 0x85 ||
|
||||||
|
(0x00A0 <= ch && ch <= 0xD7FF) ||
|
||||||
|
(0xE000 <= ch && ch <= 0xFDCF) ||
|
||||||
|
(0xFDF0 <= ch && ch <= 0xFFFD) ||
|
||||||
(0xffff < ch && ch <= 0x10ffff)) {
|
(0xffff < ch && ch <= 0x10ffff)) {
|
||||||
PyUnicode_WRITE(text_kind, result_text, target_i, ch);
|
PyUnicode_WRITE(text_kind, result_text, target_i, ch);
|
||||||
target_i += 1;
|
target_i += 1;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user