mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit book: Fix incorrect syntax highlighting on Linux if the text contains non-BMP Unicode characters.
This commit is contained in:
parent
7ee75a8775
commit
d337debc92
@ -6,6 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import sys
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
from PyQt4.Qt import (
|
from PyQt4.Qt import (
|
||||||
@ -13,10 +14,14 @@ from PyQt4.Qt import (
|
|||||||
|
|
||||||
from ..themes import highlight_to_char_format
|
from ..themes import highlight_to_char_format
|
||||||
from calibre.gui2.tweak_book.widgets import BusyCursor
|
from calibre.gui2.tweak_book.widgets import BusyCursor
|
||||||
|
from calibre.utils.icu import utf16_length
|
||||||
|
|
||||||
|
is_wide_build = sys.maxunicode >= 0x10ffff
|
||||||
|
|
||||||
def run_loop(user_data, state_map, formats, text):
|
def run_loop(user_data, state_map, formats, text):
|
||||||
state = user_data.state
|
state = user_data.state
|
||||||
i = 0
|
i = 0
|
||||||
|
fix_offsets = is_wide_build and utf16_length(text) != len(text)
|
||||||
seen_states = defaultdict(set)
|
seen_states = defaultdict(set)
|
||||||
while i < len(text):
|
while i < len(text):
|
||||||
orig_i = i
|
orig_i = i
|
||||||
@ -24,7 +29,12 @@ def run_loop(user_data, state_map, formats, text):
|
|||||||
fmt = state_map[state.parse](state, text, i, formats, user_data)
|
fmt = state_map[state.parse](state, text, i, formats, user_data)
|
||||||
for num, f in fmt:
|
for num, f in fmt:
|
||||||
if num > 0:
|
if num > 0:
|
||||||
yield i, num, f
|
if fix_offsets:
|
||||||
|
# We need to map offsets/lengths from UCS-4 to UTF-16 in
|
||||||
|
# which non-BMP characters are two code units wide
|
||||||
|
yield utf16_length(text[:i]), utf16_length(text[i:i+num]), f
|
||||||
|
else:
|
||||||
|
yield i, num, f
|
||||||
i += num
|
i += num
|
||||||
if orig_i == i and state.parse in seen_states[i]:
|
if orig_i == i and state.parse in seen_states[i]:
|
||||||
# Something went wrong in the syntax highlighter
|
# Something went wrong in the syntax highlighter
|
||||||
|
@ -486,6 +486,7 @@ if __name__ == '__main__':
|
|||||||
<input disabled><input disabled /><span attr=<></span>
|
<input disabled><input disabled /><span attr=<></span>
|
||||||
<!-- Non-breaking spaces are rendered differently from normal spaces, so that they stand out -->
|
<!-- Non-breaking spaces are rendered differently from normal spaces, so that they stand out -->
|
||||||
<p>Some\xa0words\xa0separated\xa0by\xa0non\u2011breaking\xa0spaces and non\u2011breaking hyphens.</p>
|
<p>Some\xa0words\xa0separated\xa0by\xa0non\u2011breaking\xa0spaces and non\u2011breaking hyphens.</p>
|
||||||
|
<p>Some non-BMP unicode text:\U0001f431\U0001f431\U0001f431</p>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
''', path_is_raw=True)
|
''', path_is_raw=True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user