mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit book: Fix incorrect syntax highlighting on Linux if the text contains non-BMP Unicode characters.
This commit is contained in:
parent
7ee75a8775
commit
d337debc92
@ -6,6 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
from PyQt4.Qt import (
|
||||
@ -13,10 +14,14 @@ from PyQt4.Qt import (
|
||||
|
||||
from ..themes import highlight_to_char_format
|
||||
from calibre.gui2.tweak_book.widgets import BusyCursor
|
||||
from calibre.utils.icu import utf16_length
|
||||
|
||||
# True when sys.maxunicode covers the full Unicode range (a "wide" build), in
# which case a non-BMP character counts as one unit in len() and slicing,
# whereas it occupies two units in UTF-16.
is_wide_build = sys.maxunicode >= 0x10ffff
|
||||
|
||||
def run_loop(user_data, state_map, formats, text):
|
||||
state = user_data.state
|
||||
i = 0
|
||||
fix_offsets = is_wide_build and utf16_length(text) != len(text)
|
||||
seen_states = defaultdict(set)
|
||||
while i < len(text):
|
||||
orig_i = i
|
||||
@ -24,7 +29,12 @@ def run_loop(user_data, state_map, formats, text):
|
||||
fmt = state_map[state.parse](state, text, i, formats, user_data)
|
||||
for num, f in fmt:
|
||||
if num > 0:
|
||||
yield i, num, f
|
||||
if fix_offsets:
|
||||
# We need to map offsets/lengths from UCS-4 to UTF-16 in
|
||||
# which non-BMP characters are two code units (a surrogate pair) wide
|
||||
yield utf16_length(text[:i]), utf16_length(text[i:i+num]), f
|
||||
else:
|
||||
yield i, num, f
|
||||
i += num
|
||||
if orig_i == i and state.parse in seen_states[i]:
|
||||
# Something went wrong in the syntax highlighter
|
||||
|
@ -486,6 +486,7 @@ if __name__ == '__main__':
|
||||
<input disabled><input disabled /><span attr=<></span>
|
||||
<!-- Non-breaking spaces are rendered differently from normal spaces, so that they stand out -->
|
||||
<p>Some\xa0words\xa0separated\xa0by\xa0non\u2011breaking\xa0spaces and non\u2011breaking hyphens.</p>
|
||||
<p>Some non-BMP unicode text:\U0001f431\U0001f431\U0001f431</p>
|
||||
</body>
|
||||
</html>
|
||||
''', path_is_raw=True)
|
||||
|
Loading…
x
Reference in New Issue
Block a user