def adjust_for_non_bmp_chars(raw: str, start: int, end: int) -> tuple[int, int]:
    """Re-express a ``(start, end)`` span of *raw* in ``utf16_length`` units.

    The incoming offsets index into the Python string *raw* (callers obtain
    them from a regex match span).  The returned start is the UTF-16 length
    of everything before the span, and the returned end is that value plus
    the UTF-16 length of the span itself, so text containing non-BMP
    characters before or inside the span is accounted for.

    NOTE(review): presumably needed because the editor's text cursor counts
    positions in UTF-16 code units -- confirm against the callers.

    :param raw: The text the span refers to.
    :param start: Start offset of the span within *raw*.
    :param end: End offset of the span within *raw*.
    :return: The adjusted ``(start, end)`` pair.
    """
    # Measure the prefix first, then the span, mirroring the offsets' order.
    prefix_units = utf16_length(raw[:start])
    span_units = utf16_length(raw[start:end])
    return prefix_units, prefix_units + span_units