def split_utf8(s, n):
    """Split UTF-8 encoded bytes into chunks of at most n bytes.

    Chunks are cut on character boundaries, so each chunk of well-formed
    UTF-8 input decodes on its own. Joining the yielded chunks always
    reproduces the input exactly.

    Args:
        s: A bytes-like object holding UTF-8 encoded text.
        n: Maximum chunk length in bytes. Must be at least 4, the length of
            the longest UTF-8 sequence; a smaller limit could not hold every
            character.

    Yields:
        bytes: Consecutive chunks of s, each at most n bytes long. An empty
        input yields a single empty chunk.

    Raises:
        ValueError: If n is less than 4. Because this is a generator, the
            error is raised when iteration starts, not when it is called.
    """
    if n < 4:
        raise ValueError(f'Cannot split into chunks of {n} < 4 bytes')
    # A memoryview makes the repeated re-slicing below copy-free.
    s = memoryview(s)
    while len(s) > n:
        k = n
        # Bytes of the form 0b10xxxxxx are continuation bytes; step back
        # until s[k] starts a character so the cut falls on a boundary.
        while k > 0 and (s[k] & 0xc0) == 0x80:
            k -= 1
        if k == 0:
            # Only reachable with malformed input (a run of continuation
            # bytes filling the whole window). Cut at n anyway so that the
            # loop always makes progress instead of yielding b'' forever.
            k = n
        yield bytes(s[:k])
        s = s[k:]
    yield bytes(s)