diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
index b2f4cc9bf0..0add461b14 100644
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -17,7 +17,7 @@ from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.conversion.preprocess import DocAnalysis
from calibre.utils.cleantext import clean_ascii_chars
-from polyglot.builtins import iteritems, unicode_type, map, range, long_type
+from polyglot.builtins import iteritems, unicode_type, map, range
HTML_TEMPLATE = '
%s \n%s\n'
@@ -53,7 +53,7 @@ def clean_txt(txt):
def split_txt(txt, epub_split_size_kb=0):
'''
Ensure there are split points for converting
- to EPUB. A misdetected paragraph type can
+ to EPUB. A mis-detected paragraph type can
result in the entire document being one giant
paragraph. In this case the EPUB parser will not
be able to determine where to split the file
@@ -64,16 +64,14 @@ def split_txt(txt, epub_split_size_kb=0):
if epub_split_size_kb > 0:
if isinstance(txt, unicode_type):
txt = txt.encode('utf-8')
- length_byte = len(txt)
- # Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)
- chunk_size = long_type(length_byte / (int(length_byte / (epub_split_size_kb * 1024)) + 2))
- # if there are chunks with a superior size then go and break
- parts = txt.split(b'\n\n')
- lengths = tuple(map(len, parts))
- if lengths and max(lengths) > chunk_size:
- txt = b'\n\n'.join([
- split_string_separator(line, chunk_size) for line in parts
- ])
+ if len(txt) > epub_split_size_kb * 1024:
+ chunk_size = max(16, epub_split_size_kb - 32) * 1024
+ # if any paragraph is larger than the chunk size, break it up
+ parts = txt.split(b'\n\n')
+ if parts and max(map(len, parts)) > chunk_size:
+ txt = b'\n\n'.join(
+ split_string_separator(line, chunk_size) for line in parts
+ )
if isbytestring(txt):
txt = txt.decode('utf-8')
@@ -242,15 +240,15 @@ def split_string_separator(txt, size):
'''
if len(txt) > size and size > 2:
size -= 2
- txt = []
+ ans = []
for part in (txt[i * size: (i + 1) * size] for i in range(0, len(txt), size)):
idx = part.rfind(b'.')
if idx == -1:
part += b'\n\n'
else:
part = part[:idx + 1] + b'\n\n' + part[idx:]
- txt.append(part)
- txt = b''.join(txt)
+ ans.append(part)
+ txt = b''.join(ans)
return txt