From c6eaede439fb3b39b58552747d5f8bf80755b730 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 8 Mar 2017 09:53:34 +0530
Subject: [PATCH] TXT Input: When detecting the encoding of txt files only use the first four kilobytes of text. Fixes excessively slow conversion of very large text files. See #1668246 (Txt file to mobi file, more than 19M dead loop.)

---
 src/calibre/ebooks/conversion/plugins/txt_input.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/conversion/plugins/txt_input.py b/src/calibre/ebooks/conversion/plugins/txt_input.py
index d5d6814830..81b5543d6b 100644
--- a/src/calibre/ebooks/conversion/plugins/txt_input.py
+++ b/src/calibre/ebooks/conversion/plugins/txt_input.py
@@ -110,7 +110,7 @@ class TXTInput(InputFormatPlugin):
             ienc = options.input_encoding
             log.debug('Using user specified input encoding of %s' % ienc)
         else:
-            det_encoding = detect(txt)
+            det_encoding = detect(txt[:4096])
             det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
             if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
                     'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',