mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TXT Input: When detecting the encoding of TXT files, use only the first four kilobytes of text. Fixes excessively slow conversion of very large text files. See #1668246 (Txt file to mobi file, more than 19M dead loop.)
This commit is contained in:
parent
3a515c1db6
commit
c6eaede439
@ -110,7 +110,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
ienc = options.input_encoding
|
ienc = options.input_encoding
|
||||||
log.debug('Using user specified input encoding of %s' % ienc)
|
log.debug('Using user specified input encoding of %s' % ienc)
|
||||||
else:
|
else:
|
||||||
det_encoding = detect(txt)
|
det_encoding = detect(txt[:4096])
|
||||||
det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
|
det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
|
||||||
if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
|
if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
|
||||||
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
|
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user