mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	TXT Input: When detecting the encoding of TXT files, only use the first four kilobytes of text. Fixes excessively slow conversion of very large text files. See #1668246 (Txt file to mobi file, more than 19M dead loop.)
This commit is contained in:
		
							parent
							
								
									3a515c1db6
								
							
						
					
					
						commit
						c6eaede439
					
				@ -110,7 +110,7 @@ class TXTInput(InputFormatPlugin):
 | 
				
			|||||||
            ienc = options.input_encoding
 | 
					            ienc = options.input_encoding
 | 
				
			||||||
            log.debug('Using user specified input encoding of %s' % ienc)
 | 
					            log.debug('Using user specified input encoding of %s' % ienc)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            det_encoding = detect(txt)
 | 
					            det_encoding = detect(txt[:4096])
 | 
				
			||||||
            det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
 | 
					            det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
 | 
				
			||||||
            if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
 | 
					            if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
 | 
				
			||||||
                    'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
 | 
					                    'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user