From fd2e3db07abf0541d3750ed21bfaa39f40d19561 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 Feb 2011 14:34:09 -0700 Subject: [PATCH] When trying to detect the encoding of html, do not use more than the first 10KB so that detection is not too slow --- src/calibre/ebooks/chardet/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py index f9bca3c8d4..10cd0a7112 100644 --- a/src/calibre/ebooks/chardet/__init__.py +++ b/src/calibre/ebooks/chardet/__init__.py @@ -53,7 +53,7 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman", def force_encoding(raw, verbose, assume_utf8=False): from calibre.constants import preferred_encoding try: - chardet = detect(raw) + chardet = detect(raw[:1024*10]) except: chardet = {'encoding':preferred_encoding, 'confidence':0} encoding = chardet['encoding']