From fd2e3db07abf0541d3750ed21bfaa39f40d19561 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Feb 2011 14:34:09 -0700
Subject: [PATCH] When trying to detect the encoding of HTML, do not use more
 than the first 10KB so that detection is not too slow

---
 src/calibre/ebooks/chardet/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py
index f9bca3c8d4..10cd0a7112 100644
--- a/src/calibre/ebooks/chardet/__init__.py
+++ b/src/calibre/ebooks/chardet/__init__.py
@@ -53,7 +53,7 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman",
 def force_encoding(raw, verbose, assume_utf8=False):
     from calibre.constants import preferred_encoding
     try:
-        chardet = detect(raw)
+        chardet = detect(raw[:1024*10])
     except:
         chardet = {'encoding':preferred_encoding, 'confidence':0}
     encoding = chardet['encoding']