When detecting the encoding of HTML, use no more than the first 10 KB of the input so that detection is not too slow

2025-07-09 03:04:10 -04:00 · 2011-02-16 14:34:09 -07:00 · 2011-02-16 14:34:09 -07:00 · fd2e3db07a
commit fd2e3db07a
parent a904d5d192
1 changed file with 1 addition and 1 deletion
--- a/src/calibre/ebooks/chardet/__init__.py
+++ b/src/calibre/ebooks/chardet/__init__.py
@@ -53,7 +53,7 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman",
 def force_encoding(raw, verbose, assume_utf8=False):
    from calibre.constants import preferred_encoding
    try:
-        chardet = detect(raw)
+        chardet = detect(raw[:1024*10])
    except:
        chardet = {'encoding':preferred_encoding, 'confidence':0}
    encoding = chardet['encoding']