mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
When trying to detect the encoding of HTML, use at most the first 10 KB of the input so that detection is not too slow
This commit is contained in:
parent
a904d5d192
commit
fd2e3db07a
@@ -53,7 +53,7 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman",
|
||||
def force_encoding(raw, verbose, assume_utf8=False):
|
||||
from calibre.constants import preferred_encoding
|
||||
try:
|
||||
-chardet = detect(raw)
|
||||
+chardet = detect(raw[:1024*10])
|
||||
except:
|
||||
chardet = {'encoding':preferred_encoding, 'confidence':0}
|
||||
encoding = chardet['encoding']
|
||||
|
Loading…
x
Reference in New Issue
Block a user