mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
When trying to detect the encoding of HTML, use no more than the first 10 KB of the input so that detection is not too slow.
This commit is contained in:
parent
a904d5d192
commit
fd2e3db07a
@ -53,7 +53,7 @@ _CHARSET_ALIASES = { "macintosh" : "mac-roman",
|
|||||||
def force_encoding(raw, verbose, assume_utf8=False):
|
def force_encoding(raw, verbose, assume_utf8=False):
|
||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
try:
|
try:
|
||||||
chardet = detect(raw)
|
chardet = detect(raw[:1024*10])
|
||||||
except:
|
except:
|
||||||
chardet = {'encoding':preferred_encoding, 'confidence':0}
|
chardet = {'encoding':preferred_encoding, 'confidence':0}
|
||||||
encoding = chardet['encoding']
|
encoding = chardet['encoding']
|
||||||
|
Loading…
x
Reference in New Issue
Block a user