mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1917889 [carlibre epub reader got a bug](https://bugs.launchpad.net/calibre/+bug/1917889)
This commit is contained in:
parent
f7d80b53e6
commit
5d126692f8
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, codecs
|
import re, codecs, sys
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type
|
||||||
|
|
||||||
_encoding_pats = (
|
_encoding_pats = (
|
||||||
@ -106,32 +106,30 @@ _CHARSET_ALIASES = {"macintosh" : "mac-roman", "x-sjis" : "shift-jis"}
|
|||||||
|
|
||||||
|
|
||||||
def detect(bytestring):
|
def detect(bytestring):
|
||||||
try:
|
|
||||||
from cchardet import detect as implementation
|
from cchardet import detect as implementation
|
||||||
except ImportError:
|
|
||||||
from chardet import detect as implementation
|
|
||||||
return implementation(bytestring)
|
|
||||||
else:
|
|
||||||
ans = implementation(bytestring)
|
ans = implementation(bytestring)
|
||||||
enc = ans.get('encoding')
|
enc = ans.get('encoding')
|
||||||
if enc:
|
if enc:
|
||||||
ans['encoding'] = enc.lower()
|
ans['encoding'] = enc.lower()
|
||||||
|
elif enc is None:
|
||||||
|
ans['encoding'] = ''
|
||||||
|
if ans.get('confidence') is None:
|
||||||
|
ans['confidence'] = 0
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
def force_encoding(raw, verbose, assume_utf8=False):
|
def force_encoding(raw, verbose, assume_utf8=False):
|
||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
|
|
||||||
try:
|
try:
|
||||||
chardet = detect(raw[:1024*50])
|
chardet = detect(raw[:1024*50])
|
||||||
except:
|
except Exception:
|
||||||
chardet = {'encoding':preferred_encoding, 'confidence':0}
|
chardet = {'encoding':preferred_encoding, 'confidence':0}
|
||||||
encoding = chardet['encoding']
|
encoding = chardet['encoding']
|
||||||
if chardet['confidence'] < 1 and assume_utf8:
|
if chardet['confidence'] < 1:
|
||||||
|
if verbose:
|
||||||
|
print(f'WARNING: Encoding detection confidence for {chardet["encoding"]} is {chardet["confidence"]}', file=sys.stderr)
|
||||||
|
if assume_utf8:
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
if chardet['confidence'] < 1 and verbose:
|
|
||||||
print('WARNING: Encoding detection confidence for %s is %d%%'%(
|
|
||||||
chardet['encoding'], chardet['confidence']*100))
|
|
||||||
if not encoding:
|
if not encoding:
|
||||||
encoding = preferred_encoding
|
encoding = preferred_encoding
|
||||||
encoding = encoding.lower()
|
encoding = encoding.lower()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user