mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
HTML Input: Limit memory consumption when converting HTML files that link to large binary files. Fixes #884821 (python terminated)
This commit is contained in:
parent
1e07787d02
commit
0600e911f1
@ -109,14 +109,16 @@ class HTMLFile(object):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
with open(self.path, 'rb') as f:
|
with open(self.path, 'rb') as f:
|
||||||
src = f.read()
|
src = f.read(4096)
|
||||||
|
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src))
|
||||||
|
if not self.is_binary:
|
||||||
|
src += f.read()
|
||||||
except IOError as err:
|
except IOError as err:
|
||||||
msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err))
|
msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err))
|
||||||
if level == 0:
|
if level == 0:
|
||||||
raise IOError(msg)
|
raise IOError(msg)
|
||||||
raise IgnoreFile(msg, err.errno)
|
raise IgnoreFile(msg, err.errno)
|
||||||
|
|
||||||
self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096]))
|
|
||||||
if not self.is_binary:
|
if not self.is_binary:
|
||||||
if not encoding:
|
if not encoding:
|
||||||
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
|
encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user