From 0600e911f1dde5b00c195c12917caaadb947cd6f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 2 Nov 2011 09:14:21 +0530 Subject: [PATCH] HTML Input: Limit memory consumption when converting HTML files that link to large binary files. Fixes #884821 (python terminated) --- src/calibre/ebooks/html/input.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 3de116c411..3688668bfe 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -109,14 +109,16 @@ class HTMLFile(object): try: with open(self.path, 'rb') as f: - src = f.read() + src = f.read(4096) + self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src)) + if not self.is_binary: + src += f.read() except IOError as err: msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err)) if level == 0: raise IOError(msg) raise IgnoreFile(msg, err.errno) - self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096])) if not self.is_binary: if not encoding: encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]