diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 3de116c411..3688668bfe 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -109,14 +109,16 @@ class HTMLFile(object): try: with open(self.path, 'rb') as f: - src = f.read() + src = f.read(4096) + self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src)) + if not self.is_binary: + src += f.read() except IOError as err: msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err)) if level == 0: raise IOError(msg) raise IgnoreFile(msg, err.errno) - self.is_binary = level > 0 and not bool(self.HTML_PAT.search(src[:4096])) if not self.is_binary: if not encoding: encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]