From 139ba8fd5b00d64926d055024ecaf90c1f50d555 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 3 Mar 2014 11:46:52 +0530 Subject: [PATCH] Some performance improvements for BZZDecoder --- src/calibre/ebooks/djvu/djvu.py | 7 +++--- src/calibre/ebooks/djvu/djvubzzdec.py | 34 ++++++++++++--------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/src/calibre/ebooks/djvu/djvu.py b/src/calibre/ebooks/djvu/djvu.py index 7eebbca52b..6533bc89fb 100644 --- a/src/calibre/ebooks/djvu/djvu.py +++ b/src/calibre/ebooks/djvu/djvu.py @@ -13,7 +13,6 @@ __copyright__ = '2011, Anthon van der Neut ' import sys import struct -from cStringIO import StringIO from calibre.ebooks.djvu.djvubzzdec import BZZDecoder @@ -65,14 +64,14 @@ class DjvuChunk(object): out.write(b'%s%s [%d]\n' % (self.type, b':' + self.subtype if self.subtype else b'', self.size)) if txtout and self.type == b'TXTz': - inbuf = StringIO(self.buf[self.datastart: self.dataend]) - outbuf = StringIO() + inbuf = bytearray(self.buf[self.datastart: self.dataend]) + outbuf = bytearray() decoder = BZZDecoder(inbuf, outbuf) while True: xxres = decoder.convert(1024 * 1024) if not xxres: break - res = outbuf.getvalue() + res = bytes(outbuf) if not res.strip(b'\0'): raise ValueError('TXTz block is completely null') l = 0 diff --git a/src/calibre/ebooks/djvu/djvubzzdec.py b/src/calibre/ebooks/djvu/djvubzzdec.py index 6e8ebe54ae..97a654ae3a 100644 --- a/src/calibre/ebooks/djvu/djvubzzdec.py +++ b/src/calibre/ebooks/djvu/djvubzzdec.py @@ -388,13 +388,11 @@ xmtf = ( ) # }}} -def chr3(l): - return bytes(bytearray(l)) - class BZZDecoder(): def __init__(self, infile, outfile): self.instream = infile + self.inptr = 0 self.outf = outfile self.ieof = False self.bptr = None @@ -442,7 +440,7 @@ class BZZDecoder(): if self.ieof: return 0 copied = 0 - while sz > 0 and not (self.ieof): + while sz > 0 and not self.ieof: # Decode if needed if not self.xsize: self.bptr = 0 @@ -455,8 +453,7 @@ class BZZDecoder(): remaining = min(sz, self.xsize) # Transfer if remaining > 0: - raw = bytes(bytearray(self.outbuf[self.bptr:self.bptr + remaining])) - self.outf.write(raw) + self.outf.extend(self.outbuf[self.bptr:self.bptr + remaining]) self.xsize -= remaining self.bptr += remaining sz -= remaining @@ -466,7 +463,7 @@ class BZZDecoder(): def preload(self): while self.scount <= 24: - if self.read_byte() < 1: + if not self.read_byte(): self.byte = 0xff self.delay -= 1 if self.delay < 1: @@ -502,7 +499,7 @@ class BZZDecoder(): # Decode mtfno = 3 markerpos = -1 - for i in range(self.xsize): + for i in xrange(self.xsize): ctxid = CTXIDS - 1 if ctxid > mtfno: ctxid = mtfno @@ -712,15 +709,12 @@ class BZZDecoder(): return res def read_byte(self): - res = 0 - if self.instream: - ires = self.instream.read(1) - res = len(ires) - if res: - self.byte = ord(ires[0]) - else: - raise NotImplementedError - return res + try: + self.byte = self.instream[self.inptr] + self.inptr += 1 + return True + except IndexError: + return False def ffz(self): x = self.a @@ -733,13 +727,15 @@ class BZZDecoder(): # for testing def main(): import sys - infile = file(sys.argv[1], "rb") - outfile = file(sys.argv[2], "wb") + infile = bytearray(file(sys.argv[1], "rb").read()) + outfile = bytearray() dec = BZZDecoder(infile, outfile) while True: res = dec.convert(1024 * 1024) if not res: break + with open(sys.argv[2], 'wb') as f: + f.write(bytes(outfile)) if __name__ == "__main__": main()