Some performance improvements for BZZDecoder

This commit is contained in:
Kovid Goyal 2014-03-03 11:46:52 +05:30
parent 90aa34a473
commit 139ba8fd5b
2 changed files with 18 additions and 23 deletions

View File

@@ -13,7 +13,6 @@ __copyright__ = '2011, Anthon van der Neut <A.van.der.Neut@ruamel.eu>'
import sys import sys
import struct import struct
from cStringIO import StringIO
from calibre.ebooks.djvu.djvubzzdec import BZZDecoder from calibre.ebooks.djvu.djvubzzdec import BZZDecoder
@@ -65,14 +64,14 @@ class DjvuChunk(object):
out.write(b'%s%s [%d]\n' % (self.type, out.write(b'%s%s [%d]\n' % (self.type,
b':' + self.subtype if self.subtype else b'', self.size)) b':' + self.subtype if self.subtype else b'', self.size))
if txtout and self.type == b'TXTz': if txtout and self.type == b'TXTz':
inbuf = StringIO(self.buf[self.datastart: self.dataend]) inbuf = bytearray(self.buf[self.datastart: self.dataend])
outbuf = StringIO() outbuf = bytearray()
decoder = BZZDecoder(inbuf, outbuf) decoder = BZZDecoder(inbuf, outbuf)
while True: while True:
xxres = decoder.convert(1024 * 1024) xxres = decoder.convert(1024 * 1024)
if not xxres: if not xxres:
break break
res = outbuf.getvalue() res = bytes(outbuf)
if not res.strip(b'\0'): if not res.strip(b'\0'):
raise ValueError('TXTz block is completely null') raise ValueError('TXTz block is completely null')
l = 0 l = 0

View File

@@ -388,13 +388,11 @@ xmtf = (
) )
# }}} # }}}
def chr3(l):
return bytes(bytearray(l))
class BZZDecoder(): class BZZDecoder():
def __init__(self, infile, outfile): def __init__(self, infile, outfile):
self.instream = infile self.instream = infile
self.inptr = 0
self.outf = outfile self.outf = outfile
self.ieof = False self.ieof = False
self.bptr = None self.bptr = None
@@ -442,7 +440,7 @@ class BZZDecoder():
if self.ieof: if self.ieof:
return 0 return 0
copied = 0 copied = 0
while sz > 0 and not (self.ieof): while sz > 0 and not self.ieof:
# Decode if needed # Decode if needed
if not self.xsize: if not self.xsize:
self.bptr = 0 self.bptr = 0
@@ -455,8 +453,7 @@ class BZZDecoder():
remaining = min(sz, self.xsize) remaining = min(sz, self.xsize)
# Transfer # Transfer
if remaining > 0: if remaining > 0:
raw = bytes(bytearray(self.outbuf[self.bptr:self.bptr + remaining])) self.outf.extend(self.outbuf[self.bptr:self.bptr + remaining])
self.outf.write(raw)
self.xsize -= remaining self.xsize -= remaining
self.bptr += remaining self.bptr += remaining
sz -= remaining sz -= remaining
@@ -466,7 +463,7 @@ class BZZDecoder():
def preload(self): def preload(self):
while self.scount <= 24: while self.scount <= 24:
if self.read_byte() < 1: if not self.read_byte():
self.byte = 0xff self.byte = 0xff
self.delay -= 1 self.delay -= 1
if self.delay < 1: if self.delay < 1:
@@ -502,7 +499,7 @@ class BZZDecoder():
# Decode # Decode
mtfno = 3 mtfno = 3
markerpos = -1 markerpos = -1
for i in range(self.xsize): for i in xrange(self.xsize):
ctxid = CTXIDS - 1 ctxid = CTXIDS - 1
if ctxid > mtfno: if ctxid > mtfno:
ctxid = mtfno ctxid = mtfno
@@ -712,15 +709,12 @@ class BZZDecoder():
return res return res
def read_byte(self): def read_byte(self):
res = 0 try:
if self.instream: self.byte = self.instream[self.inptr]
ires = self.instream.read(1) self.inptr += 1
res = len(ires) return True
if res: except IndexError:
self.byte = ord(ires[0]) return False
else:
raise NotImplementedError
return res
def ffz(self): def ffz(self):
x = self.a x = self.a
@@ -733,13 +727,15 @@ class BZZDecoder():
# for testing # for testing
def main(): def main():
import sys import sys
infile = file(sys.argv[1], "rb") infile = bytearray(file(sys.argv[1], "rb").read())
outfile = file(sys.argv[2], "wb") outfile = bytearray()
dec = BZZDecoder(infile, outfile) dec = BZZDecoder(infile, outfile)
while True: while True:
res = dec.convert(1024 * 1024) res = dec.convert(1024 * 1024)
if not res: if not res:
break break
with open(sys.argv[2], 'wb') as f:
f.write(bytes(outfile))
if __name__ == "__main__": if __name__ == "__main__":
main() main()