Some performance improvements for BZZDecoder

This commit is contained in:
Kovid Goyal 2014-03-03 11:46:52 +05:30
parent 90aa34a473
commit 139ba8fd5b
2 changed files with 18 additions and 23 deletions

View File

@ -13,7 +13,6 @@ __copyright__ = '2011, Anthon van der Neut <A.van.der.Neut@ruamel.eu>'
import sys
import struct
from cStringIO import StringIO
from calibre.ebooks.djvu.djvubzzdec import BZZDecoder
@ -65,14 +64,14 @@ class DjvuChunk(object):
out.write(b'%s%s [%d]\n' % (self.type,
b':' + self.subtype if self.subtype else b'', self.size))
if txtout and self.type == b'TXTz':
inbuf = StringIO(self.buf[self.datastart: self.dataend])
outbuf = StringIO()
inbuf = bytearray(self.buf[self.datastart: self.dataend])
outbuf = bytearray()
decoder = BZZDecoder(inbuf, outbuf)
while True:
xxres = decoder.convert(1024 * 1024)
if not xxres:
break
res = outbuf.getvalue()
res = bytes(outbuf)
if not res.strip(b'\0'):
raise ValueError('TXTz block is completely null')
l = 0

View File

@ -388,13 +388,11 @@ xmtf = (
)
# }}}
def chr3(l):
    """Return the contents of ``l`` as an immutable byte string.

    ``l`` is handed to ``bytearray()`` and the result is converted with
    ``bytes()``, so an iterable of integers in ``range(256)`` becomes the
    byte string holding exactly those byte values.
    """
    return bytes(bytearray(l))
class BZZDecoder():
def __init__(self, infile, outfile):
self.instream = infile
self.inptr = 0
self.outf = outfile
self.ieof = False
self.bptr = None
@ -442,7 +440,7 @@ class BZZDecoder():
if self.ieof:
return 0
copied = 0
while sz > 0 and not (self.ieof):
while sz > 0 and not self.ieof:
# Decode if needed
if not self.xsize:
self.bptr = 0
@ -455,8 +453,7 @@ class BZZDecoder():
remaining = min(sz, self.xsize)
# Transfer
if remaining > 0:
raw = bytes(bytearray(self.outbuf[self.bptr:self.bptr + remaining]))
self.outf.write(raw)
self.outf.extend(self.outbuf[self.bptr:self.bptr + remaining])
self.xsize -= remaining
self.bptr += remaining
sz -= remaining
@ -466,7 +463,7 @@ class BZZDecoder():
def preload(self):
while self.scount <= 24:
if self.read_byte() < 1:
if not self.read_byte():
self.byte = 0xff
self.delay -= 1
if self.delay < 1:
@ -502,7 +499,7 @@ class BZZDecoder():
# Decode
mtfno = 3
markerpos = -1
for i in range(self.xsize):
for i in xrange(self.xsize):
ctxid = CTXIDS - 1
if ctxid > mtfno:
ctxid = mtfno
@ -712,15 +709,12 @@ class BZZDecoder():
return res
def read_byte(self):
    """Load the next input byte into ``self.byte``.

    The input is read by indexing ``self.instream`` at ``self.inptr``;
    the pointer is advanced only when a byte was actually available.

    NOTE(review): assumes ``self.instream`` is an indexable buffer whose
    items are integers (callers in view pass a ``bytearray``) -- confirm
    for any other caller.

    Returns:
        True if a byte was stored in ``self.byte``, False when
        ``self.inptr`` is past the end of the input (``self.byte`` and
        ``self.inptr`` are left untouched in that case).
    """
    try:
        # Only the lookup can raise; keep the try body minimal.
        self.byte = self.instream[self.inptr]
    except IndexError:
        # End of input reached.
        return False
    self.inptr += 1
    return True
def ffz(self):
x = self.a
@ -733,13 +727,15 @@ class BZZDecoder():
# for testing
def main():
    """Decode the file named by ``sys.argv[1]`` and write it to ``sys.argv[2]``.

    The whole input is read into a ``bytearray`` and decoded into an
    in-memory ``bytearray``; the output file is opened only after decoding
    has finished, so it is not truncated before the decoder has run.
    """
    import sys

    # Read the input up front and close the handle immediately.
    with open(sys.argv[1], 'rb') as f:
        infile = bytearray(f.read())
    outfile = bytearray()

    dec = BZZDecoder(infile, outfile)
    # convert() returns a falsy value once there is nothing left to decode.
    while True:
        res = dec.convert(1024 * 1024)
        if not res:
            break

    with open(sys.argv[2], 'wb') as f:
        f.write(bytes(outfile))
if __name__ == "__main__":
main()