Split dump output into text and structure

This commit is contained in:
Anthon 2011-10-04 12:36:08 +02:00
parent e46e67949a
commit bfade8c911

View File

@ -57,13 +57,11 @@ class DjvuChunk(object):
if verbose > 0: if verbose > 0:
print ' end of chunk %d (%x)' % (pos, pos) print ' end of chunk %d (%x)' % (pos, pos)
def dump(self, verbose=0, indent=1, out=None): def dump(self, verbose=0, indent=1, out=None, txtout=None, maxlevel=100):
if out is None: if out:
out = sys.stdout
if verbose > 0:
out.write(' ' * indent) out.write(' ' * indent)
out.write('%s%s [%d]\n' % (self.type, ':' + self.subtype if self.subtype else '', self.size)) out.write('%s%s [%d]\n' % (self.type, ':' + self.subtype if self.subtype else '', self.size))
if self.type == 'TXTz': if txtout and self.type == 'TXTz':
inbuf = StringIO(self.buf[self.datastart: self.dataend]) inbuf = StringIO(self.buf[self.datastart: self.dataend])
outbuf = StringIO() outbuf = StringIO()
decoder = BZZDecoder(inbuf, outbuf) decoder = BZZDecoder(inbuf, outbuf)
@ -76,33 +74,38 @@ class DjvuChunk(object):
for x in res[:3]: for x in res[:3]:
l <<= 8 l <<= 8
l += ord(x) l += ord(x)
if verbose > 0: if verbose > 0 and out:
print >> out, l print >> out, l
out.write(res[3:3+l]) txtout.write(res[3:3+l])
out.write('\n\f') txtout.write('\n\f')
if self.type == 'TXTa': if txtout and self.type == 'TXTa':
res = self.buf[self.datastart: self.dataend] res = self.buf[self.datastart: self.dataend]
l = 0 l = 0
for x in res[:3]: for x in res[:3]:
l <<= 8 l <<= 8
l += ord(x) l += ord(x)
if verbose > 0: if verbose > 0 and out:
print >> out, l print >> out, l
out.write(res[3:3+l]) txtout.write(res[3:3+l])
out.write('\n\f') txtout.write('\n\f')
if indent >= maxlevel:
return
for schunk in self._subchunks: for schunk in self._subchunks:
schunk.dump(verbose=verbose, indent=indent+1, out=out) schunk.dump(verbose=verbose, indent=indent+1, out=out, txtout=txtout)
class DJVUFile(object):
    """A DjVu stream: validates the 'AT&T' magic and wraps the rest in a DjvuChunk.

    The whole stream is read into memory on construction; everything after
    the four magic bytes is handed to ``DjvuChunk`` and kept as ``self.dc``.
    """

    def __init__(self, instream, verbose=0):
        """Read *instream* completely and parse it.

        instream -- file-like object with a ``read()`` method, positioned
                    at the start of the DjVu data.
        verbose  -- verbosity level, forwarded unchanged to ``DjvuChunk``.

        Raises AssertionError if the stream does not start with 'AT&T'.
        """
        self.instream = instream
        expected_magic = 'AT&T'
        magic = self.instream.read(len(expected_magic))
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with -O; AssertionError is kept so that
        # existing callers see the same exception type as before.
        if magic != expected_magic:
            raise AssertionError(
                'not a DjVu stream: expected magic %r, got %r'
                % (expected_magic, magic))
        payload = self.instream.read()
        self.dc = DjvuChunk(payload, 0, len(payload), verbose=verbose)

    def get_text(self, outfile=None):
        """Write the document text to *outfile* by dumping with ``txtout`` set.

        Only the text stream is passed on; no structure output is requested.
        Returns None.
        """
        self.dc.dump(txtout=outfile)

    def dump(self, outfile=None, maxlevel=0):
        """Write the chunk structure to *outfile*, limited by *maxlevel*.

        Only the structure stream is passed on; no text output is requested.
        Returns None.

        NOTE(review): the depth limit is applied by ``DjvuChunk.dump``; it
        appears to stop descending once its indent level reaches *maxlevel*
        but not to forward *maxlevel* to subchunks -- confirm that values
        above 1 really limit the depth.
        """
        self.dc.dump(out=outfile, maxlevel=maxlevel)
def main(): def main():
from ruamel.util.program import Program, CountAction from ruamel.util.program import Program, CountAction
@ -117,13 +120,18 @@ def main():
#self._argparser.add_argument('--segments', '-s', action='append', nargs='+') #self._argparser.add_argument('--segments', '-s', action='append', nargs='+')
#self._argparser.add_argument('--force', '-f', action='store_true') #self._argparser.add_argument('--force', '-f', action='store_true')
#self._argparser.add_argument('classname') #self._argparser.add_argument('classname')
self._argparser.add_argument('--text', '-t', action='store_true')
self._argparser.add_argument('--dump', type=int, default=0)
self._argparser.add_argument('file', nargs='+') self._argparser.add_argument('file', nargs='+')
def run(self): def run(self):
if self._args.verbose > 1: # can be negative with --quiet if self._args.verbose > 1: # can be negative with --quiet
print self._args.file print self._args.file
x = DJVUFile(file(self._args.file[0], 'rb')) x = DJVUFile(file(self._args.file[0], 'rb'), verbose=self._args.verbose)
x.get_text() if self._args.text:
print x.get_text(sys.stdout)
if self._args.dump:
x.dump(sys.stdout, maxlevel=self._args.dump)
return 0 return 0
tt = DJVUDecoder() tt = DJVUDecoder()