mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DJVU Input: When extracting embedded txt from TXTz sections in DJVU files, fix incorrect extraction of txt when the djvutxt external program is not present. Fixes #1286771 [converting DJVU file containing text fails](https://bugs.launchpad.net/calibre/+bug/1286771)
This commit is contained in:
parent
cc26b6e27f
commit
90aa34a473
@ -15,7 +15,7 @@ import sys
|
|||||||
import struct
|
import struct
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
|
||||||
from .djvubzzdec import BZZDecoder
|
from calibre.ebooks.djvu.djvubzzdec import BZZDecoder
|
||||||
|
|
||||||
class DjvuChunk(object):
|
class DjvuChunk(object):
|
||||||
def __init__(self, buf, start, end, align=True, bigendian=True,
|
def __init__(self, buf, start, end, align=True, bigendian=True,
|
||||||
@ -73,14 +73,16 @@ class DjvuChunk(object):
|
|||||||
if not xxres:
|
if not xxres:
|
||||||
break
|
break
|
||||||
res = outbuf.getvalue()
|
res = outbuf.getvalue()
|
||||||
|
if not res.strip(b'\0'):
|
||||||
|
raise ValueError('TXTz block is completely null')
|
||||||
l = 0
|
l = 0
|
||||||
for x in res[:3]:
|
for x in res[:3]:
|
||||||
l <<= 8
|
l <<= 8
|
||||||
l += ord(x)
|
l += ord(x)
|
||||||
if verbose > 0 and out:
|
if verbose > 0 and out:
|
||||||
print >> out, l
|
print (l, file=out)
|
||||||
txtout.write(res[3:3+l])
|
txtout.write(res[3:3+l])
|
||||||
txtout.write(b'\n\f')
|
txtout.write(b'\n')
|
||||||
if txtout and self.type == b'TXTa':
|
if txtout and self.type == b'TXTa':
|
||||||
res = self.buf[self.datastart: self.dataend]
|
res = self.buf[self.datastart: self.dataend]
|
||||||
l = 0
|
l = 0
|
||||||
@ -88,9 +90,9 @@ class DjvuChunk(object):
|
|||||||
l <<= 8
|
l <<= 8
|
||||||
l += ord(x)
|
l += ord(x)
|
||||||
if verbose > 0 and out:
|
if verbose > 0 and out:
|
||||||
print >> out, l
|
print (l, file=out)
|
||||||
txtout.write(res[3:3+l])
|
txtout.write(res[3:3+l])
|
||||||
txtout.write(b'\n\f')
|
txtout.write(b'\n')
|
||||||
if indent >= maxlevel:
|
if indent >= maxlevel:
|
||||||
return
|
return
|
||||||
for schunk in self._subchunks:
|
for schunk in self._subchunks:
|
||||||
@ -111,36 +113,8 @@ class DJVUFile(object):
|
|||||||
self.dc.dump(out=outfile, maxlevel=maxlevel)
|
self.dc.dump(out=outfile, maxlevel=maxlevel)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
from ruamel.util.program import Program
|
f = DJVUFile(open(sys.argv[-1], 'rb'))
|
||||||
class DJVUDecoder(Program):
|
print (f.get_text(sys.stdout))
|
||||||
def __init__(self):
|
|
||||||
Program.__init__(self)
|
|
||||||
|
|
||||||
def parser_setup(self):
|
|
||||||
Program.parser_setup(self)
|
|
||||||
#self._argparser.add_argument('--combine', '-c', action=CountAction, const=1, nargs=0)
|
|
||||||
#self._argparser.add_argument('--combine', '-c', type=int, default=1)
|
|
||||||
#self._argparser.add_argument('--segments', '-s', action='append', nargs='+')
|
|
||||||
#self._argparser.add_argument('--force', '-f', action='store_true')
|
|
||||||
#self._argparser.add_argument('classname')
|
|
||||||
self._argparser.add_argument('--text', '-t', action='store_true')
|
|
||||||
self._argparser.add_argument('--dump', type=int, default=0)
|
|
||||||
self._argparser.add_argument('file', nargs='+')
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
if self._args.verbose > 1: # can be negative with --quiet
|
|
||||||
print (self._args.file)
|
|
||||||
x = DJVUFile(file(self._args.file[0], 'rb'), verbose=self._args.verbose)
|
|
||||||
if self._args.text:
|
|
||||||
print (x.get_text(sys.stdout))
|
|
||||||
if self._args.dump:
|
|
||||||
x.dump(sys.stdout, maxlevel=self._args.dump)
|
|
||||||
return 0
|
|
||||||
|
|
||||||
tt = DJVUDecoder()
|
|
||||||
res = tt.result
|
|
||||||
if res != 0:
|
|
||||||
print (res)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
@ -80,6 +80,7 @@ MAXLEN = 1024 ** 2
|
|||||||
|
|
||||||
# Exception classes used by this module.
|
# Exception classes used by this module.
|
||||||
class BZZDecoderError(Exception):
|
class BZZDecoderError(Exception):
|
||||||
|
|
||||||
"""This exception is raised when BZZDecode runs into trouble
|
"""This exception is raised when BZZDecode runs into trouble
|
||||||
"""
|
"""
|
||||||
def __init__(self, msg):
|
def __init__(self, msg):
|
||||||
@ -391,6 +392,7 @@ def chr3(l):
|
|||||||
return bytes(bytearray(l))
|
return bytes(bytearray(l))
|
||||||
|
|
||||||
class BZZDecoder():
|
class BZZDecoder():
|
||||||
|
|
||||||
def __init__(self, infile, outfile):
|
def __init__(self, infile, outfile):
|
||||||
self.instream = infile
|
self.instream = infile
|
||||||
self.outf = outfile
|
self.outf = outfile
|
||||||
@ -450,17 +452,15 @@ class BZZDecoder():
|
|||||||
self.xsize -= 1
|
self.xsize -= 1
|
||||||
|
|
||||||
# Compute remaining
|
# Compute remaining
|
||||||
bytes = self.xsize
|
remaining = min(sz, self.xsize)
|
||||||
if bytes > sz:
|
|
||||||
bytes = sz
|
|
||||||
# Transfer
|
# Transfer
|
||||||
if bytes:
|
if remaining > 0:
|
||||||
for i in range(bytes):
|
raw = bytes(bytearray(self.outbuf[self.bptr:self.bptr + remaining]))
|
||||||
self.outf.write(chr3(self.outbuf[self.bptr + i]))
|
self.outf.write(raw)
|
||||||
self.xsize -= bytes
|
self.xsize -= remaining
|
||||||
self.bptr += bytes
|
self.bptr += remaining
|
||||||
sz -= bytes
|
sz -= remaining
|
||||||
copied += bytes
|
copied += remaining
|
||||||
# offset += bytes; // for tell()
|
# offset += bytes; // for tell()
|
||||||
return copied
|
return copied
|
||||||
|
|
||||||
@ -468,7 +468,8 @@ class BZZDecoder():
|
|||||||
while self.scount <= 24:
|
while self.scount <= 24:
|
||||||
if self.read_byte() < 1:
|
if self.read_byte() < 1:
|
||||||
self.byte = 0xff
|
self.byte = 0xff
|
||||||
if --self.delay < 1:
|
self.delay -= 1
|
||||||
|
if self.delay < 1:
|
||||||
raise BZZDecoderError("BiteStream EOF")
|
raise BZZDecoderError("BiteStream EOF")
|
||||||
self.bufint = (self.bufint << 8) | self.byte
|
self.bufint = (self.bufint << 8) | self.byte
|
||||||
self.scount += 8
|
self.scount += 8
|
||||||
@ -729,9 +730,7 @@ class BZZDecoder():
|
|||||||
return (self.ffzt[(x >> 8) & 0xff])
|
return (self.ffzt[(x >> 8) & 0xff])
|
||||||
|
|
||||||
|
|
||||||
|
# for testing
|
||||||
### for testing
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
import sys
|
import sys
|
||||||
infile = file(sys.argv[1], "rb")
|
infile = file(sys.argv[1], "rb")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user