mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DJVU Input: Fix incorrect extraction of the embedded text from TXTz sections in DJVU files when the external djvutxt program is not present. Fixes #1286771 [converting DJVU file containing text fails](https://bugs.launchpad.net/calibre/+bug/1286771)
This commit is contained in:
parent
cc26b6e27f
commit
90aa34a473
@ -44,7 +44,7 @@ class DJVUInput(InputFormatPlugin):
|
||||
os.remove(filename)
|
||||
ppdjvu = False
|
||||
except:
|
||||
stream.seek(0) # retry with the pure python converter
|
||||
stream.seek(0) # retry with the pure python converter
|
||||
if ppdjvu:
|
||||
from calibre.ebooks.djvu.djvu import DJVUFile
|
||||
x = DJVUFile(stream)
|
||||
|
@ -15,7 +15,7 @@ import sys
|
||||
import struct
|
||||
from cStringIO import StringIO
|
||||
|
||||
from .djvubzzdec import BZZDecoder
|
||||
from calibre.ebooks.djvu.djvubzzdec import BZZDecoder
|
||||
|
||||
class DjvuChunk(object):
|
||||
def __init__(self, buf, start, end, align=True, bigendian=True,
|
||||
@ -73,14 +73,16 @@ class DjvuChunk(object):
|
||||
if not xxres:
|
||||
break
|
||||
res = outbuf.getvalue()
|
||||
if not res.strip(b'\0'):
|
||||
raise ValueError('TXTz block is completely null')
|
||||
l = 0
|
||||
for x in res[:3]:
|
||||
l <<= 8
|
||||
l += ord(x)
|
||||
if verbose > 0 and out:
|
||||
print >> out, l
|
||||
print (l, file=out)
|
||||
txtout.write(res[3:3+l])
|
||||
txtout.write(b'\n\f')
|
||||
txtout.write(b'\n')
|
||||
if txtout and self.type == b'TXTa':
|
||||
res = self.buf[self.datastart: self.dataend]
|
||||
l = 0
|
||||
@ -88,9 +90,9 @@ class DjvuChunk(object):
|
||||
l <<= 8
|
||||
l += ord(x)
|
||||
if verbose > 0 and out:
|
||||
print >> out, l
|
||||
print (l, file=out)
|
||||
txtout.write(res[3:3+l])
|
||||
txtout.write(b'\n\f')
|
||||
txtout.write(b'\n')
|
||||
if indent >= maxlevel:
|
||||
return
|
||||
for schunk in self._subchunks:
|
||||
@ -111,36 +113,8 @@ class DJVUFile(object):
|
||||
self.dc.dump(out=outfile, maxlevel=maxlevel)
|
||||
|
||||
def main():
|
||||
from ruamel.util.program import Program
|
||||
class DJVUDecoder(Program):
|
||||
def __init__(self):
|
||||
Program.__init__(self)
|
||||
|
||||
def parser_setup(self):
|
||||
Program.parser_setup(self)
|
||||
#self._argparser.add_argument('--combine', '-c', action=CountAction, const=1, nargs=0)
|
||||
#self._argparser.add_argument('--combine', '-c', type=int, default=1)
|
||||
#self._argparser.add_argument('--segments', '-s', action='append', nargs='+')
|
||||
#self._argparser.add_argument('--force', '-f', action='store_true')
|
||||
#self._argparser.add_argument('classname')
|
||||
self._argparser.add_argument('--text', '-t', action='store_true')
|
||||
self._argparser.add_argument('--dump', type=int, default=0)
|
||||
self._argparser.add_argument('file', nargs='+')
|
||||
|
||||
def run(self):
|
||||
if self._args.verbose > 1: # can be negative with --quiet
|
||||
print (self._args.file)
|
||||
x = DJVUFile(file(self._args.file[0], 'rb'), verbose=self._args.verbose)
|
||||
if self._args.text:
|
||||
print (x.get_text(sys.stdout))
|
||||
if self._args.dump:
|
||||
x.dump(sys.stdout, maxlevel=self._args.dump)
|
||||
return 0
|
||||
|
||||
tt = DJVUDecoder()
|
||||
res = tt.result
|
||||
if res != 0:
|
||||
print (res)
|
||||
f = DJVUFile(open(sys.argv[-1], 'rb'))
|
||||
print (f.get_text(sys.stdout))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -80,6 +80,7 @@ MAXLEN = 1024 ** 2
|
||||
|
||||
# Exception classes used by this module.
|
||||
class BZZDecoderError(Exception):
|
||||
|
||||
"""This exception is raised when BZZDecode runs into trouble
|
||||
"""
|
||||
def __init__(self, msg):
|
||||
@ -91,7 +92,7 @@ class BZZDecoderError(Exception):
|
||||
# This table has been designed for the ZPCoder
|
||||
# * by running the following command in file 'zptable.sn':
|
||||
# * (fast-crude (steady-mat 0.0035 0.0002) 260)))
|
||||
default_ztable = [ # {{{
|
||||
default_ztable = [ # {{{
|
||||
(0x8000, 0x0000, 84, 145), # 000: p=0.500000 ( 0, 0)
|
||||
(0x8000, 0x0000, 3, 4), # 001: p=0.500000 ( 0, 0)
|
||||
(0x8000, 0x0000, 4, 3), # 002: p=0.500000 ( 0, 0)
|
||||
@ -391,6 +392,7 @@ def chr3(l):
|
||||
return bytes(bytearray(l))
|
||||
|
||||
class BZZDecoder():
|
||||
|
||||
def __init__(self, infile, outfile):
|
||||
self.instream = infile
|
||||
self.outf = outfile
|
||||
@ -450,17 +452,15 @@ class BZZDecoder():
|
||||
self.xsize -= 1
|
||||
|
||||
# Compute remaining
|
||||
bytes = self.xsize
|
||||
if bytes > sz:
|
||||
bytes = sz
|
||||
remaining = min(sz, self.xsize)
|
||||
# Transfer
|
||||
if bytes:
|
||||
for i in range(bytes):
|
||||
self.outf.write(chr3(self.outbuf[self.bptr + i]))
|
||||
self.xsize -= bytes
|
||||
self.bptr += bytes
|
||||
sz -= bytes
|
||||
copied += bytes
|
||||
if remaining > 0:
|
||||
raw = bytes(bytearray(self.outbuf[self.bptr:self.bptr + remaining]))
|
||||
self.outf.write(raw)
|
||||
self.xsize -= remaining
|
||||
self.bptr += remaining
|
||||
sz -= remaining
|
||||
copied += remaining
|
||||
# offset += bytes; // for tell()
|
||||
return copied
|
||||
|
||||
@ -468,7 +468,8 @@ class BZZDecoder():
|
||||
while self.scount <= 24:
|
||||
if self.read_byte() < 1:
|
||||
self.byte = 0xff
|
||||
if --self.delay < 1:
|
||||
self.delay -= 1
|
||||
if self.delay < 1:
|
||||
raise BZZDecoderError("BiteStream EOF")
|
||||
self.bufint = (self.bufint << 8) | self.byte
|
||||
self.scount += 8
|
||||
@ -495,7 +496,7 @@ class BZZDecoder():
|
||||
if self.zpcodec_decoder():
|
||||
fshift += 1
|
||||
# Prepare Quasi MTF
|
||||
mtf = list(xmtf) # unsigned chars
|
||||
mtf = list(xmtf) # unsigned chars
|
||||
freq = [0] * FREQMAX
|
||||
fadd = 4
|
||||
# Decode
|
||||
@ -524,10 +525,10 @@ class BZZDecoder():
|
||||
elif self.zpcodec_decode(cx, 2*CTXIDS + 14):
|
||||
mtfno = 16 + self.decode_binary(cx, 2*CTXIDS + 14 + 1, 4)
|
||||
outbuf[i] = mtf[mtfno]
|
||||
elif self.zpcodec_decode(cx, 2*CTXIDS + 30 ):
|
||||
elif self.zpcodec_decode(cx, 2*CTXIDS + 30):
|
||||
mtfno = 32 + self.decode_binary(cx, 2*CTXIDS + 30 + 1, 5)
|
||||
outbuf[i] = mtf[mtfno]
|
||||
elif self.zpcodec_decode(cx, 2*CTXIDS + 62 ):
|
||||
elif self.zpcodec_decode(cx, 2*CTXIDS + 62):
|
||||
mtfno = 64 + self.decode_binary(cx, 2*CTXIDS + 62 + 1, 6)
|
||||
outbuf[i] = mtf[mtfno]
|
||||
elif self.zpcodec_decode(cx, 2*CTXIDS + 126):
|
||||
@ -729,9 +730,7 @@ class BZZDecoder():
|
||||
return (self.ffzt[(x >> 8) & 0xff])
|
||||
|
||||
|
||||
|
||||
### for testing
|
||||
|
||||
# for testing
|
||||
def main():
|
||||
import sys
|
||||
infile = file(sys.argv[1], "rb")
|
||||
|
Loading…
x
Reference in New Issue
Block a user