From 90aa34a473a551742ff516f953eca0c1884d4e42 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 3 Mar 2014 11:02:32 +0530
Subject: [PATCH] DJVU Input: Fix incorrect extraction of embedded text from
 TXTz sections in DJVU files when the external djvutxt program is not
 present. Fixes #1286771 [converting DJVU file containing text
 fails](https://bugs.launchpad.net/calibre/+bug/1286771)

---
 .../ebooks/conversion/plugins/djvu_input.py   |  2 +-
 src/calibre/ebooks/djvu/djvu.py               | 44 ++++---------------
 src/calibre/ebooks/djvu/djvubzzdec.py         | 35 +++++++--------
 3 files changed, 27 insertions(+), 54 deletions(-)

diff --git a/src/calibre/ebooks/conversion/plugins/djvu_input.py b/src/calibre/ebooks/conversion/plugins/djvu_input.py
index a5aa258d87..783931f4f3 100644
--- a/src/calibre/ebooks/conversion/plugins/djvu_input.py
+++ b/src/calibre/ebooks/conversion/plugins/djvu_input.py
@@ -44,7 +44,7 @@ class DJVUInput(InputFormatPlugin):
                 os.remove(filename)
                 ppdjvu = False
             except:
-                stream.seek(0) # retry with the pure python converter
+                stream.seek(0)  # retry with the pure python converter
         if ppdjvu:
             from calibre.ebooks.djvu.djvu import DJVUFile
             x = DJVUFile(stream)
diff --git a/src/calibre/ebooks/djvu/djvu.py b/src/calibre/ebooks/djvu/djvu.py
index ca71e97220..7eebbca52b 100644
--- a/src/calibre/ebooks/djvu/djvu.py
+++ b/src/calibre/ebooks/djvu/djvu.py
@@ -15,7 +15,7 @@ import sys
 import struct
 from cStringIO import StringIO
 
-from .djvubzzdec import BZZDecoder
+from calibre.ebooks.djvu.djvubzzdec import BZZDecoder
 
 class DjvuChunk(object):
     def __init__(self, buf, start, end, align=True, bigendian=True,
@@ -73,14 +73,16 @@ class DjvuChunk(object):
                 if not xxres:
                     break
             res = outbuf.getvalue()
+            if not res.strip(b'\0'):
+                raise ValueError('TXTz block is completely null')
             l = 0
             for x in res[:3]:
                 l <<= 8
                 l += ord(x)
             if verbose > 0 and out:
-                print >> out, l
+                print (l, file=out)
             txtout.write(res[3:3+l])
-            txtout.write(b'\n\f')
+            txtout.write(b'\n')
         if txtout and self.type == b'TXTa':
             res = self.buf[self.datastart: self.dataend]
             l = 0
@@ -88,9 +90,9 @@ class DjvuChunk(object):
                 l <<= 8
                 l += ord(x)
             if verbose > 0 and out:
-                print >> out, l
+                print (l, file=out)
             txtout.write(res[3:3+l])
-            txtout.write(b'\n\f')
+            txtout.write(b'\n')
         if indent >= maxlevel:
             return
         for schunk in self._subchunks:
@@ -111,36 +113,8 @@ class DJVUFile(object):
         self.dc.dump(out=outfile, maxlevel=maxlevel)
 
 def main():
-    from ruamel.util.program import Program
-    class DJVUDecoder(Program):
-        def __init__(self):
-            Program.__init__(self)
-
-        def parser_setup(self):
-            Program.parser_setup(self)
-            #self._argparser.add_argument('--combine', '-c', action=CountAction, const=1, nargs=0)
-            #self._argparser.add_argument('--combine', '-c', type=int, default=1)
-            #self._argparser.add_argument('--segments', '-s', action='append', nargs='+')
-            #self._argparser.add_argument('--force', '-f', action='store_true')
-            #self._argparser.add_argument('classname')
-            self._argparser.add_argument('--text', '-t', action='store_true')
-            self._argparser.add_argument('--dump', type=int, default=0)
-            self._argparser.add_argument('file', nargs='+')
-
-        def run(self):
-            if self._args.verbose > 1: # can be negative with --quiet
-                print (self._args.file)
-            x = DJVUFile(file(self._args.file[0], 'rb'), verbose=self._args.verbose)
-            if self._args.text:
-                print (x.get_text(sys.stdout))
-            if self._args.dump:
-                x.dump(sys.stdout, maxlevel=self._args.dump)
-            return 0
-
-    tt = DJVUDecoder()
-    res = tt.result
-    if res != 0:
-        print (res)
+    f = DJVUFile(open(sys.argv[-1], 'rb'))
+    print (f.get_text(sys.stdout))
 
 if __name__ == '__main__':
     main()
diff --git a/src/calibre/ebooks/djvu/djvubzzdec.py b/src/calibre/ebooks/djvu/djvubzzdec.py
index 3eb8baa9a4..6e8ebe54ae 100644
--- a/src/calibre/ebooks/djvu/djvubzzdec.py
+++ b/src/calibre/ebooks/djvu/djvubzzdec.py
@@ -80,6 +80,7 @@ MAXLEN = 1024 ** 2
 
 # Exception classes used by this module.
 class BZZDecoderError(Exception):
+
     """This exception is raised when BZZDecode runs into trouble
     """
     def __init__(self, msg):
@@ -91,7 +92,7 @@ class BZZDecoderError(Exception):
 # This table has been designed for the ZPCoder
 #   * by running the following command in file 'zptable.sn':
 #   * (fast-crude (steady-mat 0.0035  0.0002) 260)))
-default_ztable = [ # {{{
+default_ztable = [  # {{{
   (0x8000, 0x0000, 84, 145),    # 000: p=0.500000 (    0,    0)
   (0x8000, 0x0000, 3, 4),       # 001: p=0.500000 (    0,    0)
   (0x8000, 0x0000, 4, 3),       # 002: p=0.500000 (    0,    0)
@@ -391,6 +392,7 @@ def chr3(l):
     return bytes(bytearray(l))
 
 class BZZDecoder():
+
     def __init__(self, infile, outfile):
         self.instream = infile
         self.outf = outfile
@@ -450,17 +452,15 @@ class BZZDecoder():
                 self.xsize -= 1
 
             # Compute remaining
-            bytes = self.xsize
-            if bytes > sz:
-                bytes = sz
+            remaining = min(sz, self.xsize)
             # Transfer
-            if bytes:
-                for i in range(bytes):
-                    self.outf.write(chr3(self.outbuf[self.bptr + i]))
-            self.xsize -= bytes
-            self.bptr += bytes
-            sz -= bytes
-            copied += bytes
+            if remaining > 0:
+                raw = bytes(bytearray(self.outbuf[self.bptr:self.bptr + remaining]))
+                self.outf.write(raw)
+            self.xsize -= remaining
+            self.bptr += remaining
+            sz -= remaining
+            copied += remaining
             # offset += bytes; // for tell()
         return copied
 
@@ -468,7 +468,8 @@ class BZZDecoder():
         while self.scount <= 24:
             if self.read_byte() < 1:
                 self.byte = 0xff
-                if --self.delay < 1:
+                self.delay -= 1
+                if self.delay < 1:
                     raise BZZDecoderError("BiteStream EOF")
             self.bufint = (self.bufint << 8) | self.byte
             self.scount += 8
@@ -495,7 +496,7 @@ class BZZDecoder():
             if self.zpcodec_decoder():
                 fshift += 1
         # Prepare Quasi MTF
-        mtf = list(xmtf) # unsigned chars
+        mtf = list(xmtf)  # unsigned chars
         freq = [0] * FREQMAX
         fadd = 4
         # Decode
@@ -524,10 +525,10 @@ class BZZDecoder():
             elif self.zpcodec_decode(cx, 2*CTXIDS + 14):
                 mtfno = 16 + self.decode_binary(cx, 2*CTXIDS + 14 + 1, 4)
                 outbuf[i] = mtf[mtfno]
-            elif self.zpcodec_decode(cx, 2*CTXIDS + 30 ):
+            elif self.zpcodec_decode(cx, 2*CTXIDS + 30):
                 mtfno = 32 + self.decode_binary(cx, 2*CTXIDS + 30 + 1, 5)
                 outbuf[i] = mtf[mtfno]
-            elif self.zpcodec_decode(cx, 2*CTXIDS + 62 ):
+            elif self.zpcodec_decode(cx, 2*CTXIDS + 62):
                 mtfno = 64 + self.decode_binary(cx, 2*CTXIDS + 62 + 1, 6)
                 outbuf[i] = mtf[mtfno]
             elif self.zpcodec_decode(cx, 2*CTXIDS + 126):
@@ -729,9 +730,7 @@ class BZZDecoder():
             return (self.ffzt[(x >> 8) & 0xff])
 
 
-
-### for testing
-
+# for testing
 def main():
     import sys
     infile = file(sys.argv[1], "rb")