diff --git a/recipes/defensenews.recipe b/recipes/defensenews.recipe index 8c0f9b0be7..a521529981 100644 --- a/recipes/defensenews.recipe +++ b/recipes/defensenews.recipe @@ -4,9 +4,7 @@ __copyright__ = '2011, Darko Miletic ' www.defensenews.com ''' -import re from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import BeautifulSoup class DefenseNews(BasicNewsRecipe): title = 'Defense News' @@ -42,7 +40,7 @@ class DefenseNews(BasicNewsRecipe): ] remove_tags_before = attrs={'class':'storyWrp'} remove_tags_after = attrs={'class':'middle'} - + remove_attributes=['lang'] feeds = [ diff --git a/setup/extensions.py b/setup/extensions.py index 87d05c492d..ccff4b6ff7 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -336,7 +336,7 @@ class Build(Command): oinc = ['/Fo'+obj] if iswindows else ['-o', obj] cmd = [compiler] + cflags + ext.cflags + einc + sinc + oinc self.info(' '.join(cmd)) - subprocess.check_call(cmd) + self.check_call(cmd) dest = self.dest(ext) elib = self.lib_dirs_to_ldflags(ext.lib_dirs) @@ -350,18 +350,32 @@ class Build(Command): else: cmd += objects + ext.extra_objs + ['-o', dest] + ldflags + ext.ldflags + elib + xlib self.info('\n\n', ' '.join(cmd), '\n\n') - subprocess.check_call(cmd) + self.check_call(cmd) if iswindows: #manifest = dest+'.manifest' #cmd = [MT, '-manifest', manifest, '-outputresource:%s;2'%dest] #self.info(*cmd) - #subprocess.check_call(cmd) + #self.check_call(cmd) #os.remove(manifest) for x in ('.exp', '.lib'): x = os.path.splitext(dest)[0]+x if os.path.exists(x): os.remove(x) + def check_call(self, *args, **kwargs): + """print cmdline if an error occured + + If something is missing (qmake e.g.) you get a non-informative error + self.check_call(qmc + [ext.name+'.pro']) + so you would have to look a the source to see the actual command. + """ + try: + subprocess.check_call(*args, **kwargs) + except: + cmdline = ' '.join(['"%s"' % (arg) if ' ' in arg else arg for arg in args[0]]) + print "Error while executing: %s\n" % (cmdline) + raise + def build_qt_objects(self, ext): obj_pat = 'release\\*.obj' if iswindows else '*.o' objects = glob.glob(obj_pat) @@ -380,8 +394,8 @@ class Build(Command): qmc = [QMAKE, '-o', 'Makefile'] if iswindows: qmc += ['-spec', 'win32-msvc2008'] - subprocess.check_call(qmc + [ext.name+'.pro']) - subprocess.check_call([make, '-f', 'Makefile']) + self.check_call(qmc + [ext.name+'.pro']) + self.check_call([make, '-f', 'Makefile']) objects = glob.glob(obj_pat) return list(map(self.a, objects)) @@ -407,7 +421,7 @@ class Build(Command): cmd = [pyqt.sip_bin+exe, '-w', '-c', src_dir, '-b', sbf, '-I'+\ pyqt.pyqt_sip_dir] + shlex.split(pyqt.pyqt_sip_flags) + [sipf] self.info(' '.join(cmd)) - subprocess.check_call(cmd) + self.check_call(cmd) module = self.j(src_dir, self.b(dest)) if self.newer(dest, [sbf]+qt_objects): mf = self.j(src_dir, 'Makefile') @@ -417,7 +431,7 @@ class Build(Command): makefile.extra_include_dirs = ext.inc_dirs makefile.generate() - subprocess.check_call([make, '-f', mf], cwd=src_dir) + self.check_call([make, '-f', mf], cwd=src_dir) shutil.copy2(module, dest) def clean(self): @@ -457,7 +471,7 @@ class BuildPDF2XML(Command): cmd += ['-I'+x for x in poppler_inc_dirs+magick_inc_dirs] cmd += ['/Fo'+obj, src] self.info(*cmd) - subprocess.check_call(cmd) + self.check_call(cmd) objects.append(obj) if self.newer(dest, objects): @@ -470,7 +484,7 @@ class BuildPDF2XML(Command): png_libs+magick_libs+poppler_libs+ft_libs+jpg_libs+pdfreflow_libs] cmd += ['/OUT:'+dest] + objects self.info(*cmd) - subprocess.check_call(cmd) + self.check_call(cmd) self.info('Binary installed as', dest) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 1adcd52ad9..7e9f34f134 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -502,6 +502,7 @@ class TXTZMetadataWriter(MetadataWriterPlugin): # }}} from calibre.ebooks.comic.input import ComicInput +from calibre.ebooks.djvu.input import DJVUInput from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.fb2.input import FB2Input from calibre.ebooks.html.input import HTMLInput @@ -600,6 +601,7 @@ plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon] plugins += [ ComicInput, + DJVUInput, EPUBInput, FB2Input, HTMLInput, diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index dc4ce01bf6..3b471ee47f 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -4,7 +4,6 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import sys from itertools import izip from xml.sax.saxutils import escape @@ -742,7 +741,7 @@ class PocketBook900Output(OutputProfile): screen_size = (810, 1180) dpi = 150.0 comic_screen_size = screen_size - + output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output, SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output, HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput, diff --git a/src/calibre/ebooks/djvu/__init__.py b/src/calibre/ebooks/djvu/__init__.py new file mode 100644 index 0000000000..fb2d53c224 --- /dev/null +++ b/src/calibre/ebooks/djvu/__init__.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Anthon van der Neut ' +__docformat__ = 'restructuredtext en' + +''' +Used for DJVU input +''' + diff --git a/src/calibre/ebooks/djvu/djvu.py b/src/calibre/ebooks/djvu/djvu.py new file mode 100644 index 0000000000..ca71e97220 --- /dev/null +++ b/src/calibre/ebooks/djvu/djvu.py @@ -0,0 +1,146 @@ +#! /usr/bin/env python +# coding: utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Anthon van der Neut ' + +# this code is based on: +# Lizardtech DjVu Reference +# DjVu v3 +# November 2005 + +import sys +import struct +from cStringIO import StringIO + +from .djvubzzdec import BZZDecoder + +class DjvuChunk(object): + def __init__(self, buf, start, end, align=True, bigendian=True, + inclheader=False, verbose=0): + self.subtype = None + self._subchunks = [] + self.buf = buf + pos = start + 4 + self.type = buf[start:pos] + self.align = align # whether to align to word (2-byte) boundaries + self.headersize = 0 if inclheader else 8 + if bigendian: + self.strflag = b'>' + else: + self.strflag = b'<' + oldpos, pos = pos, pos+4 + self.size = struct.unpack(self.strflag+b'L', buf[oldpos:pos])[0] + self.dataend = pos + self.size - (8 if inclheader else 0) + if self.type == b'FORM': + oldpos, pos = pos, pos+4 + #print oldpos, pos + self.subtype = buf[oldpos:pos] + #self.headersize += 4 + self.datastart = pos + if verbose > 0: + print ('found', self.type, self.subtype, pos, self.size) + if self.type in b'FORM'.split(): + if verbose > 0: + print ('processing substuff %d %d (%x)' % (pos, self.dataend, + self.dataend)) + numchunks = 0 + while pos < self.dataend: + x = DjvuChunk(buf, pos, start+self.size, verbose=verbose) + numchunks += 1 + self._subchunks.append(x) + newpos = pos + x.size + x.headersize + (1 if (x.size % 2) else 0) + if verbose > 0: + print ('newpos %d %d (%x, %x) %d' % (newpos, self.dataend, + newpos, self.dataend, x.headersize)) + pos = newpos + if verbose > 0: + print (' end of chunk %d (%x)' % (pos, pos)) + + def dump(self, verbose=0, indent=1, out=None, txtout=None, maxlevel=100): + if out: + out.write(b' ' * indent) + out.write(b'%s%s [%d]\n' % (self.type, + b':' + self.subtype if self.subtype else b'', self.size)) + if txtout and self.type == b'TXTz': + inbuf = StringIO(self.buf[self.datastart: self.dataend]) + outbuf = StringIO() + decoder = BZZDecoder(inbuf, outbuf) + while True: + xxres = decoder.convert(1024 * 1024) + if not xxres: + break + res = outbuf.getvalue() + l = 0 + for x in res[:3]: + l <<= 8 + l += ord(x) + if verbose > 0 and out: + print >> out, l + txtout.write(res[3:3+l]) + txtout.write(b'\n\f') + if txtout and self.type == b'TXTa': + res = self.buf[self.datastart: self.dataend] + l = 0 + for x in res[:3]: + l <<= 8 + l += ord(x) + if verbose > 0 and out: + print >> out, l + txtout.write(res[3:3+l]) + txtout.write(b'\n\f') + if indent >= maxlevel: + return + for schunk in self._subchunks: + schunk.dump(verbose=verbose, indent=indent+1, out=out, txtout=txtout) + +class DJVUFile(object): + def __init__(self, instream, verbose=0): + self.instream = instream + buf = self.instream.read(4) + assert(buf == b'AT&T') + buf = self.instream.read() + self.dc = DjvuChunk(buf, 0, len(buf), verbose=verbose) + + def get_text(self, outfile=None): + self.dc.dump(txtout=outfile) + + def dump(self, outfile=None, maxlevel=0): + self.dc.dump(out=outfile, maxlevel=maxlevel) + +def main(): + from ruamel.util.program import Program + class DJVUDecoder(Program): + def __init__(self): + Program.__init__(self) + + def parser_setup(self): + Program.parser_setup(self) + #self._argparser.add_argument('--combine', '-c', action=CountAction, const=1, nargs=0) + #self._argparser.add_argument('--combine', '-c', type=int, default=1) + #self._argparser.add_argument('--segments', '-s', action='append', nargs='+') + #self._argparser.add_argument('--force', '-f', action='store_true') + #self._argparser.add_argument('classname') + self._argparser.add_argument('--text', '-t', action='store_true') + self._argparser.add_argument('--dump', type=int, default=0) + self._argparser.add_argument('file', nargs='+') + + def run(self): + if self._args.verbose > 1: # can be negative with --quiet + print (self._args.file) + x = DJVUFile(file(self._args.file[0], 'rb'), verbose=self._args.verbose) + if self._args.text: + print (x.get_text(sys.stdout)) + if self._args.dump: + x.dump(sys.stdout, maxlevel=self._args.dump) + return 0 + + tt = DJVUDecoder() + res = tt.result + if res != 0: + print (res) + +if __name__ == '__main__': + main() diff --git a/src/calibre/ebooks/djvu/djvubzzdec.py b/src/calibre/ebooks/djvu/djvubzzdec.py new file mode 100644 index 0000000000..3eb8baa9a4 --- /dev/null +++ b/src/calibre/ebooks/djvu/djvubzzdec.py @@ -0,0 +1,746 @@ +#! /usr/bin/env python +# coding: utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Anthon van der Neut ' +#__docformat__ = 'restructuredtext en' + +# Copyright (C) 2011 Anthon van der Neut, Ruamel bvba +# Adapted from Leon Bottou's djvulibre C++ code, +# ( ZPCodec.{cpp,h} and BSByteStream.{cpp,h} ) +# that code was first converted to C removing any dependencies on the DJVU libre +# framework for ByteStream, making it into a ctypes callable shared object +# then to python, and remade into a class +original_copyright_notice = ''' +//C- ------------------------------------------------------------------- +//C- DjVuLibre-3.5 +//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. +//C- Copyright (c) 2001 AT&T +//C- +//C- This software is subject to, and may be distributed under, the +//C- GNU General Public License, either Version 2 of the license, +//C- or (at your option) any later version. The license should have +//C- accompanied the software or you may obtain a copy of the license +//C- from the Free Software Foundation at http://www.fsf.org . +//C- +//C- This program is distributed in the hope that it will be useful, +//C- but WITHOUT ANY WARRANTY; without even the implied warranty of +//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//C- GNU General Public License for more details. +//C- +//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from +//C- Lizardtech Software. Lizardtech Software has authorized us to +//C- replace the original DjVu(r) Reference Library notice by the following +//C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu): +//C- +//C- ------------------------------------------------------------------ +//C- | DjVu (r) Reference Library (v. 3.5) +//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. +//C- | The DjVu Reference Library is protected by U.S. Pat. No. +//C- | 6,058,214 and patents pending. +//C- | +//C- | This software is subject to, and may be distributed under, the +//C- | GNU General Public License, either Version 2 of the license, +//C- | or (at your option) any later version. The license should have +//C- | accompanied the software or you may obtain a copy of the license +//C- | from the Free Software Foundation at http://www.fsf.org . +//C- | +//C- | The computer code originally released by LizardTech under this +//C- | license and unmodified by other parties is deemed "the LIZARDTECH +//C- | ORIGINAL CODE." Subject to any third party intellectual property +//C- | claims, LizardTech grants recipient a worldwide, royalty-free, +//C- | non-exclusive license to make, use, sell, or otherwise dispose of +//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the +//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU +//C- | General Public License. This grant only confers the right to +//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to +//C- | the extent such infringement is reasonably necessary to enable +//C- | recipient to make, have made, practice, sell, or otherwise dispose +//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to +//C- | any greater extent that may be necessary to utilize further +//C- | modifications or combinations. +//C- | +//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY +//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF +//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. +//C- +------------------------------------------------------------------ +// +// $Id: BSByteStream.cpp,v 1.9 2007/03/25 20:48:29 leonb Exp $ +// $Name: release_3_5_23 $ +''' + + +MAXBLOCK = 4096 +FREQMAX = 4 +CTXIDS = 3 +MAXLEN = 1024 ** 2 + +# Exception classes used by this module. +class BZZDecoderError(Exception): + """This exception is raised when BZZDecode runs into trouble + """ + def __init__(self, msg): + self.msg = msg + def __str__(self): + return "BZZDecoderError: %s" % (self.msg) + + +# This table has been designed for the ZPCoder +# * by running the following command in file 'zptable.sn': +# * (fast-crude (steady-mat 0.0035 0.0002) 260))) +default_ztable = [ # {{{ + (0x8000, 0x0000, 84, 145), # 000: p=0.500000 ( 0, 0) + (0x8000, 0x0000, 3, 4), # 001: p=0.500000 ( 0, 0) + (0x8000, 0x0000, 4, 3), # 002: p=0.500000 ( 0, 0) + (0x6bbd, 0x10a5, 5, 1), # 003: p=0.465226 ( 0, 0) + (0x6bbd, 0x10a5, 6, 2), # 004: p=0.465226 ( 0, 0) + (0x5d45, 0x1f28, 7, 3), # 005: p=0.430708 ( 0, 0) + (0x5d45, 0x1f28, 8, 4), # 006: p=0.430708 ( 0, 0) + (0x51b9, 0x2bd3, 9, 5), # 007: p=0.396718 ( 0, 0) + (0x51b9, 0x2bd3, 10, 6), # 008: p=0.396718 ( 0, 0) + (0x4813, 0x36e3, 11, 7), # 009: p=0.363535 ( 0, 0) + (0x4813, 0x36e3, 12, 8), # 010: p=0.363535 ( 0, 0) + (0x3fd5, 0x408c, 13, 9), # 011: p=0.331418 ( 0, 0) + (0x3fd5, 0x408c, 14, 10), # 012: p=0.331418 ( 0, 0) + (0x38b1, 0x48fd, 15, 11), # 013: p=0.300585 ( 0, 0) + (0x38b1, 0x48fd, 16, 12), # 014: p=0.300585 ( 0, 0) + (0x3275, 0x505d, 17, 13), # 015: p=0.271213 ( 0, 0) + (0x3275, 0x505d, 18, 14), # 016: p=0.271213 ( 0, 0) + (0x2cfd, 0x56d0, 19, 15), # 017: p=0.243438 ( 0, 0) + (0x2cfd, 0x56d0, 20, 16), # 018: p=0.243438 ( 0, 0) + (0x2825, 0x5c71, 21, 17), # 019: p=0.217391 ( 0, 0) + (0x2825, 0x5c71, 22, 18), # 020: p=0.217391 ( 0, 0) + (0x23ab, 0x615b, 23, 19), # 021: p=0.193150 ( 0, 0) + (0x23ab, 0x615b, 24, 20), # 022: p=0.193150 ( 0, 0) + (0x1f87, 0x65a5, 25, 21), # 023: p=0.170728 ( 0, 0) + (0x1f87, 0x65a5, 26, 22), # 024: p=0.170728 ( 0, 0) + (0x1bbb, 0x6962, 27, 23), # 025: p=0.150158 ( 0, 0) + (0x1bbb, 0x6962, 28, 24), # 026: p=0.150158 ( 0, 0) + (0x1845, 0x6ca2, 29, 25), # 027: p=0.131418 ( 0, 0) + (0x1845, 0x6ca2, 30, 26), # 028: p=0.131418 ( 0, 0) + (0x1523, 0x6f74, 31, 27), # 029: p=0.114460 ( 0, 0) + (0x1523, 0x6f74, 32, 28), # 030: p=0.114460 ( 0, 0) + (0x1253, 0x71e6, 33, 29), # 031: p=0.099230 ( 0, 0) + (0x1253, 0x71e6, 34, 30), # 032: p=0.099230 ( 0, 0) + (0x0fcf, 0x7404, 35, 31), # 033: p=0.085611 ( 0, 0) + (0x0fcf, 0x7404, 36, 32), # 034: p=0.085611 ( 0, 0) + (0x0d95, 0x75d6, 37, 33), # 035: p=0.073550 ( 0, 0) + (0x0d95, 0x75d6, 38, 34), # 036: p=0.073550 ( 0, 0) + (0x0b9d, 0x7768, 39, 35), # 037: p=0.062888 ( 0, 0) + (0x0b9d, 0x7768, 40, 36), # 038: p=0.062888 ( 0, 0) + (0x09e3, 0x78c2, 41, 37), # 039: p=0.053539 ( 0, 0) + (0x09e3, 0x78c2, 42, 38), # 040: p=0.053539 ( 0, 0) + (0x0861, 0x79ea, 43, 39), # 041: p=0.045365 ( 0, 0) + (0x0861, 0x79ea, 44, 40), # 042: p=0.045365 ( 0, 0) + (0x0711, 0x7ae7, 45, 41), # 043: p=0.038272 ( 0, 0) + (0x0711, 0x7ae7, 46, 42), # 044: p=0.038272 ( 0, 0) + (0x05f1, 0x7bbe, 47, 43), # 045: p=0.032174 ( 0, 0) + (0x05f1, 0x7bbe, 48, 44), # 046: p=0.032174 ( 0, 0) + (0x04f9, 0x7c75, 49, 45), # 047: p=0.026928 ( 0, 0) + (0x04f9, 0x7c75, 50, 46), # 048: p=0.026928 ( 0, 0) + (0x0425, 0x7d0f, 51, 47), # 049: p=0.022444 ( 0, 0) + (0x0425, 0x7d0f, 52, 48), # 050: p=0.022444 ( 0, 0) + (0x0371, 0x7d91, 53, 49), # 051: p=0.018636 ( 0, 0) + (0x0371, 0x7d91, 54, 50), # 052: p=0.018636 ( 0, 0) + (0x02d9, 0x7dfe, 55, 51), # 053: p=0.015421 ( 0, 0) + (0x02d9, 0x7dfe, 56, 52), # 054: p=0.015421 ( 0, 0) + (0x0259, 0x7e5a, 57, 53), # 055: p=0.012713 ( 0, 0) + (0x0259, 0x7e5a, 58, 54), # 056: p=0.012713 ( 0, 0) + (0x01ed, 0x7ea6, 59, 55), # 057: p=0.010419 ( 0, 0) + (0x01ed, 0x7ea6, 60, 56), # 058: p=0.010419 ( 0, 0) + (0x0193, 0x7ee6, 61, 57), # 059: p=0.008525 ( 0, 0) + (0x0193, 0x7ee6, 62, 58), # 060: p=0.008525 ( 0, 0) + (0x0149, 0x7f1a, 63, 59), # 061: p=0.006959 ( 0, 0) + (0x0149, 0x7f1a, 64, 60), # 062: p=0.006959 ( 0, 0) + (0x010b, 0x7f45, 65, 61), # 063: p=0.005648 ( 0, 0) + (0x010b, 0x7f45, 66, 62), # 064: p=0.005648 ( 0, 0) + (0x00d5, 0x7f6b, 67, 63), # 065: p=0.004506 ( 0, 0) + (0x00d5, 0x7f6b, 68, 64), # 066: p=0.004506 ( 0, 0) + (0x00a5, 0x7f8d, 69, 65), # 067: p=0.003480 ( 0, 0) + (0x00a5, 0x7f8d, 70, 66), # 068: p=0.003480 ( 0, 0) + (0x007b, 0x7faa, 71, 67), # 069: p=0.002602 ( 0, 0) + (0x007b, 0x7faa, 72, 68), # 070: p=0.002602 ( 0, 0) + (0x0057, 0x7fc3, 73, 69), # 071: p=0.001843 ( 0, 0) + (0x0057, 0x7fc3, 74, 70), # 072: p=0.001843 ( 0, 0) + (0x003b, 0x7fd7, 75, 71), # 073: p=0.001248 ( 0, 0) + (0x003b, 0x7fd7, 76, 72), # 074: p=0.001248 ( 0, 0) + (0x0023, 0x7fe7, 77, 73), # 075: p=0.000749 ( 0, 0) + (0x0023, 0x7fe7, 78, 74), # 076: p=0.000749 ( 0, 0) + (0x0013, 0x7ff2, 79, 75), # 077: p=0.000402 ( 0, 0) + (0x0013, 0x7ff2, 80, 76), # 078: p=0.000402 ( 0, 0) + (0x0007, 0x7ffa, 81, 77), # 079: p=0.000153 ( 0, 0) + (0x0007, 0x7ffa, 82, 78), # 080: p=0.000153 ( 0, 0) + (0x0001, 0x7fff, 81, 79), # 081: p=0.000027 ( 0, 0) + (0x0001, 0x7fff, 82, 80), # 082: p=0.000027 ( 0, 0) + (0x5695, 0x0000, 9, 85), # 083: p=0.411764 ( 2, 3) + (0x24ee, 0x0000, 86, 226), # 084: p=0.199988 ( 1, 0) + (0x8000, 0x0000, 5, 6), # 085: p=0.500000 ( 3, 3) + (0x0d30, 0x0000, 88, 176), # 086: p=0.071422 ( 4, 0) + (0x481a, 0x0000, 89, 143), # 087: p=0.363634 ( 1, 2) + (0x0481, 0x0000, 90, 138), # 088: p=0.024388 ( 13, 0) + (0x3579, 0x0000, 91, 141), # 089: p=0.285711 ( 1, 3) + (0x017a, 0x0000, 92, 112), # 090: p=0.007999 ( 41, 0) + (0x24ef, 0x0000, 93, 135), # 091: p=0.199997 ( 1, 5) + (0x007b, 0x0000, 94, 104), # 092: p=0.002611 ( 127, 0) + (0x1978, 0x0000, 95, 133), # 093: p=0.137929 ( 1, 8) + (0x0028, 0x0000, 96, 100), # 094: p=0.000849 ( 392, 0) + (0x10ca, 0x0000, 97, 129), # 095: p=0.090907 ( 1, 13) + (0x000d, 0x0000, 82, 98), # 096: p=0.000276 ( 1208, 0) + (0x0b5d, 0x0000, 99, 127), # 097: p=0.061537 ( 1, 20) + (0x0034, 0x0000, 76, 72), # 098: p=0.001102 ( 1208, 1) + (0x078a, 0x0000, 101, 125), # 099: p=0.040815 ( 1, 31) + (0x00a0, 0x0000, 70, 102), # 100: p=0.003387 ( 392, 1) + (0x050f, 0x0000, 103, 123), # 101: p=0.027397 ( 1, 47) + (0x0117, 0x0000, 66, 60), # 102: p=0.005912 ( 392, 2) + (0x0358, 0x0000, 105, 121), # 103: p=0.018099 ( 1, 72) + (0x01ea, 0x0000, 106, 110), # 104: p=0.010362 ( 127, 1) + (0x0234, 0x0000, 107, 119), # 105: p=0.011940 ( 1, 110) + (0x0144, 0x0000, 66, 108), # 106: p=0.006849 ( 193, 1) + (0x0173, 0x0000, 109, 117), # 107: p=0.007858 ( 1, 168) + (0x0234, 0x0000, 60, 54), # 108: p=0.011925 ( 193, 2) + (0x00f5, 0x0000, 111, 115), # 109: p=0.005175 ( 1, 256) + (0x0353, 0x0000, 56, 48), # 110: p=0.017995 ( 127, 2) + (0x00a1, 0x0000, 69, 113), # 111: p=0.003413 ( 1, 389) + (0x05c5, 0x0000, 114, 134), # 112: p=0.031249 ( 41, 1) + (0x011a, 0x0000, 65, 59), # 113: p=0.005957 ( 2, 389) + (0x03cf, 0x0000, 116, 132), # 114: p=0.020618 ( 63, 1) + (0x01aa, 0x0000, 61, 55), # 115: p=0.009020 ( 2, 256) + (0x0285, 0x0000, 118, 130), # 116: p=0.013652 ( 96, 1) + (0x0286, 0x0000, 57, 51), # 117: p=0.013672 ( 2, 168) + (0x01ab, 0x0000, 120, 128), # 118: p=0.009029 ( 146, 1) + (0x03d3, 0x0000, 53, 47), # 119: p=0.020710 ( 2, 110) + (0x011a, 0x0000, 122, 126), # 120: p=0.005961 ( 222, 1) + (0x05c5, 0x0000, 49, 41), # 121: p=0.031250 ( 2, 72) + (0x00ba, 0x0000, 124, 62), # 122: p=0.003925 ( 338, 1) + (0x08ad, 0x0000, 43, 37), # 123: p=0.046979 ( 2, 47) + (0x007a, 0x0000, 72, 66), # 124: p=0.002586 ( 514, 1) + (0x0ccc, 0x0000, 39, 31), # 125: p=0.069306 ( 2, 31) + (0x01eb, 0x0000, 60, 54), # 126: p=0.010386 ( 222, 2) + (0x1302, 0x0000, 33, 25), # 127: p=0.102940 ( 2, 20) + (0x02e6, 0x0000, 56, 50), # 128: p=0.015695 ( 146, 2) + (0x1b81, 0x0000, 29, 131), # 129: p=0.148935 ( 2, 13) + (0x045e, 0x0000, 52, 46), # 130: p=0.023648 ( 96, 2) + (0x24ef, 0x0000, 23, 17), # 131: p=0.199999 ( 3, 13) + (0x0690, 0x0000, 48, 40), # 132: p=0.035533 ( 63, 2) + (0x2865, 0x0000, 23, 15), # 133: p=0.218748 ( 2, 8) + (0x09de, 0x0000, 42, 136), # 134: p=0.053434 ( 41, 2) + (0x3987, 0x0000, 137, 7), # 135: p=0.304346 ( 2, 5) + (0x0dc8, 0x0000, 38, 32), # 136: p=0.074626 ( 41, 3) + (0x2c99, 0x0000, 21, 139), # 137: p=0.241378 ( 2, 7) + (0x10ca, 0x0000, 140, 172), # 138: p=0.090907 ( 13, 1) + (0x3b5f, 0x0000, 15, 9), # 139: p=0.312499 ( 3, 7) + (0x0b5d, 0x0000, 142, 170), # 140: p=0.061537 ( 20, 1) + (0x5695, 0x0000, 9, 85), # 141: p=0.411764 ( 2, 3) + (0x078a, 0x0000, 144, 168), # 142: p=0.040815 ( 31, 1) + (0x8000, 0x0000, 141, 248), # 143: p=0.500000 ( 2, 2) + (0x050f, 0x0000, 146, 166), # 144: p=0.027397 ( 47, 1) + (0x24ee, 0x0000, 147, 247), # 145: p=0.199988 ( 0, 1) + (0x0358, 0x0000, 148, 164), # 146: p=0.018099 ( 72, 1) + (0x0d30, 0x0000, 149, 197), # 147: p=0.071422 ( 0, 4) + (0x0234, 0x0000, 150, 162), # 148: p=0.011940 ( 110, 1) + (0x0481, 0x0000, 151, 95), # 149: p=0.024388 ( 0, 13) + (0x0173, 0x0000, 152, 160), # 150: p=0.007858 ( 168, 1) + (0x017a, 0x0000, 153, 173), # 151: p=0.007999 ( 0, 41) + (0x00f5, 0x0000, 154, 158), # 152: p=0.005175 ( 256, 1) + (0x007b, 0x0000, 155, 165), # 153: p=0.002611 ( 0, 127) + (0x00a1, 0x0000, 70, 156), # 154: p=0.003413 ( 389, 1) + (0x0028, 0x0000, 157, 161), # 155: p=0.000849 ( 0, 392) + (0x011a, 0x0000, 66, 60), # 156: p=0.005957 ( 389, 2) + (0x000d, 0x0000, 81, 159), # 157: p=0.000276 ( 0, 1208) + (0x01aa, 0x0000, 62, 56), # 158: p=0.009020 ( 256, 2) + (0x0034, 0x0000, 75, 71), # 159: p=0.001102 ( 1, 1208) + (0x0286, 0x0000, 58, 52), # 160: p=0.013672 ( 168, 2) + (0x00a0, 0x0000, 69, 163), # 161: p=0.003387 ( 1, 392) + (0x03d3, 0x0000, 54, 48), # 162: p=0.020710 ( 110, 2) + (0x0117, 0x0000, 65, 59), # 163: p=0.005912 ( 2, 392) + (0x05c5, 0x0000, 50, 42), # 164: p=0.031250 ( 72, 2) + (0x01ea, 0x0000, 167, 171), # 165: p=0.010362 ( 1, 127) + (0x08ad, 0x0000, 44, 38), # 166: p=0.046979 ( 47, 2) + (0x0144, 0x0000, 65, 169), # 167: p=0.006849 ( 1, 193) + (0x0ccc, 0x0000, 40, 32), # 168: p=0.069306 ( 31, 2) + (0x0234, 0x0000, 59, 53), # 169: p=0.011925 ( 2, 193) + (0x1302, 0x0000, 34, 26), # 170: p=0.102940 ( 20, 2) + (0x0353, 0x0000, 55, 47), # 171: p=0.017995 ( 2, 127) + (0x1b81, 0x0000, 30, 174), # 172: p=0.148935 ( 13, 2) + (0x05c5, 0x0000, 175, 193), # 173: p=0.031249 ( 1, 41) + (0x24ef, 0x0000, 24, 18), # 174: p=0.199999 ( 13, 3) + (0x03cf, 0x0000, 177, 191), # 175: p=0.020618 ( 1, 63) + (0x2b74, 0x0000, 178, 222), # 176: p=0.235291 ( 4, 1) + (0x0285, 0x0000, 179, 189), # 177: p=0.013652 ( 1, 96) + (0x201d, 0x0000, 180, 218), # 178: p=0.173910 ( 6, 1) + (0x01ab, 0x0000, 181, 187), # 179: p=0.009029 ( 1, 146) + (0x1715, 0x0000, 182, 216), # 180: p=0.124998 ( 9, 1) + (0x011a, 0x0000, 183, 185), # 181: p=0.005961 ( 1, 222) + (0x0fb7, 0x0000, 184, 214), # 182: p=0.085105 ( 14, 1) + (0x00ba, 0x0000, 69, 61), # 183: p=0.003925 ( 1, 338) + (0x0a67, 0x0000, 186, 212), # 184: p=0.056337 ( 22, 1) + (0x01eb, 0x0000, 59, 53), # 185: p=0.010386 ( 2, 222) + (0x06e7, 0x0000, 188, 210), # 186: p=0.037382 ( 34, 1) + (0x02e6, 0x0000, 55, 49), # 187: p=0.015695 ( 2, 146) + (0x0496, 0x0000, 190, 208), # 188: p=0.024844 ( 52, 1) + (0x045e, 0x0000, 51, 45), # 189: p=0.023648 ( 2, 96) + (0x030d, 0x0000, 192, 206), # 190: p=0.016529 ( 79, 1) + (0x0690, 0x0000, 47, 39), # 191: p=0.035533 ( 2, 63) + (0x0206, 0x0000, 194, 204), # 192: p=0.010959 ( 120, 1) + (0x09de, 0x0000, 41, 195), # 193: p=0.053434 ( 2, 41) + (0x0155, 0x0000, 196, 202), # 194: p=0.007220 ( 183, 1) + (0x0dc8, 0x0000, 37, 31), # 195: p=0.074626 ( 3, 41) + (0x00e1, 0x0000, 198, 200), # 196: p=0.004750 ( 279, 1) + (0x2b74, 0x0000, 199, 243), # 197: p=0.235291 ( 1, 4) + (0x0094, 0x0000, 72, 64), # 198: p=0.003132 ( 424, 1) + (0x201d, 0x0000, 201, 239), # 199: p=0.173910 ( 1, 6) + (0x0188, 0x0000, 62, 56), # 200: p=0.008284 ( 279, 2) + (0x1715, 0x0000, 203, 237), # 201: p=0.124998 ( 1, 9) + (0x0252, 0x0000, 58, 52), # 202: p=0.012567 ( 183, 2) + (0x0fb7, 0x0000, 205, 235), # 203: p=0.085105 ( 1, 14) + (0x0383, 0x0000, 54, 48), # 204: p=0.019021 ( 120, 2) + (0x0a67, 0x0000, 207, 233), # 205: p=0.056337 ( 1, 22) + (0x0547, 0x0000, 50, 44), # 206: p=0.028571 ( 79, 2) + (0x06e7, 0x0000, 209, 231), # 207: p=0.037382 ( 1, 34) + (0x07e2, 0x0000, 46, 38), # 208: p=0.042682 ( 52, 2) + (0x0496, 0x0000, 211, 229), # 209: p=0.024844 ( 1, 52) + (0x0bc0, 0x0000, 40, 34), # 210: p=0.063636 ( 34, 2) + (0x030d, 0x0000, 213, 227), # 211: p=0.016529 ( 1, 79) + (0x1178, 0x0000, 36, 28), # 212: p=0.094593 ( 22, 2) + (0x0206, 0x0000, 215, 225), # 213: p=0.010959 ( 1, 120) + (0x19da, 0x0000, 30, 22), # 214: p=0.139999 ( 14, 2) + (0x0155, 0x0000, 217, 223), # 215: p=0.007220 ( 1, 183) + (0x24ef, 0x0000, 26, 16), # 216: p=0.199998 ( 9, 2) + (0x00e1, 0x0000, 219, 221), # 217: p=0.004750 ( 1, 279) + (0x320e, 0x0000, 20, 220), # 218: p=0.269229 ( 6, 2) + (0x0094, 0x0000, 71, 63), # 219: p=0.003132 ( 1, 424) + (0x432a, 0x0000, 14, 8), # 220: p=0.344827 ( 6, 3) + (0x0188, 0x0000, 61, 55), # 221: p=0.008284 ( 2, 279) + (0x447d, 0x0000, 14, 224), # 222: p=0.349998 ( 4, 2) + (0x0252, 0x0000, 57, 51), # 223: p=0.012567 ( 2, 183) + (0x5ece, 0x0000, 8, 2), # 224: p=0.434782 ( 4, 3) + (0x0383, 0x0000, 53, 47), # 225: p=0.019021 ( 2, 120) + (0x8000, 0x0000, 228, 87), # 226: p=0.500000 ( 1, 1) + (0x0547, 0x0000, 49, 43), # 227: p=0.028571 ( 2, 79) + (0x481a, 0x0000, 230, 246), # 228: p=0.363634 ( 2, 1) + (0x07e2, 0x0000, 45, 37), # 229: p=0.042682 ( 2, 52) + (0x3579, 0x0000, 232, 244), # 230: p=0.285711 ( 3, 1) + (0x0bc0, 0x0000, 39, 33), # 231: p=0.063636 ( 2, 34) + (0x24ef, 0x0000, 234, 238), # 232: p=0.199997 ( 5, 1) + (0x1178, 0x0000, 35, 27), # 233: p=0.094593 ( 2, 22) + (0x1978, 0x0000, 138, 236), # 234: p=0.137929 ( 8, 1) + (0x19da, 0x0000, 29, 21), # 235: p=0.139999 ( 2, 14) + (0x2865, 0x0000, 24, 16), # 236: p=0.218748 ( 8, 2) + (0x24ef, 0x0000, 25, 15), # 237: p=0.199998 ( 2, 9) + (0x3987, 0x0000, 240, 8), # 238: p=0.304346 ( 5, 2) + (0x320e, 0x0000, 19, 241), # 239: p=0.269229 ( 2, 6) + (0x2c99, 0x0000, 22, 242), # 240: p=0.241378 ( 7, 2) + (0x432a, 0x0000, 13, 7), # 241: p=0.344827 ( 3, 6) + (0x3b5f, 0x0000, 16, 10), # 242: p=0.312499 ( 7, 3) + (0x447d, 0x0000, 13, 245), # 243: p=0.349998 ( 2, 4) + (0x5695, 0x0000, 10, 2), # 244: p=0.411764 ( 3, 2) + (0x5ece, 0x0000, 7, 1), # 245: p=0.434782 ( 3, 4) + (0x8000, 0x0000, 244, 83), # 246: p=0.500000 ( 2, 2) + (0x8000, 0x0000, 249, 250), # 247: p=0.500000 ( 1, 1) + (0x5695, 0x0000, 10, 2), # 248: p=0.411764 ( 3, 2) + (0x481a, 0x0000, 89, 143), # 249: p=0.363634 ( 1, 2) + (0x481a, 0x0000, 230, 246), # 250: p=0.363634 ( 2, 1) + (0, 0, 0, 0), + (0, 0, 0, 0), + (0, 0, 0, 0), + (0, 0, 0, 0), + (0, 0, 0, 0), +] + + +xmtf = ( + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +) + # }}} + +def chr3(l): + return bytes(bytearray(l)) + +class BZZDecoder(): + def __init__(self, infile, outfile): + self.instream = infile + self.outf = outfile + self.ieof = False + self.bptr = None + self.xsize = None + self.outbuf = [0] * (MAXBLOCK * 1024) + self.byte = None + self.scount = 0 + self.delay = 25 + self.a = 0 + self.code = 0 + self.bufint = 0 + self.ctx = [0] * 300 + # table + self.p = [0] * 256 + self.m = [0] * 256 + self.up = [0] * 256 + self.dn = [0] * 256 + # machine independent ffz + self.ffzt = [0] * 256 + + # Create machine independent ffz table + for i in range(256): + j = i + while(j & 0x80): + self.ffzt[i] += 1 + j <<= 1 + # Initialize table + self.newtable(default_ztable) + # Codebit counter + # Read first 16 bits of code + if not self.read_byte(): + self.byte = 0xff + self.code = (self.byte << 8) + if not self.read_byte(): + self.byte = 0xff + self.code = self.code | self.byte + # Preload buffer + self.preload() + # Compute initial fence + self.fence = self.code + if self.code >= 0x8000: + self.fence = 0x7fff + + def convert(self, sz): + if self.ieof: + return 0 + copied = 0 + while sz > 0 and not (self.ieof): + # Decode if needed + if not self.xsize: + self.bptr = 0 + if not self.decode(): # input block size set in decode + self.xsize = 1 + self.ieof = True + self.xsize -= 1 + + # Compute remaining + bytes = self.xsize + if bytes > sz: + bytes = sz + # Transfer + if bytes: + for i in range(bytes): + self.outf.write(chr3(self.outbuf[self.bptr + i])) + self.xsize -= bytes + self.bptr += bytes + sz -= bytes + copied += bytes + # offset += bytes; // for tell() + return copied + + def preload(self): + while self.scount <= 24: + if self.read_byte() < 1: + self.byte = 0xff + if --self.delay < 1: + raise BZZDecoderError("BiteStream EOF") + self.bufint = (self.bufint << 8) | self.byte + self.scount += 8 + + def newtable(self, table): + for i in range(256): + self.p[i] = table[i][0] + self.m[i] = table[i][1] + self.up[i] = table[i][2] + self.dn[i] = table[i][3] + + def decode(self): + outbuf = self.outbuf + # Decode block size + self.xsize = self.decode_raw(24) + if not self.xsize: + return 0 + if self.xsize > MAXBLOCK * 1024: # 4MB (4096 * 1024) is max block + raise BZZDecoderError("BiteStream.corrupt") + # Dec11ode Estimation Speed + fshift = 0 + if self.zpcodec_decoder(): + fshift += 1 + if self.zpcodec_decoder(): + fshift += 1 + # Prepare Quasi MTF + mtf = list(xmtf) # unsigned chars + freq = [0] * FREQMAX + fadd = 4 + # Decode + mtfno = 3 + markerpos = -1 + for i in range(self.xsize): + ctxid = CTXIDS - 1 + if ctxid > mtfno: + ctxid = mtfno + cx = self.ctx + if self.zpcodec_decode(cx, ctxid): + mtfno = 0 + outbuf[i] = mtf[mtfno] + elif self.zpcodec_decode(cx, ctxid + CTXIDS): + mtfno = 1 + outbuf[i] = mtf[mtfno] + elif self.zpcodec_decode(cx, 2*CTXIDS): + mtfno = 2 + self.decode_binary(cx, 2*CTXIDS + 1, 1) + outbuf[i] = mtf[mtfno] + elif self.zpcodec_decode(cx, 2*CTXIDS+2): + mtfno = 4 + self.decode_binary(cx, 2*CTXIDS+2 + 1, 2) + outbuf[i] = mtf[mtfno] + elif self.zpcodec_decode(cx, 2*CTXIDS + 6): + mtfno = 8 + self.decode_binary(cx, 2*CTXIDS + 6 + 1, 3) + outbuf[i] = mtf[mtfno] + elif self.zpcodec_decode(cx, 2*CTXIDS + 14): + mtfno = 16 + self.decode_binary(cx, 2*CTXIDS + 14 + 1, 4) + outbuf[i] = mtf[mtfno] + elif self.zpcodec_decode(cx, 2*CTXIDS + 30 ): + mtfno = 32 + self.decode_binary(cx, 2*CTXIDS + 30 + 1, 5) + outbuf[i] = mtf[mtfno] + elif self.zpcodec_decode(cx, 2*CTXIDS + 62 ): + mtfno = 64 + self.decode_binary(cx, 2*CTXIDS + 62 + 1, 6) + outbuf[i] = mtf[mtfno] + elif self.zpcodec_decode(cx, 2*CTXIDS + 126): + mtfno = 128 + self.decode_binary(cx, 2*CTXIDS + 126 + 1, 7) + outbuf[i] = mtf[mtfno] + else: + mtfno = 256 # EOB + outbuf[i] = 0 + markerpos = i + continue + + # Rotate mtf according to empirical frequencies (new!) + # :rotate label + # Adjust frequencies for overflow + fadd = fadd + (fadd >> fshift) + if fadd > 0x10000000: + fadd >>= 24 + freq[0] >>= 24 + freq[1] >>= 24 + freq[2] >>= 24 + freq[3] >>= 24 + for k in range(4, FREQMAX): + freq[k] = freq[k] >> 24 + # Relocate new char according to new freq + fc = fadd + if mtfno < FREQMAX: + fc += freq[mtfno] + k = mtfno + while (k >= FREQMAX): + mtf[k] = mtf[k - 1] + k -= 1 + while (k > 0 and fc >= freq[k - 1]): + mtf[k] = mtf[k - 1] + freq[k] = freq[k - 1] + k -= 1 + mtf[k] = outbuf[i] + freq[k] = fc + #/////////////////////////////// + #//////// Reconstruct the string + + if markerpos < 1 or markerpos >= self.xsize: + raise BZZDecoderError("BiteStream.corrupt") + # Allocate pointers + posn = [0] * self.xsize + # Prepare count buffer + count = [0] * 256 + # Fill count buffer + for i in range(markerpos): + c = outbuf[i] + posn[i] = (c << 24) | (count[c] & 0xffffff) + count[c] += 1 + for i in range(markerpos + 1, self.xsize): + c = outbuf[i] + posn[i] = (c << 24) | (count[c] & 0xffffff) + count[c] += 1 + # Compute sorted char positions + last = 1 + for i in range(256): + tmp = count[i] + count[i] = last + last += tmp + # Undo the sort transform + i = 0 + last = self.xsize - 1 + while last > 0: + n = posn[i] + c = (posn[i] >> 24) + last -= 1 + outbuf[last] = c + i = count[c] + (n & 0xffffff) + # Free and check + if i != markerpos: + raise BZZDecoderError("BiteStream.corrupt") + return self.xsize + + def decode_raw(self, bits): + n = 1 + m = (1 << bits) + while n < m: + b = self.zpcodec_decoder() + n = (n << 1) | b + return n - m + + def decode_binary(self, ctx, index, bits): + n = 1 + m = (1 << bits) + while n < m: + b = self.zpcodec_decode(ctx, index + n - 1) + n = (n << 1) | b + return n - m + + def zpcodec_decoder(self): + return self.decode_sub_simple(0, 0x8000 + (self.a >> 1)) + + def decode_sub_simple(self, mps, z): + # Test MPS/LPS + if z > self.code: + # LPS branch + z = 0x10000 - z + self.a += +z + self.code = self.code + z + # LPS renormalization + shift = self.ffz() + self.scount -= shift + self.a = self.a << shift + self.a &= 0xffff + self.code = (self.code << shift) | ((self.bufint >> self.scount) & ((1 << shift) - 1)) + self.code &= 0xffff + if self.scount < 16: + self.preload() + # Adjust fence + self.fence = self.code + if self.code >= 0x8000: + self.fence = 0x7fff + result = mps ^ 1 + else: + # MPS renormalization + self.scount -= 1 + self.a = (z << 1) & 0xffff + self.code = ((self.code << 1) | ((self.bufint >> self.scount) & 1)) + self.code &= 0xffff + if self.scount < 16: + self.preload() + # Adjust fence + self.fence = self.code + if self.code >= 0x8000: + self.fence = 0x7fff + result = mps + return result + + def decode_sub(self, ctx, index, z): + # Save bit + bit = (ctx[index] & 1) + # Avoid interval reversion + d = 0x6000 + ((z + self.a) >> 2) + if z > d: + z = d + # Test MPS/LPS + if z > self.code: + # LPS branch + z = 0x10000 - z + self.a += +z + self.code = self.code + z + # LPS adaptation + ctx[index] = self.dn[ctx[index]] + # LPS renormalization + shift = self.ffz() + self.scount -= shift + self.a = (self.a << shift) & 0xffff + self.code = ((self.code << shift) | ((self.bufint >> self.scount) & ((1 << shift) - 1))) & 0xffff + if self.scount < 16: + self.preload() + # Adjust fence + self.fence = self.code + if self.code >= 0x8000: + self.fence = 0x7fff + return bit ^ 1 + else: + # MPS adaptation + if self.a >= self.m[ctx[index]]: + ctx[index] = self.up[ctx[index]] + # MPS renormalization + self.scount -= 1 + self.a = z << 1 & 0xffff + self.code = ((self.code << 1) | ((self.bufint >> self.scount) & 1)) & 0xffff + if self.scount < 16: + self.preload() + # Adjust fence + self.fence = self.code + if self.code >= 0x8000: + self.fence = 0x7fff + return bit + + def zpcodec_decode(self, ctx, index): + z = self.a + self.p[ctx[index]] + if z <= self.fence: + self.a = z + res = (ctx[index] & 1) + else: + res = self.decode_sub(ctx, index, z) + return res + + def read_byte(self): + res = 0 + if self.instream: + ires = self.instream.read(1) + res = len(ires) + if res: + self.byte = ord(ires[0]) + else: + raise NotImplementedError + return res + + def ffz(self): + x = self.a + if (x >= 0xff00): + return (self.ffzt[x & 0xff] + 8) + else: + return (self.ffzt[(x >> 8) & 0xff]) + + + +### for testing + +def main(): + import sys + infile = file(sys.argv[1], "rb") + outfile = file(sys.argv[2], "wb") + dec = BZZDecoder(infile, outfile) + while True: + res = dec.convert(1024 * 1024) + if not res: + break + +if __name__ == "__main__": + main() diff --git a/src/calibre/ebooks/djvu/input.py b/src/calibre/ebooks/djvu/input.py new file mode 100644 index 0000000000..70dbf97f5d --- /dev/null +++ b/src/calibre/ebooks/djvu/input.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- + +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL 3' +__copyright__ = '2011, Anthon van der Neut ' +__docformat__ = 'restructuredtext en' + +import os +from subprocess import Popen, PIPE +from cStringIO import StringIO + +from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation +from calibre.ebooks.txt.processor import convert_basic + +class DJVUInput(InputFormatPlugin): + + name = 'DJVU Input' + author = 'Anthon van der Neut' + description = 'Convert OCR-ed DJVU files (.djvu) to HTML' + file_types = set(['djvu', 'djv']) + + options = set([ + OptionRecommendation(name='use_djvutxt', recommended_value=True, + help=_('Try to use the djvutxt program and fall back to pure ' + 'python implementation if it fails or is not available')), + ]) + + def convert(self, stream, options, file_ext, log, accelerators): + stdout = StringIO() + ppdjvu = True + # using djvutxt is MUCH faster, should make it an option + if options.use_djvutxt and os.path.exists('/usr/bin/djvutxt'): + from calibre.ptempfile import PersistentTemporaryFile + try: + fp = PersistentTemporaryFile(suffix='.djvu', prefix='djv_input') + filename = fp._name + fp.write(stream.read()) + fp.close() + cmd = ['djvutxt', filename] + stdout.write(Popen(cmd, stdout=PIPE, close_fds=True).communicate()[0]) + os.remove(filename) + ppdjvu = False + except: + stream.seek(0) # retry with the pure python converter + if ppdjvu: + from .djvu import DJVUFile + x = DJVUFile(stream) + x.get_text(stdout) + + html = convert_basic(stdout.getvalue().replace(b"\n", b' ').replace( + b'\037', b'\n\n')) + # Run the HTMLized text through the html processing plugin. + from calibre.customize.ui import plugin_for_input_format + html_input = plugin_for_input_format('html') + for opt in html_input.options: + setattr(options, opt.option.name, opt.recommended_value) + options.input_encoding = 'utf-8' + base = os.getcwdu() + if file_ext != 'txtz' and hasattr(stream, 'name'): + base = os.path.dirname(stream.name) + fname = os.path.join(base, 'index.html') + c = 0 + while os.path.exists(fname): + c += 1 + fname = 'index%d.html'%c + htmlfile = open(fname, 'wb') + with htmlfile: + htmlfile.write(html.encode('utf-8')) + odi = options.debug_pipeline + options.debug_pipeline = None + # Generate oeb from html conversion. + with open(htmlfile.name, 'rb') as f: + oeb = html_input.convert(f, options, 'html', log, + {}) + options.debug_pipeline = odi + os.remove(htmlfile.name) + + # Set metadata from file. + from calibre.customize.ui import get_file_type_metadata + from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata + mi = get_file_type_metadata(stream, file_ext) + meta_info_to_oeb_metadata(mi, oeb.metadata, log) + + return oeb + diff --git a/src/calibre/gui2/convert/djvu_input.py b/src/calibre/gui2/convert/djvu_input.py new file mode 100644 index 0000000000..6e52487d81 --- /dev/null +++ b/src/calibre/gui2/convert/djvu_input.py @@ -0,0 +1,24 @@ +# coding: utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Anthon van der Neut ' + + +from calibre.gui2.convert.djvu_input_ui import Ui_Form +from calibre.gui2.convert import Widget + +class PluginWidget(Widget, Ui_Form): + + TITLE = _('DJVU Input') + HELP = _('Options specific to')+' DJVU '+_('input') + COMMIT_NAME = 'djvu_input' + ICON = I('mimetypes/djvu.png') + + def __init__(self, parent, get_option, get_help, db=None, book_id=None): + Widget.__init__(self, parent, + ['use_djvutxt', ]) + self.db, self.book_id = db, book_id + self.initialize_options(get_option, get_help, db, book_id) + diff --git a/src/calibre/gui2/convert/djvu_input.ui b/src/calibre/gui2/convert/djvu_input.ui new file mode 100644 index 0000000000..ad027f645e --- /dev/null +++ b/src/calibre/gui2/convert/djvu_input.ui @@ -0,0 +1,28 @@ + + + Form + + + + 0 + 0 + 400 + 300 + + + + Form + + + + + + Use &djvutxt, if available, for faster processing + + + + + + + +