From f740d20f32e9ca2fbedcb2bcff5e7e4d9b5dfcd4 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sun, 7 Dec 2008 23:53:14 -0500 Subject: [PATCH 01/15] Adding initial LitWriter and oeb2lit code. --- setup.py | 1 + src/calibre/ebooks/lit/html.css | 420 ++++++++++++++++++ src/calibre/ebooks/lit/lzxcomp.py | 176 ++++++++ src/calibre/ebooks/lit/oeb.py | 690 +++++++++++++++++++++++++++++ src/calibre/ebooks/lit/split.py | 149 +++++++ src/calibre/ebooks/lit/stylizer.py | 435 ++++++++++++++++++ src/calibre/ebooks/lit/writer.py | 655 +++++++++++++++++++++++++++ src/calibre/linux.py | 1 + 8 files changed, 2527 insertions(+) create mode 100644 src/calibre/ebooks/lit/html.css create mode 100644 src/calibre/ebooks/lit/lzxcomp.py create mode 100644 src/calibre/ebooks/lit/oeb.py create mode 100644 src/calibre/ebooks/lit/split.py create mode 100644 src/calibre/ebooks/lit/stylizer.py create mode 100644 src/calibre/ebooks/lit/writer.py diff --git a/setup.py b/setup.py index 37d54c4317..aa72b46f00 100644 --- a/setup.py +++ b/setup.py @@ -146,6 +146,7 @@ if __name__ == '__main__': metadata_sqlite = 'library/metadata_sqlite.sql', jquery = 'gui2/viewer/jquery.js', jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js', + html_css = 'ebooks/lit/html.css', ) DEST = os.path.join('src', APPNAME, 'resources.py') diff --git a/src/calibre/ebooks/lit/html.css b/src/calibre/ebooks/lit/html.css new file mode 100644 index 0000000000..5b75ea6649 --- /dev/null +++ b/src/calibre/ebooks/lit/html.css @@ -0,0 +1,420 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Blake Ross + * + * Alternatively, the contents of this file may be used under the terms of + * either of the GNU General Public License Version 2 or later (the "GPL"), + * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +@namespace url(http://www.w3.org/1999/xhtml); /* set default namespace to HTML */ + +/* blocks */ + +html, div, map, dt, isindex, form { + display: block; +} + +body { + display: block; + margin: 8px; +} + +p, dl, multicol { + display: block; + margin: 1em 0; +} + +dd { + display: block; +} + +blockquote { + display: block; + margin: 1em 40px; +} + +address { + display: block; + font-style: italic; +} + +center { + display: block; + text-align: center; +} + +blockquote[type=cite] { + display: block; + margin: 1em 0px; + border-color: blue; + border-width: thin; +} + +span[_moz_quote=true] { + color: blue; +} + +pre[_moz_quote=true] { + color: blue; +} + +h1 { + display: block; + font-size: 2em; + font-weight: bold; + margin: .67em 0; +} + +h2 { + display: block; + font-size: 1.5em; + font-weight: bold; + margin: .83em 0; +} + +h3 { + display: block; + font-size: 1.17em; + font-weight: bold; + margin: 1em 0; +} + +h4 { + display: block; + font-weight: bold; + margin: 1.33em 0; +} + +h5 { + display: block; + font-size: 0.83em; + font-weight: bold; + margin: 1.67em 0; +} + +h6 { + display: block; + font-size: 0.67em; + font-weight: bold; + margin: 2.33em 0; +} + +listing { + display: block; + font-family: monospace; + font-size: medium; + white-space: pre; + margin: 1em 0; +} + +xmp, pre, plaintext { + display: block; + font-family: monospace; + white-space: pre; + margin: 1em 0; +} + +/* tables */ + +table { + display: table; + border-spacing: 2px; + border-collapse: separate; + margin-top: 0; + margin-bottom: 0; + text-indent: 0; +} + +table[align="left"] { + float: left; +} + +table[align="right"] { + float: right; +} + +table[rules]:not([rules="none"]) { + border-collapse: collapse; +} + +/* caption inherits from table not table-outer */ +caption { + display: table-caption; + text-align: center; +} + +table[align="center"] > caption { + margin-left: auto; + margin-right: auto; +} + +table[align="center"] > caption[align="left"] { + margin-right: 0; +} + +table[align="center"] > caption[align="right"] { + margin-left: 0; +} + +tr { + display: table-row; + vertical-align: inherit; +} + +col { + display: table-column; +} + +colgroup { + display: table-column-group; +} + +tbody { + display: table-row-group; + vertical-align: middle; +} + +thead { + display: table-header-group; + vertical-align: middle; +} + +tfoot { + display: table-footer-group; + vertical-align: middle; +} + +/* for XHTML tables without tbody */ +table > tr { + vertical-align: middle; +} + +td { + display: table-cell; + vertical-align: inherit; + text-align: inherit; + padding: 1px; +} + +th { + display: table-cell; + vertical-align: inherit; + font-weight: bold; + padding: 1px; +} + +/* inlines */ + +q:before { + content: open-quote; +} + +q:after { + content: close-quote; +} + +b, strong { + font-weight: bolder; +} + +i, cite, em, var, dfn { + font-style: italic; +} + +tt, code, kbd, samp { + font-family: monospace; +} + +u, ins { + text-decoration: underline; +} + +s, strike, del { + text-decoration: line-through; +} + +blink { + text-decoration: blink; +} + +big { + font-size: larger; +} + +small { + font-size: smaller; +} + +sub { + vertical-align: sub; + font-size: smaller; + line-height: normal; +} + +sup { + vertical-align: super; + font-size: smaller; + line-height: normal; +} + +nobr { + white-space: nowrap; +} + +/* titles */ +abbr[title], acronym[title] { + border-bottom: dotted 1px; +} + +/* lists */ + +ul, menu, dir { + display: block; + list-style-type: disc; + 
margin: 1em 0; +} + +ol { + display: block; + list-style-type: decimal; + margin: 1em 0; +} + +li { + display: list-item; +} + +/* nested lists have no top/bottom margins */ +ul ul, ul ol, ul dir, ul menu, ul dl, +ol ul, ol ol, ol dir, ol menu, ol dl, +dir ul, dir ol, dir dir, dir menu, dir dl, +menu ul, menu ol, menu dir, menu menu, menu dl, +dl ul, dl ol, dl dir, dl menu, dl dl { + margin-top: 0; + margin-bottom: 0; +} + +/* 2 deep unordered lists use a circle */ +ol ul, ul ul, menu ul, dir ul, +ol menu, ul menu, menu menu, dir menu, +ol dir, ul dir, menu dir, dir dir { + list-style-type: circle; +} + +/* 3 deep (or more) unordered lists use a square */ +ol ol ul, ol ul ul, ol menu ul, ol dir ul, +ol ol menu, ol ul menu, ol menu menu, ol dir menu, +ol ol dir, ol ul dir, ol menu dir, ol dir dir, +ul ol ul, ul ul ul, ul menu ul, ul dir ul, +ul ol menu, ul ul menu, ul menu menu, ul dir menu, +ul ol dir, ul ul dir, ul menu dir, ul dir dir, +menu ol ul, menu ul ul, menu menu ul, menu dir ul, +menu ol menu, menu ul menu, menu menu menu, menu dir menu, +menu ol dir, menu ul dir, menu menu dir, menu dir dir, +dir ol ul, dir ul ul, dir menu ul, dir dir ul, +dir ol menu, dir ul menu, dir menu menu, dir dir menu, +dir ol dir, dir ul dir, dir menu dir, dir dir dir { + list-style-type: square; +} + + +/* leafs */ + +/*
noshade and color attributes are handled completely by + * the nsHTMLHRElement attribute mapping code + */ +hr { + display: block; + height: 2px; + border: 1px inset; + margin: 0.5em auto 0.5em auto; + color: gray; +} + +hr[size="1"] { + border-style: solid none none none; +} + +img[usemap], object[usemap] { + color: blue; +} + +frameset { + display: block ! important; + position: static ! important; + float: none ! important; + border: none ! important; +} + +frame { + border: none ! important; +} + +iframe { + border: 2px inset; +} + +noframes { + display: none; +} + +spacer { + position: static ! important; + float: none ! important; +} + +/* focusable content: anything w/ tabindex >=0 is focusable */ +abbr:focus, acronym:focus, address:focus, applet:focus, b:focus, +base:focus, big:focus, blockquote:focus, br:focus, canvas:focus, caption:focus, +center:focus, cite:focus, code:focus, col:focus, colgroup:focus, dd:focus, +del:focus, dfn:focus, dir:focus, div:focus, dl:focus, dt:focus, em:focus, +fieldset:focus, font:focus, form:focus, h1:focus, h2:focus, h3:focus, h4:focus, +h5:focus, h6:focus, hr:focus, i:focus, img:focus, ins:focus, +kbd:focus, label:focus, legend:focus, li:focus, link:focus, menu:focus, +object:focus, ol:focus, p:focus, pre:focus, q:focus, s:focus, samp:focus, +small:focus, span:focus, strike:focus, strong:focus, sub:focus, sup:focus, +table:focus, tbody:focus, td:focus, tfoot:focus, th:focus, thead:focus, +tr:focus, tt:focus, u:focus, ul:focus, var:focus { + /* Don't specify the outline-color, we should always use initial value. */ + outline: 1px dotted; +} + +/* hidden elements */ +area, base, basefont, head, meta, script, style, title, +noembed, param { + display: none; +} + +/* Page breaks at body tags, to help out with LIT-generation */ +body { + page-break-before: always; +} diff --git a/src/calibre/ebooks/lit/lzxcomp.py b/src/calibre/ebooks/lit/lzxcomp.py new file mode 100644 index 0000000000..4f147a90a1 --- /dev/null +++ b/src/calibre/ebooks/lit/lzxcomp.py @@ -0,0 +1,176 @@ +from __future__ import with_statement +import sys +import os +from cStringIO import StringIO +from ctypes import * + +__all__ = ['Compressor'] + +liblzxcomp = cdll.LoadLibrary('liblzxcomp.so') + +class lzx_data(Structure): + pass + +lzx_get_bytes_t = CFUNCTYPE(c_int, c_voidp, c_int, c_voidp) +lzx_put_bytes_t = CFUNCTYPE(c_int, c_voidp, c_int, c_voidp) +lzx_mark_frame_t = CFUNCTYPE(None, c_voidp, c_uint32, c_uint32) +lzx_at_eof_t = CFUNCTYPE(c_int, c_voidp) + +class lzx_results(Structure): + _fields_ = [('len_compressed_output', c_long), + ('len_uncompressed_input', c_long)] + +# int lzx_init(struct lzx_data **lzxdp, int wsize_code, +# lzx_get_bytes_t get_bytes, void *get_bytes_arg, +# lzx_at_eof_t at_eof, +# lzx_put_bytes_t put_bytes, void *put_bytes_arg, +# lzx_mark_frame_t mark_frame, void *mark_frame_arg); +lzx_init = liblzxcomp.lzx_init +lzx_init.restype = c_int +lzx_init.argtypes = [POINTER(POINTER(lzx_data)), c_int, + lzx_get_bytes_t, c_voidp, + lzx_at_eof_t, + lzx_put_bytes_t, c_voidp, + lzx_mark_frame_t, c_voidp] + +# void lzx_reset(lzx_data *lzxd); +lzx_reset = liblzxcomp.lzx_reset +lzx_reset.restype = None +lzx_reset.argtypes = [POINTER(lzx_data)] + +# int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide); +lzx_compress_block = liblzxcomp.lzx_compress_block +lzx_compress_block.restype = c_int +lzx_compress_block.argtypes = [POINTER(lzx_data), c_int, c_int] + +# int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr); +lzx_finish = liblzxcomp.lzx_finish 
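# Illustrative sketch, independent of liblzxcomp: the binding pattern used
# above -- declare CFUNCTYPE prototypes, wrap Python callables in them, and
# keep references alive for as long as the C side may call back -- is the
# standard ctypes callback recipe.  Shown here against libc's qsort(),
# assuming a libc that ctypes.util.find_library('c') can locate:
import ctypes
import ctypes.util

libc = ctypes.CDLL(ctypes.util.find_library('c'))
# int (*compar)(const void *a, const void *b)
CMPFUNC = ctypes.CFUNCTYPE(ctypes.c_int,
                           ctypes.POINTER(ctypes.c_int),
                           ctypes.POINTER(ctypes.c_int))

def compare_ints(a, b):
    return a[0] - b[0]                  # dereference both pointers and compare

cmp_callback = CMPFUNC(compare_ints)    # keep a reference, as Compressor below
                                        # does for its get/put/eof callbacks
values = (ctypes.c_int * 5)(5, 1, 7, 33, 99)
libc.qsort(values, len(values), ctypes.sizeof(ctypes.c_int), cmp_callback)
assert list(values) == [1, 5, 7, 33, 99]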
+lzx_finish.restype = c_int +lzx_finish.argtypes = [POINTER(lzx_data), POINTER(lzx_results)] + + +class LzxError(Exception): + pass + + +class Compressor(object): + def __init__(self, wbits, reset=True): + self._reset = reset + self._blocksize = 1 << wbits + self._buffered = 0 + self._input = StringIO() + self._output = StringIO() + self._flushing = False + self._rtable = [] + self._get_bytes = lzx_get_bytes_t(self._get_bytes) + self._at_eof = lzx_at_eof_t(self._at_eof) + self._put_bytes = lzx_put_bytes_t(self._put_bytes) + self._mark_frame = lzx_mark_frame_t(self._mark_frame) + self._lzx = POINTER(lzx_data)() + self._results = lzx_results() + rv = lzx_init(self._lzx, wbits, self._get_bytes, c_voidp(), + self._at_eof, self._put_bytes, c_voidp(), + self._mark_frame, c_voidp()) + if rv != 0: + raise LzxError("lzx_init() failed with %d" % rv) + + def _add_input(self, data): + self._input.seek(0, 2) + self._input.write(data) + self._input.seek(0) + self._buffered += len(data) + + def _reset_input(self): + data = self._input.read() + self._input.seek(0) + self._input.truncate() + self._input.write(data) + self._input.seek(0) + + def _reset_output(self): + data = self._output.getvalue() + self._output.seek(0) + self._output.truncate() + return data + + def _reset_rtable(self): + rtable = list(self._rtable) + del self._rtable[:] + return rtable + + def _get_bytes(self, arg, n, buf): + data = self._input.read(n) + memmove(buf, data, len(data)) + self._buffered -= len(data) + return len(data) + + def _put_bytes(self, arg, n, buf): + self._output.write(string_at(buf, n)) + return n + + def _at_eof(self, arg): + if self._flushing and self._buffered == 0: + return 1 + return 0 + + def _mark_frame(self, arg, uncomp, comp): + self._rtable.append((uncomp, comp)) + return + + def _compress_block(self): + rv = lzx_compress_block(self._lzx, self._blocksize, 1) + if rv != 0: + raise LzxError("lzx_compress_block() failed with %d" % rv) + if self._reset: + lzx_reset(self._lzx) + + def compress(self, data, flush=False): + self._add_input(data) + self._flushing = flush + while self._buffered >= self._blocksize: + self._compress_block() + if self._buffered > 0 and flush: + self._compress_block() + self._reset_input() + data = self._reset_output() + rtable = self._reset_rtable() + return (data, rtable) + + def flush(self): + self._flushing = True + if self._buffered > 0: + self._compress_block() + self._reset_input() + data = self._reset_output() + rtable = self._reset_rtable() + return (data, rtable) + + def close(self): + if self._lzx: + lzx_finish(self._lzx, self._results) + self._lzx = None + pass + + def __enter__(self): + return self + + def __exit__(self, *exc_info): + self.close() + + def __del__(self): + self.close() + + +def main(argv=sys.argv): + wbits, inf, outf = argv[1:] + with open(inf, 'rb') as f: + data = f.read() + with Compressor(int(wbits)) as lzx: + data, rtable = lzx.compress(data, flush=True) + print rtable + with open(outf, 'wb') as f: + f.write(data) + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/lit/oeb.py new file mode 100644 index 0000000000..a4ad927fed --- /dev/null +++ b/src/calibre/ebooks/lit/oeb.py @@ -0,0 +1,690 @@ +from __future__ import with_statement +import os +import sys +from collections import defaultdict +from types import StringTypes +from itertools import izip, count +from urlparse import urldefrag +from lxml import etree + +XML_PARSER = etree.XMLParser( + remove_blank_text=True, recover=True, 
resolve_entities=False) +XHTML_NS = 'http://www.w3.org/1999/xhtml' +OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/' +OPF2_NS = 'http://www.idpf.org/2007/opf' +DC09_NS = 'http://purl.org/metadata/dublin_core' +DC10_NS = 'http://purl.org/dc/elements/1.0/' +DC11_NS = 'http://purl.org/dc/elements/1.1/' +XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance' +DCTERMS_NS = 'http://purl.org/dc/terms/' +NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/' +XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS, + 'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS, + 'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS} + +def XHTML(name): return '{%s}%s' % (XHTML_NS, name) +def OPF(name): return '{%s}%s' % (OPF2_NS, name) +def DC(name): return '{%s}%s' % (DC11_NS, name) +def NCX(name): return '{%s}%s' % (NCX_NS, name) + +XHTML_MIME = 'application/xhtml+xml' +CSS_MIME = 'text/css' +NCX_MIME = 'application/x-dtbncx+xml' +OPF_MIME = 'application/oebps-package+xml' + +OEB_STYLES = set([CSS_MIME, 'text/x-oeb1-css', 'text/x-oeb-css']) +OEB_DOCS = set([XHTML_MIME, 'text/html', 'text/x-oeb1-document', + 'text/x-oeb-document']) + + +def element(parent, *args, **kwargs): + if parent is not None: + return etree.SubElement(parent, *args, **kwargs) + return etree.Element(*args, **kwargs) + +def namespace(name): + if '}' in name: + return name.split('}', 1)[0][1:] + return '' + +def barename(name): + if '}' in name: + return name.split('}', 1)[1] + return name + +def xpath(elem, expr): + return elem.xpath(expr, namespaces=XPNSMAP) + + +class AbstractContainer(object): + def read_xml(self, path): + return etree.fromstring( + self.read(path), parser=XML_PARSER, + base_url=os.path.dirname(path)) + +class DirContainer(AbstractContainer): + def __init__(self, rootdir): + self.rootdir = rootdir + + def read(self, path): + path = os.path.join(self.rootdir, path) + with open(path, 'rb') as f: + return f.read() + + def write(self, path, data): + path = os.path.join(self.rootdir, path) + with open(path, 'wb') as f: + return f.write(data) + + +class Metadata(object): + TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description', + 'format', 'identifier', 'language', 'publisher', 'relation', + 'rights', 'source', 'subject', 'title', 'type']) + OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS} + OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS, + 'xsi': XSI_NS} + + class Item(object): + def __init__(self, term, value, fq_attrib={}): + if term == OPF('meta') and not value: + fq_attrib = dict(fq_attrib) + term = fq_attrib.pop('name') + value = fq_attrib.pop('content') + elif term in Metadata.TERMS and not namespace(term): + term = DC(term) + self.term = term + self.value = value + self.fq_attrib = dict(fq_attrib) + self.attrib = attrib = {} + for fq_attr in fq_attrib: + attr = barename(fq_attr) + attrib[attr] = fq_attrib[fq_attr] + + def __getattr__(self, name): + name = name.replace('_', '-') + try: + return self.attrib[name] + except KeyError: + raise AttributeError( + '%r object has no attribute %r' \ + % (self.__class__.__name__, name)) + + def __repr__(self): + return 'Item(term=%r, value=%r, attrib=%r)' \ + % (barename(self.term), self.value, self.attrib) + + def __str__(self): + return str(self.value) + + def __unicode__(self): + return unicode(self.value) + + def to_opf1(self, dcmeta=None, xmeta=None): + if namespace(self.term) == DC11_NS: + name = DC(barename(self.term).title()) + elem = element(dcmeta, name, attrib=self.attrib) + elem.text = self.value + else: + elem = element(xmeta, 
'meta', attrib=self.attrib) + elem.attrib['name'] = self.term + elem.attrib['content'] = self.value + return elem + + def to_opf2(self, parent=None): + if namespace(self.term) == DC11_NS: + elem = element(parent, self.term, attrib=self.fq_attrib) + elem.text = self.value + else: + elem = element(parent, OPF('meta'), attrib=self.fq_attrib) + elem.attrib['name'] = self.term + elem.attrib['content'] = self.value + return elem + + def __init__(self, oeb): + self.oeb = oeb + self.items = defaultdict(list) + + def add(self, term, value, attrib): + item = self.Item(term, value, attrib) + items = self.items[barename(term)] + items.append(item) + return item + + def iterkeys(self): + for key in self.items: + yield key + __iter__ = iterkeys + + def __getitem__(self, key): + return self.items[key] + + def __contains__(self, key): + return key in self.items + + def __getattr__(self, term): + return self.items[term] + + def to_opf1(self, parent=None): + elem = element(parent, 'metadata') + dcmeta = element(elem, 'dc-metadata', nsmap=self.OPF1_NSMAP) + xmeta = element(elem, 'x-metadata') + for term in self.items: + for item in self.items[term]: + item.to_opf1(dcmeta, xmeta) + if 'ms-chaptertour' not in self.items: + chaptertour = self.Item('ms-chaptertour', 'chaptertour') + chaptertour.to_opf1(dcmeta, xmeta) + return elem + + def to_opf2(self, parent=None): + elem = element(parent, OPF('metadata'), nsmap=self.NSMAP) + for term in self.items: + for item in self.items[term]: + item.to_opf2(elem) + return elem + + +class Manifest(object): + class Item(object): + def __init__(self, id, href, media_type, loader=str): + self.id = id + self.href = self.path = href.replace('%20', ' ') + self.media_type = media_type + self.spine_position = None + self.linear = True + self._loader = loader + self._data = None + + def __repr__(self): + return 'Item(id=%r, href=%r, media_type=%r)' \ + % (self.id, self.href, self.media_type) + + def data(): + def fget(self): + if self._data: + return self._data + data = self._loader(self.href) + if self.media_type == XHTML_MIME: + data = etree.fromstring(data, parser=XML_PARSER) + if namespace(data.tag) != XHTML_NS: + data.attrib['xmlns'] = XHTML_NS + data = etree.tostring(data) + data = etree.fromstring(data, parser=XML_PARSER) + elif self.media_type.startswith('application/') \ + and self.media_type.endswith('+xml'): + data = etree.fromstring(data, parser=XML_PARSER) + return data + def fset(self, value): + self._data = value + def fdel(self): + self._data = None + return property(fget, fset, fdel) + data = data() + + def __cmp__(self, other): + result = cmp(self.spine_position, other.spine_position) + if result != 0: + return result + return cmp(self.id, other.id) + + def __init__(self, oeb): + self.oeb = oeb + self.items = {} + self.hrefs = {} + + def add(self, id, href, media_type): + item = self.Item(id, href, media_type, self.oeb.container.read) + self.items[id] = item + self.hrefs[href] = item + return item + + def remove(self, id): + href = self.items[id].href + del self.items[id] + del self.hrefs[href] + + def __iter__(self): + for id in self.items: + yield id + + def __getitem__(self, id): + return self.items[id] + + def values(self): + for item in self.items.values(): + yield item + + def items(self): + for id, item in self.refs.items(): + yield id, items + + def __contains__(self, key): + return id in self.items + + def to_opf1(self, parent=None): + elem = element(parent, 'manifest') + for item in self.items.values(): + attrib = {'id': item.id, 'href': item.href, + 
'media-type': item.media_type} + element(elem, 'item', attrib=attrib) + return elem + + def to_opf2(self, parent=None): + elem = element(parent, OPF('manifest')) + for item in self.items.values(): + attrib = {'id': item.id, 'href': item.href, + 'media-type': item.media_type} + element(elem, OPF('item'), attrib=attrib) + return elem + + +class Spine(object): + def __init__(self, oeb): + self.oeb = oeb + self.items = [] + + def add(self, item, linear): + if isinstance(linear, StringTypes): + linear = linear.lower() + if linear is None or linear in ('yes', 'true'): + linear = True + elif linear in ('no', 'false'): + linear = False + item.linear = linear + item.spine_position = len(self.items) + self.items.append(item) + return item + + def __iter__(self): + for item in self.items: + yield item + + def __getitem__(self, index): + return self.items[index] + + def __len__(self): + return len(self.items) + + def __contains__(self, item): + return (item in self.items) + + def to_opf1(self, parent=None): + elem = element(parent, 'spine') + for item in self.items: + if item.linear: + element(elem, 'itemref', attrib={'idref': item.id}) + return elem + + def to_opf2(self, parent=None): + elem = element(parent, OPF('spine')) + for item in self.items: + attrib = {'idref': item.id} + if not item.linear: + attrib['linear'] = 'no' + element(elem, OPF('itemref'), attrib=attrib) + return elem + + +class Guide(object): + class Reference(object): + def __init__(self, type, title, href): + self.type = type + self.title = title + self.href = href + + def __repr__(self): + return 'Reference(type=%r, title=%r, href=%r)' \ + % (self.type, self.title, self.href) + + def __init__(self, oeb): + self.oeb = oeb + self.refs = {} + + def add(self, type, title, href): + ref = self.Reference(type, title, href) + self.refs[type] = ref + return ref + + def by_type(self, type): + return self.ref_types[type] + + def iterkeys(self): + for type in self.refs: + yield type + __iter__ = iterkeys + + def values(self): + for ref in self.refs.values(): + yield ref + + def items(self): + for type, ref in self.refs.items(): + yield type, ref + + def __getitem__(self, index): + return self.refs[index] + + def __contains__(self, key): + return key in self.refs + + def to_opf1(self, parent=None): + elem = element(parent, 'guide') + for ref in self.refs.values(): + attrib = {'type': ref.type, 'href': ref.href} + if ref.title: + attrib['title'] = ref.title + element(elem, 'reference', attrib=attrib) + return elem + + def to_opf2(self, parent=None): + elem = element(parent, OPF('guide')) + for ref in self.refs.values(): + attrib = {'type': ref.type, 'href': ref.href} + if ref.title: + attrib['title'] = ref.title + element(elem, OPF('reference'), attrib=attrib) + return elem + + +class Toc(object): + def __init__(self, title=None, href=None, klass=None, id=None): + self.title = title + self.href = href + self.klass = klass + self.id = id + self.nodes = [] + + def add(self, title, href, klass=None, id=None): + node = Toc(title, href, klass, id) + self.nodes.append(node) + return node + + def __iter__(self): + for node in self.nodes: + yield node + + def __getitem__(self, index): + return self.nodes[index] + + def depth(self, level=0): + if self.nodes: + return self.nodes[0].depth(level+1) + return level + + def to_opf1(self, tour): + for node in self.nodes: + element(tour, 'site', + attrib={'title': node.title, 'href': node.href}) + node.to_opf1(tour) + return tour + + def to_ncx(self, parent, playorder=None, depth=1): + if not playorder: 
playorder = [0] + for node in self.nodes: + playorder[0] += 1 + point = etree.SubElement(parent, + NCX('navPoint'), attrib={'playOrder': str(playorder[0])}) + if self.klass: + point.attrib['class'] = self.klass + if self.id: + point.attrib['id'] = self.id + label = etree.SubElement(point, NCX('navLabel')) + etree.SubElement(label, NCX('text')).text = node.title + href = node.href if depth > 1 else node.href.split('#', 1)[0] + etree.SubElement(point, NCX('content'), attrib={'src': href}) + node.to_ncx(point, playorder, depth+1) + return parent + + +class Oeb(object): + def __init__(self, opfpath, container=None): + if not container: + container = DirContainer(os.path.dirname(opfpath)) + opfpath = os.path.basename(opfpath) + self.container = container + opf = self._read_opf(opfpath) + self._all_from_opf(opf) + + def _convert_opf1(self, opf): + nroot = etree.Element(OPF('package'), + nsmap={None: OPF2_NS}, version="2.0", **dict(opf.attrib)) + metadata = etree.SubElement(nroot, OPF('metadata'), + nsmap={'opf': OPF2_NS, 'dc': DC11_NS, + 'xsi': XSI_NS, 'dcterms': DCTERMS_NS}) + for prefix in ('d11', 'd10', 'd09'): + elements = xpath(opf, 'metadata/dc-metadata/%s:*' % prefix) + if elements: break + for element in elements: + if not element.text: continue + tag = barename(element.tag).lower() + element.tag = '{%s}%s' % (DC11_NS, tag) + for name in element.attrib: + if name in ('role', 'file-as', 'scheme'): + nsname = '{%s}%s' % (OPF2_NS, name) + element.attrib[nsname] = element.attrib[name] + del element.attrib[name] + metadata.append(element) + for element in opf.xpath('metadata/x-metadata/meta'): + metadata.append(element) + for item in opf.xpath('manifest/item'): + media_type = item.attrib['media-type'] + if media_type in OEB_DOCS: + media_type = XHTML_MIME + elif media_type in OEB_STYLES: + media_type = CSS_MIME + item.attrib['media-type'] = media_type + for tag in ('manifest', 'spine', 'tours', 'guide'): + for element in opf.xpath(tag): + nroot.append(element) + return etree.fromstring(etree.tostring(nroot), parser=XML_PARSER) + + def _read_opf(self, opfpath): + opf = self.container.read_xml(opfpath) + version = float(opf.get('version', 1.0)) + if version < 2.0: + opf = self._convert_opf1(opf) + return opf + + def _metadata_from_opf(self, opf): + uid = opf.attrib['unique-identifier'] + self.metadata = metadata = Metadata(self) + for elem in xpath(opf, '/o2:package/o2:metadata/*'): + metadata.add(elem.tag, elem.text, elem.attrib) + for item in metadata.identifier: + if item.id == uid: + self.uid = item + break + + def _manifest_from_opf(self, opf): + self.manifest = manifest = Manifest(self) + for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'): + manifest.add(elem.get('id'), elem.get('href'), + elem.get('media-type')) + + def _spine_from_opf(self, opf): + self.spine = spine = Spine(self) + for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'): + item = self.manifest[elem.get('idref')] + spine.add(item, elem.get('linear')) + extras = [] + for item in self.manifest.values(): + if item.media_type == XHTML_MIME \ + and item not in spine: + extras.append(item) + extras.sort() + for item in extras: + spine.add(item, False) + + def _guide_from_opf(self, opf): + self.guide = guide = Guide(self) + for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'): + guide.add(elem.get('type'), elem.get('title'), elem.get('href')) + + def _toc_from_navpoint(self, toc, navpoint): + children = xpath(navpoint, 'ncx:navPoint') + for child in children: + title = xpath(child, 
'ncx:navLabel/ncx:text/text()')[0] + href = xpath(child, 'ncx:content/@src')[0] + id = child.get('id') + klass = child.get('class') + node = toc.add(title, href, id=id, klass=klass) + self._toc_from_navpoint(node, child) + + def _toc_from_ncx(self, opf): + result = xpath(opf, '/o2:package/o2:spine/@toc') + if not result: + return False + id = result[0] + ncx = self.manifest[id].data + self.manifest.remove(id) + title = xpath(ncx, 'ncx:docTitle/ncx:text/text()')[0] + self.toc = toc = Toc(title) + navmaps = xpath(ncx, 'ncx:navMap') + for navmap in navmaps: + self._toc_from_navpoint(toc, navmap) + return True + + def _toc_from_tour(self, opf): + result = xpath(opf, '/o2:package/o2:tours/o2:tour') + if not result: + return False + tour = result[0] + self.toc = toc = Toc(tour.get('title')) + sites = xpath(tour, 'o2:site') + for site in sites: + toc.add(site.get('title'), site.get('href')) + return True + + def _toc_from_html(self, opf): + if 'toc' not in self.guide: + return False + self.toc = toc = Toc() + itempath, frag = urldefrag(self.guide['toc'].href) + item = self.manifest.hrefs[itempath] + html = item.data + if frag: + elem = xpath(html, './/*[@id="%s"]' % frag) + html = elem[0] if elem else html + titles = defaultdict(list) + order = [] + for anchor in xpath(html, './/h:a[@href]'): + href = anchor.attrib['href'] + path, frag = urldefrag(href) + if not path: + href = '#'.join((itempath, frag)) + title = ' '.join(xpath(anchor, './/text()')) + if href not in titles: + order.append(href) + titles[href].append(title) + for href in order: + toc.add(' '.join(titles[href]), href) + return True + + def _toc_from_spine(self, opf): + self.toc = toc = Toc() + titles = [] + headers = [] + for item in self.spine: + if not item.linear: continue + html = item.data + title = xpath(html, '/h:html/h:head/h:title/text()') + if title: titles.append(title[0]) + headers.append('(unlabled)') + for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'): + expr = '/h:html/h:body//h:%s[position()=1]/text()' % (tag,) + header = xpath(html, expr) + if header: + headers[-1] = header[0] + break + use = titles + if len(titles) > len(set(titles)): + use = headers + for title, item in izip(use, self.spine): + if not item.linear: continue + toc.add(title, item.href) + return True + + def _toc_from_opf(self, opf): + if self._toc_from_ncx(opf): return + if self._toc_from_tour(opf): return + if self._toc_from_html(opf): return + self._toc_from_spine(opf) + + def _all_from_opf(self, opf): + self._metadata_from_opf(opf) + self._manifest_from_opf(opf) + self._spine_from_opf(opf) + self._guide_from_opf(opf) + self._toc_from_opf(opf) + + def to_opf1(self): + package = etree.Element('package', + attrib={'unique-identifier': self.uid.id}) + metadata = self.metadata.to_opf1(package) + manifest = self.manifest.to_opf1(package) + spine = self.spine.to_opf1(package) + tours = element(package, 'tours') + tour = element(tours, 'tour', + attrib={'id': 'chaptertour', 'title': 'Chapter Tour'}) + self.toc.to_opf1(tour) + guide = self.guide.to_opf1(package) + return {OPF_MIME: ('content.opf', package)} + + def _generate_ncx_item(self): + id = 'ncx' + index = 0 + while id in self.manifest: + id = 'ncx' + str(index) + index = index + 1 + href = 'toc' + index = 0 + while (href + '.ncx') in self.manifest.hrefs: + href = 'toc' + str(index) + href += '.ncx' + return (id, href) + + def _to_ncx(self): + ncx = etree.Element(NCX('ncx'), attrib={'version': '2005-1'}, + nsmap={None: NCX_NS}) + head = etree.SubElement(ncx, NCX('head')) + etree.SubElement(head, 
NCX('meta'), + attrib={'name': 'dtb:uid', 'content': unicode(self.uid)}) + etree.SubElement(head, NCX('meta'), + attrib={'name': 'dtb:depth', 'content': str(self.toc.depth())}) + etree.SubElement(head, NCX('meta'), + attrib={'name': 'dtb:totalPageCount', 'content': '0'}) + etree.SubElement(head, NCX('meta'), + attrib={'name': 'dtb:maxPageNumber', 'content': '0'}) + title = etree.SubElement(ncx, NCX('docTitle')) + text = etree.SubElement(title, NCX('text')) + text.text = unicode(self.metadata.title[0]) + navmap = etree.SubElement(ncx, NCX('navMap')) + self.toc.to_ncx(navmap) + return ncx + + def to_opf2(self): + package = etree.Element(OPF('package'), + attrib={'version': '2.0', 'unique-identifier': self.uid.id}, + nsmap={None: OPF2_NS}) + metadata = self.metadata.to_opf2(package) + manifest = self.manifest.to_opf2(package) + id, href = self._generate_ncx_item() + etree.SubElement(manifest, OPF('item'), + attrib={'id': id, 'href': href, 'media-type': NCX_MIME}) + spine = self.spine.to_opf2(package) + spine.attrib['toc'] = id + guide = self.guide.to_opf2(package) + ncx = self._to_ncx() + return {OPF_MIME: ('content.opf', package), + NCX_MIME: (href, ncx)} + +def main(argv=sys.argv): + for arg in argv[1:]: + oeb = Oeb(arg) + for name, doc in oeb.to_opf2().items(): + print etree.tostring(doc, pretty_print=True) + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/calibre/ebooks/lit/split.py b/src/calibre/ebooks/lit/split.py new file mode 100644 index 0000000000..2083f95016 --- /dev/null +++ b/src/calibre/ebooks/lit/split.py @@ -0,0 +1,149 @@ +#! /usr/bin/python + +from __future__ import with_statement +import sys +import os +import re +import types +import copy +import itertools +from collections import defaultdict +from lxml import etree +from stylizer import Page, Stylizer, Style + +XHTML_NS = 'http://www.w3.org/1999/xhtml' +XPNSMAP = {'h': XHTML_NS,} + +class Splitter(object): + XML_PARSER = etree.XMLParser(remove_blank_text=True) + COLLAPSE = re.compile(r'[ \n\r]+') + CONTENT_TAGS = set(['img', 'object', 'embed']) + for tag in list(CONTENT_TAGS): + CONTENT_TAGS.add('{%s}%s' % (XHTML_NS, tag)) + + def __init__(self, path): + with open(path, 'rb') as f: + self.tree = etree.parse(f, parser=self.XML_PARSER) + self.stylizer = Stylizer(self.tree, path) + self.path = path + self.basename = os.path.splitext( + os.path.basename(path))[0].lower() + self.splits = [] + self.names = [] + self.idmap = {} + self.fonts = defaultdict(int) + self.content = False + + def split(self): + tree = self.tree + for prefix in ('', 'h:'): + d = {'h': prefix} + roots = tree.xpath('/%(h)shtml' % d, namespaces=XPNSMAP) + if roots: break + self.root, = roots + self.head, = tree.xpath('/%(h)shtml/%(h)shead' % d, namespaces=XPNSMAP) + body, = tree.xpath('/%(h)shtml/%(h)sbody' % d, namespaces=XPNSMAP) + self._split(body, [self.new_root(str(self.basename))], 9.0) + results = zip(self.names, self.splits) + self.post_process_links(results, d) + return results + + def new_root(self, name): + nroot = self.dup(self.root) + nroot.append(copy.deepcopy(self.head)) + self.splits.append(nroot) + self.names.append(name + '.html') + return nroot + + def dup(self, e): + new = etree.Element(e.tag, nsmap=e.nsmap, **dict(e.attrib)) + new.text = e.text + new.tail = e.tail + return new + + def dupsub(self, p, e): + new = etree.SubElement(p, e.tag, nsmap=e.nsmap, **dict(e.attrib)) + new.text = e.text + new.tail = e.tail + return new + + def _split(self, src, dstq, psize): + style = self.stylizer.style(src) + if 
self.new_page(style, 'before'): + self.new_split(src, dstq) + attrib = src.attrib + name = self.names[-1] + for aname in ('id', 'name'): + if aname in attrib: + self.idmap[attrib[aname]] = name + text = self.COLLAPSE.sub(' ', src.text or '') + tail = self.COLLAPSE.sub(' ', src.text or '') + if text or tail or src.tag.lower() in self.CONTENT_TAGS: + self.content = True + size = style['font-size'] + self.fonts[size] += len(text) + self.fonts[psize] += len(tail) + new = self.dupsub(dstq[-1], src) + if len(src) > 0: + dstq.append(new) + for child in src: + self._split(child, dstq, size) + dstq.pop() + if self.new_page(style, 'after'): + self.new_split(src, dstq) + + def new_page(self, style, when): + if self.content \ + and (style['page-break-%s' % when] \ + in ('always', 'odd', 'even')): + return True + return False + + def new_split(self, src, dstq): + name = self.basename + attrib = src.attrib + if 'class' in attrib: + name = src.attrib['class'] + if ' ' in name: + name = name.split(' ', 2)[0] + if 'id' in attrib: + name = '%s-%s' % (name, attrib['id']) + name = name.lower().replace('_', '-') + if (name + '.html') in self.names: + name = '%s-%02d' % (name, len(self.names)) + prev = None + for i in xrange(len(dstq)): + new = self.new_root(name) if prev is None \ + else self.dupsub(prev, dstq[i]) + prev = dstq[i] = new + self.content = False + + def post_process_links(self, results, prefixes): + basename = os.path.basename(self.path) + query = '//%(h)sa[@href]' % prefixes + for name, root in results: + elements = root.xpath(query, namespaces=XPNSMAP) + for element in elements: + href = element.attrib['href'] + if '#' not in href: continue + fname, id = href.split('#', 2) + if fname in ('', basename): + href = '#'.join((self.idmap[id], id)) + element.attrib['href'] = href + +def main(): + def xml2str(root): + return etree.tostring(root, pretty_print=True, + encoding='utf-8', xml_declaration=True) + tree = None + path = sys.argv[1] + dest = sys.argv[2] + splitter = Splitter(path) + for name, root in splitter.split(): + print name + with open(os.path.join(dest, name), 'wb') as f: + f.write(xml2str(root)) + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/calibre/ebooks/lit/stylizer.py b/src/calibre/ebooks/lit/stylizer.py new file mode 100644 index 0000000000..97b7e2d91d --- /dev/null +++ b/src/calibre/ebooks/lit/stylizer.py @@ -0,0 +1,435 @@ +#! 
/usr/bin/python2.5 +# -*- encoding: utf-8 -*- + +from __future__ import with_statement +import sys +import os +import locale +import codecs +import itertools +import types +import re +import copy +import cssutils +from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \ + CSSValueList, cssproperties +from lxml import etree +from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES, barename +from calibre.resources import html_css + +HTML_CSS_STYLESHEET = cssutils.parseString(html_css) +XHTML_CSS_NAMESPACE = "@namespace url(http://www.w3.org/1999/xhtml);\n" + +INHERITED = set(['azimuth', 'border-collapse', 'border-spacing', + 'caption-side', 'color', 'cursor', 'direction', 'elevation', + 'empty-cells', 'font-family', 'font-size', 'font-style', + 'font-variant', 'font-weight', 'letter-spacing', + 'line-height', 'list-style-image', 'list-style-position', + 'list-style-type', 'orphans', 'page-break-inside', + 'pitch-range', 'pitch', 'quotes', 'richness', 'speak-header', + 'speak-numeral', 'speak-punctuation', 'speak', 'speech-rate', + 'stress', 'text-align', 'text-indent', 'text-transform', + 'visibility', 'voice-family', 'volume', 'white-space', + 'widows', 'word-spacing']) + +DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll', + 'background-color': 'transparent', 'background-image': 'none', + 'background-position': '0% 0%', 'background-repeat': 'repeat', + 'border-bottom-color': ':color', 'border-bottom-style': 'none', + 'border-bottom-width': 'medium', 'border-collapse': 'separate', + 'border-left-color': ':color', 'border-left-style': 'none', + 'border-left-width': 'medium', 'border-right-color': ':color', + 'border-right-style': 'none', 'border-right-width': 'medium', + 'border-spacing': 0, 'border-top-color': ':color', + 'border-top-style': 'none', 'border-top-width': 'medium', 'bottom': + 'auto', 'caption-side': 'top', 'clear': 'none', 'clip': 'auto', + 'color': 'black', 'content': 'normal', 'counter-increment': 'none', + 'counter-reset': 'none', 'cue-after': 'none', 'cue-before': 'none', + 'cursor': 'auto', 'direction': 'ltr', 'display': 'inline', + 'elevation': 'level', 'empty-cells': 'show', 'float': 'none', + 'font-family': 'serif', 'font-size': 'medium', 'font-style': + 'normal', 'font-variant': 'normal', 'font-weight': 'normal', + 'height': 'auto', 'left': 'auto', 'letter-spacing': 'normal', + 'line-height': 'normal', 'list-style-image': 'none', + 'list-style-position': 'outside', 'list-style-type': 'disc', + 'margin-bottom': 0, 'margin-left': 0, 'margin-right': 0, + 'margin-top': 0, 'max-height': 'none', 'max-width': 'none', + 'min-height': 0, 'min-width': 0, 'orphans': '2', + 'outline-color': 'invert', 'outline-style': 'none', + 'outline-width': 'medium', 'overflow': 'visible', 'padding-bottom': + 0, 'padding-left': 0, 'padding-right': 0, 'padding-top': 0, + 'page-break-after': 'auto', 'page-break-before': 'auto', + 'page-break-inside': 'auto', 'pause-after': 0, 'pause-before': + 0, 'pitch': 'medium', 'pitch-range': '50', 'play-during': 'auto', + 'position': 'static', 'quotes': u"'“' '”' '‘' '’'", 'richness': + '50', 'right': 'auto', 'speak': 'normal', 'speak-header': 'once', + 'speak-numeral': 'continuous', 'speak-punctuation': 'none', + 'speech-rate': 'medium', 'stress': '50', 'table-layout': 'auto', + 'text-align': 'left', 'text-decoration': 'none', 'text-indent': + 0, 'text-transform': 'none', 'top': 'auto', 'unicode-bidi': + 'normal', 'vertical-align': 'baseline', 'visibility': 'visible', + 'voice-family': 'default', 'volume': 
'medium', 'white-space': + 'normal', 'widows': '2', 'width': 'auto', 'word-spacing': 'normal', + 'z-index': 'auto'} + +FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large', + 'x-large', 'xx-large']) + +FONT_SIZE_LIST = [('xx-small', 1, 6.), + ('x-small', None, 7.), + ('small', 2, 8.), + ('medium', 3, 9.), + ('large', 4, 11.), + ('x-large', 5, 13.), + ('xx-large', 6, 15.), + (None, 7, 17.)] + +FONT_SIZE_BY_NAME = {} +FONT_SIZE_BY_NUM = {} +for name, num, size in FONT_SIZE_LIST: + FONT_SIZE_BY_NAME[name] = size + FONT_SIZE_BY_NUM[num] = size + +XPNSMAP = {'h': XHTML_NS,} +def xpath(elem, expr): + return elem.xpath(expr, namespaces=XPNSMAP) + + +class Page(object): + def __init__(self, width, height, dpi): + self.width = float(width) + self.height = float(height) + self.dpi = float(dpi) + +class Profiles(object): + PRS500 = Page(584, 754, 168.451) + PRS505 = PRS500 + + +class Stylizer(object): + STYLESHEETS = {} + + def __init__(self, tree, path, oeb, page=Profiles.PRS505): + self.page = page + base = os.path.dirname(path) + basename = os.path.basename(path) + cssname = os.path.splitext(basename)[0] + '.css' + stylesheets = [HTML_CSS_STYLESHEET] + head = xpath(tree, '/h:html/h:head')[0] + for elem in head: + tag = barename(elem.tag) + if tag == 'style': + text = ''.join(elem.text) + stylesheet = cssutils.parseString(text, href=cssname) + stylesheets.append(stylesheet) + elif tag == 'link' \ + and elem.get('rel', 'stylesheet') == 'stylesheet' \ + and elem.get('type', CSS_MIME) in OEB_STYLES: + href = elem.attrib['href'] + path = os.path.join(base, href) + path = os.path.normpath(path).replace('\\', '/') + if path in self.STYLESHEETS: + stylesheet = self.STYLESHEETS[path] + else: + data = XHTML_CSS_NAMESPACE + data += oeb.manifest.hrefs[path].data + stylesheet = cssutils.parseString(data, href=path) + self.STYLESHEETS[path] = stylesheet + stylesheets.append(stylesheet) + rules = [] + index = 0 + self.stylesheets = set() + for stylesheet in stylesheets: + href = stylesheet.href + self.stylesheets.add(href) + for rule in stylesheet.cssRules: + rules.extend(self.flatten_rule(rule, href, index)) + index = index + 1 + rules.sort() + self.rules = rules + self._styles = {} + + def flatten_rule(self, rule, href, index): + results = [] + if isinstance(rule, CSSStyleRule): + style = self.flatten_style(rule.style) + for selector in rule.selectorList: + specificity = selector.specificity + (index,) + text = selector.selectorText + selector = list(selector.seq) + results.append((specificity, selector, style, text, href)) + elif isinstance(rule, CSSPageRule): + style = self.flatten_style(rule.style) + results.append(((0, 0, 0, 0), [], style, '@page', href)) + return results + + def flatten_style(self, cssstyle): + style = {} + for prop in cssstyle: + name = prop.name + if name in ('margin', 'padding'): + style.update(self._normalize_edge(prop.cssValue, name)) + elif name == 'font': + style.update(self._normalize_font(prop.cssValue)) + else: + style[name] = prop.value + if 'font-size' in style: + size = style['font-size'] + if size == 'normal': size = 'medium' + if size in FONT_SIZE_NAMES: + style['font-size'] = "%dpt" % FONT_SIZE_BY_NAME[size] + return style + + def _normalize_edge(self, cssvalue, name): + style = {} + if isinstance(cssvalue, CSSValueList): + primitives = [v.cssText for v in cssvalue] + else: + primitives = [cssvalue.cssText] + if len(primitives) == 1: + value, = primitives + values = [value, value, value, value] + elif len(primitives) == 2: + vert, horiz = primitives + 
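# The surrounding branches implement the standard CSS box-shorthand rule: one
# value applies to all four edges, two values are vertical/horizontal, three
# are top/horizontal/bottom, and four are top/right/bottom/left.  The same
# mapping as a tiny standalone helper (illustrative only; the name is not
# used elsewhere in this module):
def expand_box_shorthand(name, parts):
    top, right, bottom, left = {
        1: lambda p: (p[0], p[0], p[0], p[0]),
        2: lambda p: (p[0], p[1], p[0], p[1]),
        3: lambda p: (p[0], p[1], p[2], p[1]),
        4: lambda p: (p[0], p[1], p[2], p[3]),
    }[min(len(parts), 4)](parts)
    return {name + '-top': top, name + '-right': right,
            name + '-bottom': bottom, name + '-left': left}

assert expand_box_shorthand('margin', ['1em', '2em']) == {
    'margin-top': '1em', 'margin-right': '2em',
    'margin-bottom': '1em', 'margin-left': '2em'}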
values = [vert, horiz, vert, horiz] + elif len(primitives) == 3: + top, horiz, bottom = primitives + values = [top, horiz, bottom, horiz] + else: + values = primitives[:4] + edges = ('top', 'right', 'bottom', 'left') + for edge, value in itertools.izip(edges, values): + style["%s-%s" % (name, edge)] = value + return style + + def _normalize_font(self, cssvalue): + composition = ('font-style', 'font-variant', 'font-weight', + 'font-size', 'line-height', 'font-family') + style = {} + if cssvalue.cssText == 'inherit': + for key in composition: + style[key] = 'inherit' + else: + primitives = [v.cssText for v in cssvalue] + primitites.reverse() + value = primitives.pop() + for key in composition: + if cssproperties.cssvalues[key](value): + style[key] = value + if not primitives: break + value = primitives.pop() + for key in composition: + if key not in style: + style[key] = DEFAULTS[key] + return style + + def style(self, element): + try: return self._styles[element] + except: pass + return Style(element, self) + + def stylesheet(self, name, font_scale=None): + rules = [] + for _, _, style, selector, href in self.rules: + if href != name: continue + if font_scale and 'font-size' in style and \ + style['font-size'].endswith('pt'): + style = copy.copy(style) + size = float(style['font-size'][:-2]) + style['font-size'] = "%.2fpt" % (size * font_scale) + style = ';\n '.join(': '.join(item) for item in style.items()) + rules.append('%s {\n %s;\n}' % (selector, style)) + return '\n'.join(rules) + +class Style(object): + def __init__(self, element, stylizer): + self._element = element + self._page = stylizer.page + self._stylizer = stylizer + self._style = self._assemble_style(element, stylizer) + stylizer._styles[element] = self + + def _assemble_style(self, element, stylizer): + result = {} + rules = stylizer.rules + for _, selector, style, _, _ in rules: + if self._selects_element(element, selector): + result.update(style) + try: + style = CSSStyleDeclaration(element.attrib['style']) + result.update(stylizer.flatten_style(style)) + except KeyError: + pass + return result + + def _selects_element(self, element, selector): + def _selects_element(element, items, index): + if index == -1: + return True + item = items[index] + if item.type == 'universal': + pass + elif item.type == 'type-selector': + name1 = ("{%s}%s" % item.value).lower() + name2 = element.tag.lower() + if name1 != name2: + return False + elif item.type == 'id': + name1 = item.value[1:].lower() + name2 = element.attrib.get('id', '').lower().split() + if name1 != name2: + return False + elif item.type == 'class': + name = item.value[1:].lower() + classes = element.attrib.get('class', '').lower().split() + if name not in classes: + return False + elif item.type == 'child': + parent = element.getparent() + if parent is None: + return False + element = parent + elif item.type == 'descendant': + element = element.getparent() + while element is not None: + if _selects_element(element, items, index - 1): + return True + element = element.getparent() + return False + elif item.type == 'pseudo-class': + if item.value == ':first-child': + e = element.getprevious() + if e is not None: + return False + else: + return False + elif item.type == 'pseudo-element': + return False + else: + return False + return _selects_element(element, items, index - 1) + return _selects_element(element, selector, len(selector) - 1) + + def _has_parent(self): + parent = self._element.getparent() + return (parent is not None) \ + and (parent in self._stylizer._styles) 
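# Cascade sketch: flatten_rule() tags each declaration block with its selector
# specificity plus a stylesheet index, the combined rule list is sorted once,
# and _assemble_style() above then simply update()s a dict while walking the
# matching rules in order, so the most specific / latest-declared rule wins.
# The same mechanism with plain tuples and no cssutils, assuming an element
# that all three selectors match (e.g. <p id="notice" class="warning">):
rules = [
    ((0, 0, 0, 1, 0), 'p',        {'color': 'black', 'margin': '1em'}),
    ((0, 0, 1, 0, 1), '.warning', {'color': 'red'}),
    ((0, 1, 0, 0, 2), '#notice',  {'color': 'blue'}),
]
rules.sort()
computed = {}
for _specificity, _selector, declarations in rules:
    computed.update(declarations)   # later in sort order == higher priority
assert computed == {'color': 'blue', 'margin': '1em'}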
+ + def __getitem__(self, name): + domname = cssproperties._toDOMname(name) + if hasattr(self, domname): + return getattr(self, domname) + return self._unit_convert(self._get(name)) + + def _get(self, name): + result = None + styles = self._stylizer._styles + if name in self._style: + result = self._style[name] + if (result == 'inherit' + or (result is None and name in INHERITED + and self._has_parent())): + result = styles[self._element.getparent()]._get(name) + if result is None: + result = DEFAULTS[name] + return result + + def _unit_convert(self, value, base=None, font=None): + if isinstance(value, (int, long, float)): + return value + try: + if float(value) == 0: + return 0.0 + except: + pass + result = value + m = re.search( + r"^(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)$", value) + if m is not None and m.group(1): + value = float(m.group(1)) + unit = m.group(2) + if unit == '%': + base = base or self.width + result = (value/100.0) * base + elif unit == 'px': + result = value * 72.0 / self._page.dpi + elif unit == 'in': + result = value * 72.0 + elif unit == 'pt': + result = value + elif unit == 'em': + font = font or self.fontSize + result = value * font + elif unit == 'pc': + result = value * 12.0 + elif unit == 'mm': + result = value * 0.04 + elif unit == 'cm': + result = value * 0.40 + return result + + @property + def fontSize(self): + def normalize_fontsize(value, base=None): + result = None + factor = None + if value == 'inherit': + value = 'medium' + if value in FONT_SIZE_NAMES: + result = FONT_SIZE_BY_NAME[value] + elif value == 'smaller': + factor = 1.0/1.2 + for _, _, size in FONT_SIZE_LIST: + if base <= size: break + factor = None + result = size + elif value == 'larger': + factor = 1.2 + for _, _, size in reversed(FONT_SIZE_LIST): + if base >= size: break + factor = None + result = size + else: + result = self._unit_convert(value, base=base, font=base) + if result < 0: + result = normalize_fontsize("smaller", base) + if factor: + result = factor * base + return result + result = None + if self._has_parent(): + styles = self._stylizer._styles + base = styles[self._element.getparent()].fontSize + else: + base = normalize_fontsize(DEFAULTS['font-size']) + if 'font-size' in self._style: + size = self._style['font-size'] + result = normalize_fontsize(size, base) + else: + result = base + self.__dict__['fontSize'] = result + return result + + @property + def width(self): + result = None + base = None + if self._has_parent(): + styles = self._stylizer._styles + base = styles[self._element.getparent()].width + else: + base = self._page.width + if 'width' in self._style: + width = self._style['width'] + if width == 'auto': + result = base + else: + result = self._unit_convert(width, base=base) + else: + result = base + self.__dict__['width'] = result + return result + + def __str__(self): + items = self._style.items() + return '; '.join("%s: %s" % (key, val) for key, val in items) diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py new file mode 100644 index 0000000000..5ed3bdf8ec --- /dev/null +++ b/src/calibre/ebooks/lit/writer.py @@ -0,0 +1,655 @@ +from __future__ import with_statement +import sys +import os +from cStringIO import StringIO +from struct import pack, unpack +from itertools import izip, count +import time +import random +import re +import copy +import uuid +import functools +from lxml import etree +from calibre.ebooks.lit.reader import msguid, DirectoryEntry +import calibre.ebooks.lit.maps as maps +from calibre.ebooks.lit.oeb 
import CSS_MIME, OPF_MIME +from calibre.ebooks.lit.oeb import Oeb, namespace, barename +from calibre.ebooks.lit.stylizer import Stylizer +from calibre.ebooks.lit.lzxcomp import Compressor +import calibre +from calibre import plugins +msdes, msdeserror = plugins['msdes'] +import calibre.ebooks.lit.mssha1 as mssha1 + +__all__ = ['LitWriter'] + +def invert_tag_map(tag_map): + tags, dattrs, tattrs = tag_map + tags = dict((tags[i], i) for i in xrange(len(tags))) + dattrs = dict((v, k) for k, v in dattrs.items()) + tattrs = [dict((v, k) for k, v in (map or {}).items()) for map in tattrs] + for map in tattrs: + if map: map.update(dattrs) + tattrs[0] = dattrs + return tags, tattrs + +OPF_MAP = invert_tag_map(maps.OPF_MAP) +HTML_MAP = invert_tag_map(maps.HTML_MAP) + +LIT_MAGIC = 'ITOLITLS' + +LITFILE_GUID = "{0A9007C1-4076-11D3-8789-0000F8105754}" +PIECE3_GUID = "{0A9007C3-4076-11D3-8789-0000F8105754}" +PIECE4_GUID = "{0A9007C4-4076-11D3-8789-0000F8105754}" +DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" +LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" + +def packguid(guid): + values = guid[1:9], guid[10:14], guid[15:19], \ + guid[20:22], guid[22:24], guid[25:27], guid[27:29], \ + guid[29:31], guid[31:33], guid[33:35], guid[35:37] + values = [int(value, 16) for value in values] + return pack(">= 7 + if bytes: + b |= 0x80 + bytes.append(chr(b)) + if value == 0: + break + return ''.join(reversed(bytes)) + +def randbytes(n): + return ''.join(chr(random.randint(0, 255)) for x in xrange(n)) + +class ReBinary(object): + def __init__(self, root, path, oeb, map=HTML_MAP): + self.dir = os.path.dirname(path) + self.manifest = oeb.manifest + self.tags, self.tattrs = map + self.buf = StringIO() + self.anchors = [] + self.page_breaks = [] + self.is_html = is_html = map is HTML_MAP + self.stylizer = Stylizer(root, path, oeb) if is_html else None + self.tree_to_binary(root) + self.content = self.buf.getvalue() + self.ahc = self.build_ahc() + self.aht = self.build_aht() + + def write(self, *values): + for value in values: + if isinstance(value, (int, long)): + value = unichr(value) + self.buf.write(value.encode('utf-8')) + + def tree_to_binary(self, elem, nsrmap={'': None}, parents=[], + inhead=False, preserve=False): + if not isinstance(elem.tag, basestring): + self.write(etree.tostring(elem)) + return + nsrmap = copy.copy(nsrmap) + attrib = dict(elem.attrib) + style = self.stylizer.style(elem) if self.stylizer else None + for key, value in elem.nsmap.items(): + if value not in nsrmap or nsrmap[value] != key: + xmlns = ('xmlns:' + key) if key else 'xmlns' + attrib[xmlns] = value + nsrmap[value] = key + tag = prefixname(elem.tag, nsrmap) + tag_offset = self.buf.tell() + if tag == 'head': + inhead = True + flags = FLAG_OPENING + if not elem.text and len(elem) == 0: + flags |= FLAG_CLOSING + if inhead: + flags |= FLAG_HEAD + if style and style['display'] in ('block', 'table'): + flags |= FLAG_BLOCK + self.write(0, flags) + tattrs = self.tattrs[0] + if tag in self.tags: + index = self.tags[tag] + self.write(index) + if self.tattrs[index]: + tattrs = self.tattrs[index] + else: + self.write(FLAG_CUSTOM, len(tag)+1, tag) + last_break = self.page_breaks[-1][0] if self.page_breaks else None + if style and last_break != tag_offset \ + and style['page-break-before'] not in ('avoid', 'auto'): + self.page_breaks.append((tag_offset, list(parents))) + for attr, value in attrib.items(): + attr = prefixname(attr, nsrmap) + if attr in ('href', 'src'): + path, hash, frag = value.partition('#') + path = 
os.path.join(self.dir, path) + path = os.path.normpath(path) + path = path.replace('\\', '/') + prefix = unichr(3) + if path in self.manifest.hrefs: + prefix = unichr(2) + value = self.manifest.hrefs[path].id + if hash and frag: + value = '#'.join((value, frag)) + value = prefix + value + elif attr in ('id', 'name'): + self.anchors.append((value, tag_offset)) + elif attr.startswith('ms--'): + attr = '%' + attr[4:] + if attr in tattrs: + self.write(tattrs[attr]) + else: + self.write(FLAG_CUSTOM, len(attr)+1, attr) + try: + self.write(ATTR_NUMBER, int(value)+1) + except ValueError: + self.write(len(value)+1, value) + self.write(0) + if elem.text: + text = elem.text + if style and style['white-space'] == 'pre': + preserve = True + if elem.get('xml:space') == 'preserve': + preserve = True + if not preserve: + text = COLLAPSE.sub(' ', text) + self.write(text) + parents.append(tag_offset) + for child in elem: + self.tree_to_binary(child, nsrmap, parents, inhead, preserve) + parents.pop() + if not flags & FLAG_CLOSING: + self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0) + if elem.tail: + tail = elem.tail + if tag != 'pre': + tail = COLLAPSE.sub(' ', tail) + self.write(tail) + if style and style['page-break-after'] not in ('avoid', 'auto'): + self.page_breaks.append((self.buf.tell(), list(parents))) + + def build_ahc(self): + data = StringIO() + data.write(unichr(len(self.anchors)).encode('utf-8')) + for anchor, offset in self.anchors: + data.write(unichr(len(anchor)).encode('utf-8')) + data.write(anchor) + data.write(pack(' 0: + section = self._sections[secnum] + offset = section.tell() + section.write(data) + else: + offset = 0 + self._directory.append( + DirectoryEntry(name, secnum, offset, len(data))) + + def _add_folder(self, name, offset=0, size=0): + if not name.endswith('/'): + name += '/' + self._directory.append( + DirectoryEntry(name, 0, offset, size)) + + def _djoin(self, *names): + return '/'.join(names) + + def _build_sections(self): + self._add_folder('/', ROOT_OFFSET, ROOT_SIZE) + self._build_data() + self._build_manifest() + self._build_page_breaks() + self._build_meta() + self._build_drm_storage() + self._build_version() + self._build_namelist() + self._build_storage() + self._build_transforms() + + def _build_data(self): + self._add_folder('/data') + for item in self._oeb.manifest.values(): + name = '/data/' + item.id + data = item.data + secnum = 0 + if not isinstance(data, basestring): + self._add_folder(name) + rebin = ReBinary(data, item.href, self._oeb) + self._add_file(name + '/ahc', rebin.ahc, 0) + self._add_file(name + '/aht', rebin.aht, 0) + item.page_breaks = rebin.page_breaks + data = rebin.content + name = name + '/content' + secnum = 1 + self._add_file(name, data, secnum) + item.size = len(data) + + def _build_manifest(self): + states = ['linear', 'nonlinear', 'css', 'images'] + manifest = dict((state, []) for state in states) + for item in self._oeb.manifest.values(): + if item.spine_position is not None: + key = 'linear' if item.linear else 'nonlinear' + manifest[key].append(item) + elif item.media_type == CSS_MIME: + manifest['css'].append(item) + else: + manifest['images'].append(item) + data = StringIO() + data.write(pack(' 1: + pb3cur |= 0x2 + bits += 2 + if bits >= 8: + pb3.write(pack(' 0: + data = ("\000" * prepad) + data + prepad = 0 + postpad = 64 - (len(data) % 64) + if postpad < 64: + data = data + ("\000" * postpad) + hash.update(data) + digest = hash.digest() + key = [0] * 8 + for i in xrange(0, len(digest)): + key[i % 8] ^= ord(digest[i]) + 
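# Key-derivation sketch: _calculate_deskey() above hashes the relevant section
# data (with calibre's modified mssha1, padding the input to 64-byte blocks)
# and then folds the 20-byte digest into an 8-byte DES key by XOR-ing digest
# byte i into key[i % 8].  The fold step in isolation, with hashlib.sha1
# standing in for mssha1 purely for illustration:
import hashlib

def fold_digest(digest, keylen=8):
    key = [0] * keylen
    for i in xrange(len(digest)):
        key[i % keylen] ^= ord(digest[i])
    return ''.join(chr(x) for x in key)

assert len(fold_digest(hashlib.sha1('example').digest())) == 8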
return ''.join(chr(x) for x in key) + + def _build_dchunks(self): + ddata = [] + directory = list(self._directory) + directory.sort(cmp=lambda x, y: \ + cmp(x.name.lower(), y.name.lower())) + qrn = 1 + (1 << 2) + dchunk = StringIO() + dcount = 0 + quickref = [] + name = directory[0].name + for entry in directory: + next = ''.join([decint(len(entry.name)), entry.name, + decint(entry.section), decint(entry.offset), + decint(entry.size)]) + usedlen = dchunk.tell() + len(next) + (len(quickref) * 2) + 52 + if usedlen >= DCHUNK_SIZE: + ddata.append((dchunk.getvalue(), quickref, dcount, name)) + dchunk = StringIO() + dcount = 0 + quickref = [] + name = entry.name + if (dcount % qrn) == 0: + quickref.append(dchunk.tell()) + dchunk.write(next) + dcount = dcount + 1 + ddata.append((dchunk.getvalue(), quickref, dcount, name)) + cidmax = len(ddata) - 1 + rdcount = 0 + dchunks = [] + dcounts = [] + ichunk = None + if len(ddata) > 1: + ichunk = StringIO() + for cid, (content, quickref, dcount, name) in izip(count(), ddata): + dchunk = StringIO() + prev = cid - 1 if cid > 0 else ULL_NEG1 + next = cid + 1 if cid < cidmax else ULL_NEG1 + rem = DCHUNK_SIZE - (len(content) + 50) + pad = rem - (len(quickref) * 2) + dchunk.write('AOLL') + dchunk.write(pack(' Date: Tue, 9 Dec 2008 08:02:09 -0500 Subject: [PATCH 02/15] Unify handling of URIs/IRIs, storing in encoded, normalized form. --- src/calibre/ebooks/lit/oeb.py | 64 +++++++++++++++++++++++--------- src/calibre/ebooks/lit/reader.py | 12 +++--- src/calibre/ebooks/lit/writer.py | 16 ++++---- 3 files changed, 62 insertions(+), 30 deletions(-) diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/lit/oeb.py index a4ad927fed..d3773a61f1 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/lit/oeb.py @@ -4,7 +4,8 @@ import sys from collections import defaultdict from types import StringTypes from itertools import izip, count -from urlparse import urldefrag +from urlparse import urldefrag, urlparse, urlunparse +from urllib import unquote as urlunquote from lxml import etree XML_PARSER = etree.XMLParser( @@ -55,6 +56,22 @@ def barename(name): def xpath(elem, expr): return elem.xpath(expr, namespaces=XPNSMAP) +URL_UNSAFE = r"""`!@#$%^&*[](){}?+=;:'",<>\| """ +def urlquote(href): + result = [] + for char in href: + if char in URL_UNSAFE: + char = "%%%02x" % ord(char) + result.append(char) + return ''.join(result) + +def urlnormalize(href): + parts = urlparse(href) + parts = (part.replace('\\', '/') for part in parts) + parts = (urlunquote(part) for part in parts) + parts = (urlquote(part) for part in parts) + return urlunparse(parts) + class AbstractContainer(object): def read_xml(self, path): @@ -68,12 +85,12 @@ class DirContainer(AbstractContainer): def read(self, path): path = os.path.join(self.rootdir, path) - with open(path, 'rb') as f: + with open(urlunquote(path), 'rb') as f: return f.read() def write(self, path, data): path = os.path.join(self.rootdir, path) - with open(path, 'wb') as f: + with open(urlunquote(path), 'wb') as f: return f.write(data) @@ -178,7 +195,7 @@ class Metadata(object): return elem def to_opf2(self, parent=None): - elem = element(parent, OPF('metadata'), nsmap=self.NSMAP) + elem = element(parent, OPF('metadata'), nsmap=self.OPF2_NSMAP) for term in self.items: for item in self.items[term]: item.to_opf2(elem) @@ -189,7 +206,7 @@ class Manifest(object): class Item(object): def __init__(self, id, href, media_type, loader=str): self.id = id - self.href = self.path = href.replace('%20', ' ') + self.href = self.path = 
urlnormalize(href) self.media_type = media_type self.spine_position = None self.linear = True @@ -235,8 +252,8 @@ class Manifest(object): def add(self, id, href, media_type): item = self.Item(id, href, media_type, self.oeb.container.read) - self.items[id] = item - self.hrefs[href] = item + self.items[item.id] = item + self.hrefs[item.href] = item return item def remove(self, id): @@ -331,7 +348,7 @@ class Guide(object): def __init__(self, type, title, href): self.type = type self.title = title - self.href = href + self.href = urlnormalize(href) def __repr__(self): return 'Reference(type=%r, title=%r, href=%r)' \ @@ -390,7 +407,7 @@ class Guide(object): class Toc(object): def __init__(self, title=None, href=None, klass=None, id=None): self.title = title - self.href = href + self.href = urlnormalize(href) if href else href self.klass = klass self.id = id self.nodes = [] @@ -414,8 +431,8 @@ class Toc(object): def to_opf1(self, tour): for node in self.nodes: - element(tour, 'site', - attrib={'title': node.title, 'href': node.href}) + element(tour, 'site', attrib={ + 'title': node.title, 'href': node.href}) node.to_opf1(tour) return tour @@ -431,8 +448,9 @@ class Toc(object): point.attrib['id'] = self.id label = etree.SubElement(point, NCX('navLabel')) etree.SubElement(label, NCX('text')).text = node.title - href = node.href if depth > 1 else node.href.split('#', 1)[0] - etree.SubElement(point, NCX('content'), attrib={'src': href}) + href = node.href if depth > 1 else urldefrag(node.href)[0] + child = etree.SubElement(point, + NCX('content'), attrib={'src': href}) node.to_ncx(point, playorder, depth+1) return parent @@ -490,7 +508,8 @@ class Oeb(object): uid = opf.attrib['unique-identifier'] self.metadata = metadata = Metadata(self) for elem in xpath(opf, '/o2:package/o2:metadata/*'): - metadata.add(elem.tag, elem.text, elem.attrib) + if elem.text or elem.attrib: + metadata.add(elem.tag, elem.text, elem.attrib) for item in metadata.identifier: if item.id == uid: self.uid = item @@ -524,7 +543,7 @@ class Oeb(object): def _toc_from_navpoint(self, toc, navpoint): children = xpath(navpoint, 'ncx:navPoint') for child in children: - title = xpath(child, 'ncx:navLabel/ncx:text/text()')[0] + title = ''.join(xpath(child, 'ncx:navLabel/ncx:text/text()')) href = xpath(child, 'ncx:content/@src')[0] id = child.get('id') klass = child.get('class') @@ -564,8 +583,13 @@ class Oeb(object): item = self.manifest.hrefs[itempath] html = item.data if frag: - elem = xpath(html, './/*[@id="%s"]' % frag) - html = elem[0] if elem else html + elems = xpath(html, './/*[@id="%s"]' % frag) + if not elems: + elems = xpath(html, './/*[@name="%s"]' % frag) + elem = elems[0] if elems else html + while elem != html and not xpath(elem, './/h:a[@href]'): + elem = elem.getparent() + html = elem titles = defaultdict(list) order = [] for anchor in xpath(html, './/h:a[@href]'): @@ -574,6 +598,7 @@ class Oeb(object): if not path: href = '#'.join((itempath, frag)) title = ' '.join(xpath(anchor, './/text()')) + href = urlnormalize(href) if href not in titles: order.append(href) titles[href].append(title) @@ -679,10 +704,13 @@ class Oeb(object): return {OPF_MIME: ('content.opf', package), NCX_MIME: (href, ncx)} + def main(argv=sys.argv): for arg in argv[1:]: oeb = Oeb(arg) - for name, doc in oeb.to_opf2().items(): + for name, doc in oeb.to_opf1().values(): + print etree.tostring(doc, pretty_print=True) + for name, doc in oeb.to_opf2().values(): print etree.tostring(doc, pretty_print=True) return 0 diff --git 
a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 671e48ab76..c04a845d69 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -10,10 +10,12 @@ __copyright__ = '2008, Kovid Goyal ' \ import sys, struct, cStringIO, os import functools import re +from urlparse import urldefrag from lxml import etree from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP import calibre.ebooks.lit.mssha1 as mssha1 +from calibre.ebooks.lit.oeb import urlnormalize from calibre.ebooks import DRMError from calibre import plugins lzx, lxzerror = plugins['lzx'] @@ -322,12 +324,12 @@ class UnBinary(object): href += c count -= 1 if count == 0: - doc, m, frag = href[1:].partition('#') + doc, frag = urldefrag(href[1:]) path = self.item_path(doc) - if m and frag: - path += m + frag - self.buf.write((u'"%s"' % path).encode( - 'ascii', 'xmlcharrefreplace')) + if frag: + path = '#'.join((path, frag)) + path = urlnormalize(path) + self.buf.write((u'"%s"' % path).encode('utf-8')) state = 'get attr' return index diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index 5ed3bdf8ec..62c3877785 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -10,11 +10,14 @@ import re import copy import uuid import functools +from urlparse import urldefrag +from urllib import unquote as urlunquote from lxml import etree from calibre.ebooks.lit.reader import msguid, DirectoryEntry import calibre.ebooks.lit.maps as maps from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME -from calibre.ebooks.lit.oeb import Oeb, namespace, barename +from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize +from calibre.ebooks.lit.oeb import Oeb from calibre.ebooks.lit.stylizer import Stylizer from calibre.ebooks.lit.lzxcomp import Compressor import calibre @@ -173,15 +176,13 @@ class ReBinary(object): for attr, value in attrib.items(): attr = prefixname(attr, nsrmap) if attr in ('href', 'src'): - path, hash, frag = value.partition('#') - path = os.path.join(self.dir, path) - path = os.path.normpath(path) - path = path.replace('\\', '/') + value = urlnormalize(value) + path, frag = urldefrag(value) prefix = unichr(3) if path in self.manifest.hrefs: prefix = unichr(2) value = self.manifest.hrefs[path].id - if hash and frag: + if frag: value = '#'.join((value, frag)) value = prefix + value elif attr in ('id', 'name'): @@ -420,7 +421,8 @@ class LitWriter(object): items.sort() data.write(pack(' Date: Tue, 9 Dec 2008 08:54:43 -0500 Subject: [PATCH 03/15] Integrated LZX compression code. 
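This change also drops the old ctypes dependency on an external liblzxcomp.so:
the compressor now lives inside the calibre.plugins['lzx'] extension, which
exposes its entry points (_lzxc_init, _lzxc_reset, _lzxc_compress_block and
_lzxc_finish) as raw function-pointer addresses, and lzxcomp.py rebuilds
Python callables from those addresses with CFUNCTYPE prototypes. A minimal,
self-contained sketch of that binding pattern, using libc's strlen as a
stand-in for the plugin symbols (illustrative only, POSIX assumed):

    from ctypes import CDLL, CFUNCTYPE, cast, c_void_p, c_char_p, c_size_t
    from ctypes.util import find_library

    libc = CDLL(find_library('c'))            # stand-in for the compiled plugin
    addr = cast(libc.strlen, c_void_p).value  # function-pointer address as an int

    strlen_t = CFUNCTYPE(c_size_t, c_char_p)  # declare the C signature up front
    strlen = strlen_t(addr)                   # instantiate a callable at that address

    assert strlen('lzxcomp') == 7

The lzx_init_t/lzx_reset_t/lzx_compress_block_t/lzx_finish_t definitions below
follow the same three steps: declare the prototype, take the exported address,
instantiate the callable.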
--- setup.py | 4 +- src/calibre/ebooks/lit/lzxcomp.py | 34 +- src/calibre/utils/lzx/lzc.c | 389 +++++++ src/calibre/utils/lzx/lzc.h | 60 ++ src/calibre/utils/lzx/lzxc.c | 1259 +++++++++++++++++++++++ src/calibre/utils/lzx/lzxc.h | 57 + src/calibre/utils/lzx/lzxd.c | 2 +- src/calibre/utils/lzx/{lzx.h => lzxd.h} | 0 src/calibre/utils/lzx/lzxmodule.c | 16 +- 9 files changed, 1794 insertions(+), 27 deletions(-) create mode 100644 src/calibre/utils/lzx/lzc.c create mode 100644 src/calibre/utils/lzx/lzc.h create mode 100644 src/calibre/utils/lzx/lzxc.c create mode 100644 src/calibre/utils/lzx/lzxc.h rename src/calibre/utils/lzx/{lzx.h => lzxd.h} (100%) diff --git a/setup.py b/setup.py index aa72b46f00..0465795970 100644 --- a/setup.py +++ b/setup.py @@ -374,7 +374,9 @@ if __name__ == '__main__': ext_modules = [ Extension('calibre.plugins.lzx', sources=['src/calibre/utils/lzx/lzxmodule.c', - 'src/calibre/utils/lzx/lzxd.c'], + 'src/calibre/utils/lzx/lzxd.c', + 'src/calibre/utils/lzx/lzc.c', + 'src/calibre/utils/lzx/lzxc.c'], include_dirs=['src/calibre/utils/lzx']), Extension('calibre.plugins.msdes', diff --git a/src/calibre/ebooks/lit/lzxcomp.py b/src/calibre/ebooks/lit/lzxcomp.py index 4f147a90a1..1a3f944c89 100644 --- a/src/calibre/ebooks/lit/lzxcomp.py +++ b/src/calibre/ebooks/lit/lzxcomp.py @@ -3,11 +3,11 @@ import sys import os from cStringIO import StringIO from ctypes import * +from calibre import plugins +_lzx, LzxError = plugins['lzx'] __all__ = ['Compressor'] -liblzxcomp = cdll.LoadLibrary('liblzxcomp.so') - class lzx_data(Structure): pass @@ -25,32 +25,22 @@ class lzx_results(Structure): # lzx_at_eof_t at_eof, # lzx_put_bytes_t put_bytes, void *put_bytes_arg, # lzx_mark_frame_t mark_frame, void *mark_frame_arg); -lzx_init = liblzxcomp.lzx_init -lzx_init.restype = c_int -lzx_init.argtypes = [POINTER(POINTER(lzx_data)), c_int, - lzx_get_bytes_t, c_voidp, - lzx_at_eof_t, - lzx_put_bytes_t, c_voidp, - lzx_mark_frame_t, c_voidp] +lzx_init_t = CFUNCTYPE( + c_int, POINTER(POINTER(lzx_data)), c_int, lzx_get_bytes_t, c_voidp, + lzx_at_eof_t, lzx_put_bytes_t, c_voidp, lzx_mark_frame_t, c_voidp) +lzx_init = lzx_init_t(_lzx._lzxc_init) # void lzx_reset(lzx_data *lzxd); -lzx_reset = liblzxcomp.lzx_reset -lzx_reset.restype = None -lzx_reset.argtypes = [POINTER(lzx_data)] +lzx_reset_t = CFUNCTYPE(None, POINTER(lzx_data)) +lzx_reset = lzx_reset_t(_lzx._lzxc_reset) # int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide); -lzx_compress_block = liblzxcomp.lzx_compress_block -lzx_compress_block.restype = c_int -lzx_compress_block.argtypes = [POINTER(lzx_data), c_int, c_int] +lzx_compress_block_t = CFUNCTYPE(c_int, POINTER(lzx_data), c_int, c_int) +lzx_compress_block = lzx_compress_block_t(_lzx._lzxc_compress_block) # int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr); -lzx_finish = liblzxcomp.lzx_finish -lzx_finish.restype = c_int -lzx_finish.argtypes = [POINTER(lzx_data), POINTER(lzx_results)] - - -class LzxError(Exception): - pass +lzx_finish_t = CFUNCTYPE(c_int, POINTER(lzx_data), POINTER(lzx_results)) +lzx_finish = lzx_finish_t(_lzx._lzxc_finish) class Compressor(object): diff --git a/src/calibre/utils/lzx/lzc.c b/src/calibre/utils/lzx/lzc.c new file mode 100644 index 0000000000..4ce6f24227 --- /dev/null +++ b/src/calibre/utils/lzx/lzc.c @@ -0,0 +1,389 @@ +/* + File lz_nonslide.c, part of lzxcomp library + Copyright (C) 2002 Matthew T. 
Russotto + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; version 2.1 only + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* + * Document here + */ +#include +#include +#include +#include +#include +#ifdef DEBUG_PERF +#include +#include +#endif +#include + +#define MAX_MATCH 253 +#define MIN_MATCH 2 + +void lz_init(lz_info *lzi, int wsize, int max_dist, + int max_match, int min_match, + int frame_size, + get_chars_t get_chars, + output_match_t output_match, + output_literal_t output_literal, void *user_data) +{ + /* the reason for the separate max_dist value is LZX can't reach the + first three characters in its nominal window. But using a smaller + window results in inefficiency when dealing with reset intervals + which are the length of the nominal window */ + + lzi->wsize = wsize; + if (max_match > wsize) + lzi->max_match = wsize; + else + lzi->max_match = max_match; + + lzi->min_match = min_match; + if (lzi->min_match < 3) lzi->min_match = 3; + + lzi->max_dist = max_dist; + lzi->block_buf_size = wsize + lzi->max_dist; + lzi->block_buf = malloc(lzi->block_buf_size); + lzi->block_bufe = lzi->block_buf + lzi->block_buf_size; + assert(lzi->block_buf != NULL); + + lzi->cur_loc = 0; + lzi->block_loc = 0; + lzi->chars_in_buf = 0; + lzi->eofcount = 0; + lzi->get_chars = get_chars; + lzi->output_match = output_match; + lzi->output_literal = output_literal; + lzi->user_data = user_data; + lzi->frame_size = frame_size; + lzi->lentab = calloc(lzi->block_buf_size + 1, sizeof(int)); + lzi->prevtab = calloc(lzi->block_buf_size + 1, sizeof(u_char *)); + lzi->analysis_valid = 0; +} + +void lz_release(lz_info *lzi) +{ + free(lzi->block_buf); + free(lzi->lentab); + free(lzi->prevtab); +} + +void lz_reset(lz_info *lzi) +{ + int residual = lzi->chars_in_buf - lzi->block_loc; + memmove(lzi->block_buf, lzi->block_buf + lzi->block_loc, residual); + lzi->chars_in_buf = residual; + lzi->block_loc = 0; + lzi->analysis_valid = 0; +} + +#ifdef LZNONSLIDE_MAIN +typedef struct lz_user_data +{ + FILE *infile; + FILE *outfile; + int R0, R1, R2; +} lz_user_data; + +int tmp_get_chars(lz_info *lzi, int n, u_char *buf) +{ + lz_user_data *lzud = (lz_user_data *)lzi->user_data; + return fread(buf, 1, n, lzud->infile); +} + +int tmp_output_match(lz_info *lzi, int match_pos, int match_len) +{ + lz_user_data *lzud = (lz_user_data *)lzi->user_data; + int mod_match_loc; + + mod_match_loc = match_pos; + + fprintf(lzud->outfile, "(%d, %d)(%d)\n", match_pos, match_len, mod_match_loc); + return 0; +} + +void tmp_output_literal(lz_info *lzi, u_char ch) +{ + lz_user_data *lzud = (lz_user_data *)lzi->user_data; + fprintf(lzud->outfile, "'%c'", ch); +} + +int main(int argc, char *argv[]) +{ + int wsize = atoi(argv[1]); + lz_info lzi; + lz_user_data lzu = {stdin, stdout, 1, 1, 1}; + + lz_init(&lzi, wsize, wsize, MAX_MATCH, MIN_MATCH, 8192, tmp_get_chars, tmp_output_match, tmp_output_literal,&lzu); + lz_compress(&lzi); + return 0; +} +#endif + +__inline__ int lz_left_to_process(lz_info *lzi) 
+{ + return lzi->chars_in_buf - lzi->block_loc; +} + +static void +fill_blockbuf(lz_info *lzi, int maxchars) +{ + int toread; + u_char *readhere; + int nread; + + if (lzi->eofcount) return; + maxchars -= lz_left_to_process(lzi); + toread = lzi->block_buf_size - lzi->chars_in_buf; + if (toread > maxchars) toread = maxchars; + readhere = lzi->block_buf + lzi->chars_in_buf; + nread = lzi->get_chars(lzi, toread, readhere); + lzi->chars_in_buf += nread; + if (nread != toread) + lzi->eofcount++; +} + +static void lz_analyze_block(lz_info *lzi) +{ + int *lentab, *lenp; + u_char **prevtab, **prevp; + u_char *bbp, *bbe; + u_char *chartab[256]; + u_char *cursor; + int prevlen; + int ch; + int maxlen; + long wasinc; + int max_dist = lzi->max_dist; +#ifdef DEBUG_ANALYZE_BLOCK + static short n = 0; +#endif +#ifdef DEBUG_PERF + struct rusage innerloop; + struct timeval innertime, tmptime; + struct rusage outerloop; + struct timeval outertime; + struct rusage initialloop; + struct timeval initialtime; + struct rusage totalloop; + struct timeval totaltime; +#endif + +#ifdef DEBUG_ANALYZE_BLOCK + fprintf(stderr, "Analyzing block %d, cur_loc = %06x\n", n, lzi->cur_loc); +#endif + memset(chartab, 0, sizeof(chartab)); + prevtab = prevp = lzi->prevtab; + lentab = lenp = lzi->lentab; + memset(prevtab, 0, sizeof(*prevtab) * lzi->chars_in_buf); + memset(lentab, 0, sizeof(*lentab) * lzi->chars_in_buf); +#ifdef DEBUG_PERF + memset(&innertime, 0, sizeof(innertime)); + memset(&outertime, 0, sizeof(outertime)); + getrusage(RUSAGE_SELF, &initialloop); + totalloop = initialloop; +#endif + bbp = lzi->block_buf; + bbe = bbp + lzi->chars_in_buf; + while (bbp < bbe) { + if (chartab[ch = *bbp]) { + *prevp = chartab[ch]; + *lenp = 1; + } + chartab[ch] = bbp; + bbp++; + prevp++; + lenp++; + } +#ifdef DEBUG_PERF + initialtime = initialloop.ru_utime; + getrusage(RUSAGE_SELF, &initialloop); + timersub(&initialloop.ru_utime, &initialtime, &initialtime); +#endif + wasinc = 1; + for (maxlen = 1; wasinc && (maxlen < lzi->max_match); maxlen++) { +#ifdef DEBUG_PERF + getrusage(RUSAGE_SELF, &outerloop); +#endif + bbp = bbe - maxlen - 1; + lenp = lentab + lzi->chars_in_buf - maxlen - 1; + prevp = prevtab + lzi->chars_in_buf - maxlen - 1; + wasinc = 0; + while (bbp > lzi->block_buf) { + if (*lenp == maxlen) { +#ifdef DEBUG_PERF + getrusage(RUSAGE_SELF, &innerloop); +#endif + ch = bbp[maxlen]; + cursor = *prevp; + while(cursor && ((bbp - cursor) <= max_dist)) { + prevlen = *(cursor - lzi->block_buf + lentab); + if (cursor[maxlen] == ch) { + *prevp = cursor; + (*lenp)++; + wasinc++; + break; + } + if (prevlen != maxlen) break; + cursor = *(cursor - lzi->block_buf + prevtab); + } +#ifdef DEBUG_PERF + tmptime = innerloop.ru_utime; + getrusage(RUSAGE_SELF, &innerloop); + timersub(&innerloop.ru_utime, &tmptime, &tmptime); + timeradd(&tmptime, &innertime, &innertime); +#endif + } + bbp--; + prevp--; + lenp--; + } +#ifdef DEBUG_PERF + tmptime = outerloop.ru_utime; + getrusage(RUSAGE_SELF, &outerloop); + timersub(&outerloop.ru_utime, &tmptime, &tmptime); + timeradd(&tmptime, &outertime, &outertime); +#endif + // fprintf(stderr, "maxlen = %d, wasinc = %ld\n", maxlen, wasinc); + } +#ifdef DEBUG_PERF + totaltime = totalloop.ru_utime; + getrusage(RUSAGE_SELF, &totalloop); + timersub(&totalloop.ru_utime, &totaltime, &totaltime); + fprintf(stderr, "Time spend in initial loop = %f\n", initialtime.tv_sec + initialtime.tv_usec/(double)1E6); + fprintf(stderr, "Time spend in outer loop = %f\n", outertime.tv_sec + outertime.tv_usec/(double)1E6); + 
fprintf(stderr, "Time spend in inner loop = %f\n", innertime.tv_sec + innertime.tv_usec/(double)1E6); + fprintf(stderr, "Time spend in all loops = %f\n", totaltime.tv_sec + totaltime.tv_usec/(double)1E6); +#endif + lzi->analysis_valid = 1; +#ifdef DEBUG_ANALYZE_BLOCK + fprintf(stderr, "Done analyzing block %d, cur_loc = %06x\n", n++, lzi->cur_loc); +#endif +} + +void lz_stop_compressing(lz_info *lzi) +{ + lzi->stop = 1; + /* fprintf(stderr, "Stopping...\n");*/ +} + +int lz_compress(lz_info *lzi, int nchars) +{ + + u_char *bbp, *bbe; + int *lentab, *lenp; + u_char **prevtab, **prevp; + int len; + int holdback; + short trimmed; + + lzi->stop = 0; + while ((lz_left_to_process(lzi) || !lzi->eofcount) && !lzi->stop && nchars > 0) { +#if 1 + if (!lzi->analysis_valid || + (!lzi->eofcount && + ((lzi->chars_in_buf- lzi->block_loc) < nchars))) { + int residual = lzi->chars_in_buf - lzi->block_loc; + int bytes_to_move = lzi->max_dist + residual; + if (bytes_to_move > lzi->chars_in_buf) + bytes_to_move = lzi->chars_in_buf; +#ifdef DEBUG_ANALYZE_BLOCK + fprintf(stderr, "Moving %06x, chars_in_buf %06x, residual = %06x, nchars= %06x block_loc = %06x\n", bytes_to_move, lzi->chars_in_buf, residual, nchars, lzi->block_loc); +#endif + memmove(lzi->block_buf, lzi->block_buf + lzi->chars_in_buf - bytes_to_move, + bytes_to_move); + + lzi->block_loc = bytes_to_move - residual; + lzi->chars_in_buf = bytes_to_move; +#ifdef DEBUG_ANALYZE_BLOCK + fprintf(stderr, "New chars_in_buf %06x, new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount); +#endif + fill_blockbuf(lzi, nchars); +#ifdef DEBUG_ANALYZE_BLOCK + fprintf(stderr, "Really new chars_in_buf %06x, new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount); +#endif + lz_analyze_block(lzi); + } +#else + if (!lzi->analysis_valid || + (lzi->block_loc - lzi->chars_in_buf) == 0) { + lzi->block_loc = 0; + lzi->chars_in_buf = 0; + fill_blockbuf(lzi, nchars); + lz_analyze_block(lzi); + } +#endif + prevtab = prevp = lzi->prevtab + lzi->block_loc; + lentab = lenp = lzi->lentab + lzi->block_loc; + bbp = lzi->block_buf + lzi->block_loc; + holdback = lzi->max_match; + if (lzi->eofcount) holdback = 0; + if (lzi->chars_in_buf < (nchars + lzi->block_loc)) + bbe = lzi->block_buf + lzi->chars_in_buf - holdback; + else + bbe = bbp + nchars; + while ((bbp < bbe) && (!lzi->stop)) { + trimmed = 0; + len = *lenp; + if (lzi->frame_size && (len > (lzi->frame_size - lzi->cur_loc % lzi->frame_size))) { +#ifdef DEBUG_TRIMMING + fprintf(stderr, "Trim for framing: %06x %d %d\n", lzi->cur_loc,len, (lzi->frame_size - lzi->cur_loc % lzi->frame_size)); +#endif + trimmed = 1; + len = (lzi->frame_size - lzi->cur_loc % lzi->frame_size); + } + if (len > nchars) { +#ifdef DEBUG_TRIMMING + fprintf(stderr, "Trim for blocking: %06x %d %d\n", lzi->cur_loc,len, nchars); +#endif + trimmed = 1; + len = nchars; + } + if (len >= lzi->min_match) { +#ifdef LAZY + if ((bbp < bbe -1) && !trimmed && + ((lenp[1] > (len + 1)) /* || ((lenp[1] == len) && (prevp[1] > prevp[0])) */)) { + len = 1; + /* this is the lazy eval case */ + } + else +#endif + if (lzi->output_match(lzi, (*prevp - lzi->block_buf) - lzi->block_loc, + len) < 0) { + // fprintf(stderr, "Match rejected: %06x %d\n", lzi->cur_loc, len); + len = 1; /* match rejected */ + } + } + else + len = 1; + + if (len < lzi->min_match) { + assert(len == 1); + lzi->output_literal(lzi, *bbp); + } + // fprintf(stderr, "len = %3d, *lenp = %3d, cur_loc = %06x, block_loc = %06x\n", len, *lenp, lzi->cur_loc, 
lzi->block_loc); + bbp += len; + prevp += len; + lenp += len; + lzi->cur_loc += len; + lzi->block_loc += len; + assert(nchars >= len); + nchars -= len; + + } + } + return 0; +} diff --git a/src/calibre/utils/lzx/lzc.h b/src/calibre/utils/lzx/lzc.h new file mode 100644 index 0000000000..a721fede60 --- /dev/null +++ b/src/calibre/utils/lzx/lzc.h @@ -0,0 +1,60 @@ +/* + File lz_nonslide.h, part of lzxcomp library + Copyright (C) 2002 Matthew T. Russotto + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; version 2.1 only + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +typedef struct lz_info lz_info; +typedef int (*get_chars_t)(lz_info *lzi, int n, u_char *buf); +typedef int (*output_match_t)(lz_info *lzi, int match_pos, int match_len); +typedef void (*output_literal_t)(lz_info *lzi, u_char ch); + +struct lz_info +{ + int wsize; /* window size in bytes */ + int max_match; /* size of longest match in bytes */ + int min_match; + u_char *block_buf; + u_char *block_bufe; + int block_buf_size; + int chars_in_buf; + int cur_loc; /* location within stream */ + int block_loc; + int frame_size; + int max_dist; + u_char **prevtab; + int *lentab; + short eofcount; + short stop; + short analysis_valid; + + get_chars_t get_chars; + output_match_t output_match; + output_literal_t output_literal; + void *user_data; +}; + +void lz_init(lz_info *lzi, int wsize, int max_dist, + int max_match, int min_match, + int frame_size, + get_chars_t get_chars, + output_match_t output_match, + output_literal_t output_literal, void *user_data); + +void lz_release(lz_info *lzi); + +void lz_reset(lz_info *lzi); +void lz_stop_compressing(lz_info *lzi); +int lz_left_to_process(lz_info *lzi); /* returns # chars read in but unprocessed */ +int lz_compress(lz_info *lzi, int nchars); diff --git a/src/calibre/utils/lzx/lzxc.c b/src/calibre/utils/lzx/lzxc.c new file mode 100644 index 0000000000..445cf92767 --- /dev/null +++ b/src/calibre/utils/lzx/lzxc.c @@ -0,0 +1,1259 @@ +/* + File lzx_layer.c, part of lzxcomp library + Copyright (C) 2002 Matthew T. Russotto + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; version 2.1 only + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include +#include +#include +#include /* for memset on Linux */ +#include +#include + +#include +#include + +/* Force using (actually working) non-sliding version. 
*/ +#define NONSLIDE + +/* these named constants are from the Microsoft LZX documentation */ +#define MIN_MATCH 2 +#define MAX_MATCH 257 +#define NUM_CHARS 256 +#define NUM_PRIMARY_LENGTHS 7 +#define NUM_SECONDARY_LENGTHS 249 + +/* Debugging defines useful during development. All add diagnostic output + at various points in the system */ + +/*#define DEBUG_MATCHES *//* When matches come in from the LZ engine */ +/*#define DEBUG_MATCHES_2 *//* When matches are being output */ +/*#define DEBUG_HUFFMAN *//* When huffman trees are built */ +/*#define DEBUG_ENTROPY *//* In entropy calculation */ +/*#define DEBUG_LZ *//* Uncompressed input reconstructed from + LZ engine */ +/*#define DEBUG_BITBUF *//* Raw output to upper layer */ +/*#define DEBUG_EXTRA_BITS *//* Savings due to extra bits huffman tree */ +/*#define DEBUG_POSITION_SLOT_LOOKUP */ +/*#define DEBUG_TREE_COMPRESSION *//* During RLE compression of trees */ + +/* number of position slots given window_size-5 */ +/* as corrected by Caie */ +short num_position_slots[] = {30, 32, 34, 36, 38, 42, 50}; +unsigned long position_base[51]; +u_char extra_bits[52]; +double rloge2; + +typedef struct ih_elem { + int freq; + short sym; + short pathlength; + struct ih_elem *parent; + struct ih_elem *left; + struct ih_elem *right; +} ih_elem; + +typedef struct h_elem { + int freq; + short sym; + short pathlength; + struct ih_elem *parent; + unsigned short code; +} h_elem; + +typedef struct huff_entry { + short codelength; + unsigned short code; +} huff_entry; + +static int cmp_leaves(const void *in_a, const void *in_b) +{ + const struct h_elem *a = in_a; + const struct h_elem *b = in_b; + + if (!a->freq && b->freq) + return 1; + if (a->freq && !b->freq) + return -1; + + if (a->freq == b->freq) + return a->sym - b->sym; + + return a->freq - b->freq; +} + +static int +cmp_pathlengths(const void *in_a, const void *in_b) +{ + const struct h_elem *a = in_a; + const struct h_elem *b = in_b; + + if (a->pathlength == b->pathlength) +#if 0 + return a->sym - b->sym; +#else + /* see note on canonical pathlengths */ + return b->sym - a->sym; +#endif + return b->pathlength - a->pathlength; +} + +/* standard huffman building algorithm */ +static void +build_huffman_tree(int nelem, int max_code_length, int *freq, huff_entry *tree) +{ + h_elem *leaves = malloc(nelem * sizeof(h_elem)); + ih_elem *inodes; + ih_elem *next_inode; + ih_elem *cur_inode; + h_elem *cur_leaf; + int leaves_left; + int nleaves; + int pathlength; + unsigned short cur_code; + short codes_too_long = 0; + ih_elem *f1, *f2; + int i; + + for (i = 0; i < nelem; i++) { + leaves[i].freq = freq[i]; + leaves[i].sym = i; + leaves[i].pathlength = 0; + } + qsort(leaves, nelem, sizeof(h_elem), cmp_leaves); + for (leaves_left = 0; leaves_left < nelem; leaves_left++) { +#ifdef DEBUG_HUFFMAN + fprintf(stderr, "%3d: %3d '%c'\n", leaves_left, leaves[leaves_left].freq, + leaves[leaves_left].sym); +#endif + if (!leaves[leaves_left].freq) break; + } + nleaves = leaves_left; + + if (nleaves >= 2) { + inodes = malloc((nelem-1) * sizeof(ih_elem)); + do { + if (codes_too_long) { + for (leaves_left = 0; leaves_left < nelem; leaves_left++) { + if (!leaves[leaves_left].freq) break; + if (leaves[leaves_left].freq != 1) { + leaves[leaves_left].freq >>= 1; + codes_too_long = 0; + } + } + assert (!codes_too_long); + } + + cur_leaf = leaves; + next_inode = cur_inode = inodes; + + do { + f1 = f2 = NULL; + if (leaves_left && + ((cur_inode == next_inode) || + (cur_leaf->freq <= cur_inode->freq))) { + f1 = (ih_elem *)cur_leaf++; + 
leaves_left--; + } + else if (cur_inode != next_inode) { + f1 = cur_inode++; + } + + if (leaves_left && + ((cur_inode == next_inode) || + (cur_leaf->freq <= cur_inode->freq))) { + f2 = (ih_elem *)cur_leaf++; + leaves_left--; + } + else if (cur_inode != next_inode) { + f2 = cur_inode++; + } + +#ifdef DEBUG_HUFFMAN + fprintf(stderr, "%d %d\n", f1, f2); +#endif + if (f1 && f2) { + next_inode->freq = f1->freq + f2->freq; + next_inode->sym = -1; + next_inode->left = f1; + next_inode->right = f2; + next_inode->parent = NULL; + f1->parent = next_inode; + f2->parent = next_inode; + if (f1->pathlength > f2->pathlength) + next_inode->pathlength = f1->pathlength + 1; + else + next_inode->pathlength = f2->pathlength + 1; + if (next_inode->pathlength > max_code_length) { + codes_too_long = 1; + break; + } + next_inode++; + } + } + while (f1 && f2); + } + while (codes_too_long); + +#ifdef DEBUG_HUFFMAN + cur_inode = inodes; + while (cur_inode < next_inode) { + fprintf(stderr, "%d l: %3d%c r: %3d%c freq: %8d\n", + cur_inode - inodes, + (cur_inode->left->sym!=-1)?(((struct h_elem *)cur_inode->left)-leaves):(cur_inode->left-inodes), + (cur_inode->left->sym!=-1)?'l':'i', + (cur_inode->right->sym!=-1)?(((struct h_elem *)cur_inode->right)-leaves):(cur_inode->right-inodes), + (cur_inode->right->sym!=-1)?'l':'i', + (cur_inode->freq) + ); + cur_inode++; + } +#endif + + /* now traverse tree depth-first */ + cur_inode = next_inode - 1; + pathlength = 0; + cur_inode->pathlength = -1; + do { + /* precondition: at unmarked node*/ + if (cur_inode->sym == -1) /*&& (cur_inode->left)*/ { + /* left node of unmarked node is unmarked */ + cur_inode = cur_inode->left; + cur_inode->pathlength = -1; + pathlength++; + } + else { + /* mark node */ + cur_inode->pathlength = pathlength; +#if 0 + if (cur_inode->right) { + /* right node of previously unmarked node is unmarked */ + cur_inode = cur_inode->right; + cur_inode->pathlength = -1; + pathlength++; + } + else +#endif + { + + /* time to come up. Keep coming up until an unmarked node is reached */ + /* or the tree is exhausted */ + do { + cur_inode = cur_inode->parent; + pathlength--; + } + while (cur_inode && (cur_inode->pathlength != -1)); + if (cur_inode) { + /* found unmarked node; mark it and go right */ + cur_inode->pathlength = pathlength; + cur_inode = cur_inode->right; + cur_inode->pathlength = -1; + pathlength++; + /* would be complex if cur_inode could be null here. It can't */ + } + } + } + } + while (cur_inode); + +#ifdef DEBUG_HUFFMAN + cur_inode = inodes; + while (cur_inode < next_inode) { + fprintf(stderr, "%d l: %3d%c r: %3d%c freq: %8d pathlength %4d\n", + cur_inode - inodes, + (cur_inode->left->sym!=-1)?(((struct h_elem *)cur_inode->left)-leaves):(cur_inode->left-inodes), + (cur_inode->left->sym!=-1)?'l':'i', + (cur_inode->right->sym!=-1)?(((struct h_elem *)cur_inode->right)-leaves):(cur_inode->right-inodes), + (cur_inode->right->sym!=-1)?'l':'i', + (cur_inode->freq), + (cur_inode->pathlength) + ); + cur_inode++; + } +#endif + free(inodes); + + /* the pathlengths are already in order, so this sorts by symbol */ + qsort(leaves, nelem, sizeof(h_elem), cmp_pathlengths); + + /** + Microsoft's second condition on its canonical huffman codes is: + + For each level, starting at the deepest level of the tree and then + moving upwards, leaf nodes must start as far left as possible. An + alternative way of stating this constraint is that if any tree node + has children then all tree nodes to the left of it with the same path + length must also have children. 
+ + These 'alternatives' are not equivalent. The latter alternative gives + the common canonical code where the longest code is all zeros. The former + gives an opposite code where the longest code is all ones. Microsoft uses the + former alternative. + **/ + +#if 0 + pathlength = leaves[0].pathlength; + cur_code = 0; + for (i = 0; i < nleaves; i++) { + while (leaves[i].pathlength < pathlength) { + assert(!(cur_code & 1)); + cur_code >>= 1; + pathlength--; + } + leaves[i].code = cur_code; + cur_code++; + } +#else + pathlength = leaves[nleaves-1].pathlength; + assert(leaves[0].pathlength <= 16); /* this method cannot deal with bigger codes, though + the other canonical method can in some cases + (because it starts with zeros ) */ + cur_code = 0; + for (i = nleaves - 1; i >= 0; i--) { + while (leaves[i].pathlength > pathlength) { + cur_code <<= 1; + pathlength++; + } + leaves[i].code = cur_code; + cur_code++; + } +#endif + +#ifdef DEBUG_HUFFMAN + for (i = 0; i < nleaves; i++) { + char code[18]; + int j; + + cur_code = leaves[i].code; + code[leaves[i].pathlength] = 0; + for (j = leaves[i].pathlength-1; j >= 0; j--) { + if (cur_code & 1) code[j] = '1'; + else code[j] = '0'; + cur_code >>= 1; + } + fprintf(stderr, "%3d: %3d %3d %-16.16s '%c'\n", i, leaves[i].freq, leaves[i].pathlength, code, + leaves[i].sym); + } +#endif + } + else if (nleaves == 1) { + /* 0 symbols is OK (not according to doc, but according to Caie) */ + /* but if only one symbol is present, two symbols are required */ + nleaves = 2; + leaves[0].pathlength = leaves[1].pathlength = 1; + if (leaves[1].sym > leaves[0].sym) { + leaves[1].code = 1; + leaves[0].code = 0; + } + else { + leaves[0].code = 1; + leaves[1].code = 0; + } + } + + memset(tree, 0, nelem * sizeof(huff_entry)); + for (i = 0; i < nleaves; i++) { + tree[leaves[i].sym].codelength = leaves[i].pathlength; + tree[leaves[i].sym].code = leaves[i].code; + } + + free(leaves); +} + +/* from Stuart Caie's code -- I'm hoping this code is too small to encumber + this file. If not, you could rip it out and hard-code the tables */ + +static void lzx_init_static(void) +{ + int i, j; + + if (extra_bits[49]) return; + + rloge2 = 1.0/log(2); + for (i=0, j=0; i <= 50; i += 2) { + extra_bits[i] = extra_bits[i+1] = j; /* 0,0,0,0,1,1,2,2,3,3... */ + if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */ + } + + for (i=0, j=0; i <= 50; i++) { + position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */ + j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */ + } +} + +struct lzx_data +{ + void *in_arg; + void *out_arg; + void *mark_frame_arg; + lzx_get_bytes_t get_bytes; + lzx_at_eof_t at_eof; + lzx_put_bytes_t put_bytes; + lzx_mark_frame_t mark_frame; + struct lz_info *lzi; + /* a 'frame' is an 0x8000 byte thing. 
Called that because otherwise + I'd confuse myself overloading 'block' */ + int left_in_frame; + int left_in_block; + int R0, R1, R2; + int num_position_slots; + /* this is the LZX block size */ + int block_size; + int *main_freq_table; + int length_freq_table[NUM_SECONDARY_LENGTHS]; + int aligned_freq_table[LZX_ALIGNED_SIZE]; + uint32_t *block_codes; + uint32_t *block_codesp; + huff_entry *main_tree; + huff_entry length_tree[NUM_SECONDARY_LENGTHS]; + huff_entry aligned_tree[LZX_ALIGNED_SIZE]; + int main_tree_size; + uint16_t bit_buf; + int bits_in_buf; + double main_entropy; + double last_ratio; + uint8_t *prev_main_treelengths; + uint8_t prev_length_treelengths[NUM_SECONDARY_LENGTHS]; + uint32_t len_uncompressed_input; + uint32_t len_compressed_output; + short need_1bit_header; + short subdivide; /* 0 = don't subdivide, 1 = allowed, -1 = requested */ +}; + +static int +lzx_get_chars(lz_info *lzi, int n, u_char *buf) +{ + /* force lz compression to stop after every block */ + int chars_read; + int chars_pad; + + lzx_data *lzud = (lzx_data *)lzi->user_data; +#ifdef OLDFRAMING + if (lzud->subdivide < 0) return 0; + if (n > lzud->left_in_frame) + n = lzud->left_in_frame; + if (n > lzud->left_in_block) + n = lzud->left_in_block; +#endif + chars_read = lzud->get_bytes(lzud->in_arg, n, buf); +#ifdef OLDFRAMING + lzud->left_in_frame -= chars_read; + lzud->left_in_block -= chars_read; +#else + lzud->left_in_frame -= chars_read % LZX_FRAME_SIZE; + if (lzud->left_in_frame < 0) + lzud->left_in_frame += LZX_FRAME_SIZE; +#endif + if ((chars_read < n) && (lzud->left_in_frame)) { + chars_pad = n - chars_read; + if (chars_pad > lzud->left_in_frame) chars_pad = lzud->left_in_frame; + /* never emit a full frame of padding. This prevents silliness when + lzx_compress is called when at EOF but EOF not yet detected */ + if (chars_pad == LZX_FRAME_SIZE) chars_pad = 0; +#ifdef OLDFRAMING + if (chars_pad > lzud->left_in_block) chars_pad = lzud->left_in_block; +#endif + memset(buf + chars_read, 0, chars_pad); + lzud->left_in_frame -= chars_pad; +#ifdef OLDFRAMING + lzud->left_in_block -= chars_pad; +#endif + chars_read += chars_pad; + } + return chars_read; +} + +#ifdef NONSLIDE +static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp) +{ + u_char *matchb; + u_char *nmatchb; + u_char *c1, *c2; + int j; + + if (-*match_locp == loc) return -1; + if (loc < match_len) return -1; + + matchb = lzi->block_buf + lzi->block_loc + *match_locp; + nmatchb = lzi->block_buf + lzi->block_loc - loc; + c1 = matchb; + c2 = nmatchb; + for (j = 0; j < match_len; j++) { + if (*c1++ != *c2++) break; + } + if (j == match_len) { +#ifdef DEBUG_MATCHES + fprintf(stderr, "match found %d, old = %d new = %d len = %d\n", lzi->cur_loc, -*match_locp, loc, match_len); +#endif + *match_locp = -loc; + return 0; + } + return -1; +} +#else +static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp) +{ + u_char *matchb; + u_char *nmatchb; + u_char *c1, *c2; + int j; + + if (-*match_locp == loc) return -1; + if (loc < match_len) return -1; + + matchb = lzi->slide_bufp + *match_locp; + if (matchb < lzi->slide_buf) matchb += lzi->slide_buf_size; + nmatchb = lzi->slide_bufp - loc; + if (nmatchb < lzi->slide_buf) nmatchb += lzi->slide_buf_size; + c1 = matchb; + c2 = nmatchb; + for (j = 0; j < match_len; j++) { + if (*c1++ != *c2++) break; + if (c1 == lzi->slide_bufe) c1 = lzi->slide_buf; + if (c2 == lzi->slide_bufe) c2 = lzi->slide_buf; + } + if (j == match_len) { +#ifdef DEBUG_MATCHES + fprintf(stderr, "match 
found %d, old = %d new = %d len = %d\n", lzi->cur_loc, -*match_locp, loc, match_len); +#endif + *match_locp = -loc; + return 0; + } + return -1; +} +#endif +static void check_entropy(lzx_data *lzud, int main_index) +{ + /* entropy = - sum_alphabet P(x) * log2 P(x) */ + /* entropy = - sum_alphabet f(x)/N * log2 (f(x)/N) */ + /* entropy = - 1/N sum_alphabet f(x) * (log2 f(x) - log2 N) */ + /* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - sum_alphabet f(x) log2 N */ + /* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - log2 N sum_alphabet f(x) */ + /* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - N * log2 N */ + + /* entropy = - 1/N ((sum_alphabet f(x) * log2 f(x) ) - N * log2 N) */ + /* entropy = - 1/N ((sum_alphabet f(x) * ln f(x) * 1/ln 2) - N * ln N * 1/ln 2) */ + /* entropy = 1/(N ln 2) (N * ln N - (sum_alphabet f(x) * ln f(x))) */ + /* entropy = 1/(N ln 2) (N * ln N + (sum_alphabet -f(x) * ln f(x))) */ + + /* entropy = 1/(N ln 2) ( sum_alphabet ln N * f(x) + (sum_alphabet -f(x) * ln f(x))) */ + /* entropy = 1/(N ln 2) ( sum_alphabet ln N * f(x) + (-f(x) * ln f(x))) */ + /* entropy = -1/(N ln 2) ( sum_alphabet -ln N * f(x) + (f(x) * ln f(x))) */ + /* entropy = -1/(N ln 2) ( sum_alphabet f(x)(- ln N + ln f(x))) */ + /* entropy = -1/(N ln 2) ( sum_alphabet f(x)(ln f(x)/N)) */ + /* entropy = -1/N ( sum_alphabet (1/(ln 2))f(x)(ln f(x)/N)) */ + /* entropy = -1/N ( sum_alphabet f(x)(log2 f(x)/N)) */ + /* entropy = - ( sum_alphabet f(x)/N(log2 f(x)/N)) */ + /* entropy = - ( sum_alphabet P(x)(log2 P(x))) */ + + + double freq; + double n_ln_n; + double rn_ln2; + double cur_ratio; + int n; + + /* delete old entropy accumulation */ + if (lzud->main_freq_table[main_index] != 1) { + freq = (double)lzud->main_freq_table[main_index]-1; + lzud->main_entropy += freq * log(freq); + } + /* add new entropy accumulation */ + freq = (double)lzud->main_freq_table[main_index]; + lzud->main_entropy -= freq * log(freq); + n = lzud->block_codesp - lzud->block_codes; + + if (((n & 0xFFF) == 0) && (lzud->left_in_block >= 0x1000)) { + n_ln_n = (double)n * log((double)n); + rn_ln2 = rloge2 / (double)n; + cur_ratio = (n * rn_ln2 *(n_ln_n + lzud->main_entropy) + 24 + 3 * 80 + NUM_CHARS + (lzud->main_tree_size-NUM_CHARS)*3 + NUM_SECONDARY_LENGTHS ) / (double)n; +#ifdef DEBUG_ENTROPY + fprintf(stderr, "n = %d\n", n); + fprintf(stderr, "main entropy = %f\n", rn_ln2 *(n_ln_n + lzud->main_entropy) ); + fprintf(stderr, "compression ratio (raw) = %f\n", 100.0 * rn_ln2 *(n_ln_n + lzud->main_entropy) /9.0 ); + fprintf(stderr, "compression ratio (ovh) = %f\n", 100.0 * cur_ratio/9.0); +#endif + if (cur_ratio > lzud->last_ratio) { +#ifdef DEBUG_ENTROPY + fprintf(stderr, "resetting huffman tables at %d\n", n); +#endif + lzud->subdivide = -1; + lz_stop_compressing(lzud->lzi); + } + lzud->last_ratio = cur_ratio; + } +} + +static int +lzx_output_match(lz_info *lzi, int match_pos, int match_len) +{ + lzx_data *lzud = (lzx_data *)lzi->user_data; + uint32_t formatted_offset; + uint32_t position_footer; + uint8_t length_footer; + uint8_t length_header; + uint16_t len_pos_header; + int position_slot; + short btdt; + +#ifdef DEBUG_LZ + { + int i; + int pos; + for (i = 0; i < match_len; i++) { + +#ifdef NONSLIDE + pos = match_pos + lzi->block_loc + i; + fprintf(stderr, "%c", lzi->block_buf[pos]); +#else + pos = match_pos + lzi->front_offset + i; + if (pos > lzi->slide_buf_size) + pos -= lzi->slide_buf_size; + fprintf(stderr, "%c", lzi->slide_buf[pos]); +#endif + } + } +#endif + position_footer = 0; + btdt = 0; + testforr: + if 
(match_pos == -lzud->R0) { + match_pos = 0; + formatted_offset = 0; + position_slot = 0; + } + else if (match_pos == -lzud->R1) { + lzud->R1 = lzud->R0; + lzud->R0 = -match_pos; + match_pos = 1; + formatted_offset = 1; + position_slot = 1; + } + else if (match_pos == -lzud->R2) { + lzud->R2 = lzud->R0; + lzud->R0 = -match_pos; + match_pos = 2; + formatted_offset = 2; + position_slot = 2; + } + else { + if (!btdt) { + btdt = 1; + if (find_match_at(lzi, lzud->R0, match_len, &match_pos) == 0) + goto testforr; + if (find_match_at(lzi, lzud->R1, match_len, &match_pos) == 0) + goto testforr; + if (find_match_at(lzi, lzud->R2, match_len, &match_pos) == 0) + goto testforr; + } + + formatted_offset = -match_pos + 2; + + if ((match_len < 3) || + ((formatted_offset >= 64) && (match_len < 4)) || + ((formatted_offset >= 2048) && (match_len < 5)) || + ((formatted_offset >= 65536) && (match_len < 6))) { + /* reject matches where extra_bits will likely be bigger than just outputting + literals. The numbers are basically derived through guessing + and trial and error */ + return -1; /* reject the match */ + } + + lzud->R2 = lzud->R1; + lzud->R1 = lzud->R0; + lzud->R0 = -match_pos; + + /* calculate position base using binary search of table; if log2 can be + done in hardware, approximation might work; + trunc(log2(formatted_offset*formatted_offset)) gets either the proper + position slot or the next one, except for slots 0, 1, and 39-49 + + Slots 0-1 are handled by the R0-R1 procedures + + Slots 36-49 (formatted_offset >= 262144) can be found by + (formatted_offset/131072) + 34 == + (formatted_offset >> 17) + 34; + */ + if (formatted_offset >= 262144) { + position_slot = (formatted_offset >> 17) + 34; + } + else { + int left, right, mid; + + left = 3; + right = lzud->num_position_slots - 1; + position_slot = -1; + while (left <= right) { + mid = (left + right)/2; + if ((position_base[mid] <= formatted_offset) && + position_base[mid+1] > formatted_offset) { + position_slot = mid; + break; + } +#if 0 + fprintf(stderr, "BEFORE: %06x %06x %06x %06x\n", + position_base[left], position_base[mid], + formatted_offset, position_base[right]); +#endif + if (formatted_offset > position_base[mid]) + /* too low */ + left = mid + 1; + else /* too high */ + right = mid; +#if 0 + fprintf(stderr, "AFTER : %06x %06x %06x %06x\n", + position_base[left], position_base[mid], + formatted_offset, position_base[right]); +#endif + } +#ifdef DEBUG_POSITION_SLOT_LOOKUP + if (position_slot < 0) { + fprintf(stderr, "lmr npr: %d %d %d %d\n", left, mid, right, lzud->num_position_slots); + fprintf(stderr, "AFTER : %07d %07d %07d %07d\n", + position_base[left], position_base[mid], + formatted_offset, position_base[right]); + fprintf(stderr, "(%d, %d, %d, %d, %d)\n", match_pos, match_len, formatted_offset, position_slot, position_footer); + } +#endif + assert(position_slot >= 0); + /* FIXME precalc extra_mask table */ + } + position_footer = ((1UL << extra_bits[position_slot]) - 1) & formatted_offset; + } +#ifdef DEBUG_MATCHES +#ifdef NONSLIDE + fprintf(stderr, "(%08x, %d, %d, %d, %d, %d)\n", lzud->lzi->cur_loc , match_pos, match_len, formatted_offset, position_slot, position_footer); +#else + fprintf(stderr, "(%08x, %d, %d, %d, %d, %d)\n", lzud->lzi->cur_loc - lzud->lzi->chars_in_match , match_pos, match_len, formatted_offset, position_slot, position_footer); +#endif +#endif + /* match length = 8 bits */ + /* position_slot = 6 bits */ + /* position_footer = 17 bits */ + /* total = 31 bits */ + /* plus one to say whether it's a literal or 
not */ + *lzud->block_codesp++ = 0x80000000 | /* bit 31 in intelligent bit ordering */ + (position_slot << 25) | /* bits 30-25 */ + (position_footer << 8) | /* bits 8-24 */ + (match_len - MIN_MATCH); /* bits 0-7 */ + + if (match_len < (NUM_PRIMARY_LENGTHS + MIN_MATCH)) { + length_header = match_len - MIN_MATCH; + /* length_footer = 255; */ /* not necessary */ + } + else { + length_header = NUM_PRIMARY_LENGTHS; + length_footer = match_len - (NUM_PRIMARY_LENGTHS + MIN_MATCH); + lzud->length_freq_table[length_footer]++; + } + len_pos_header = (position_slot << 3) | length_header; + lzud->main_freq_table[len_pos_header + NUM_CHARS]++; + if (extra_bits[position_slot] >= 3) { + lzud->aligned_freq_table[position_footer & 7]++; + } +#ifndef OLDFRAMING + lzud->left_in_block -= match_len; +#endif + if (lzud->subdivide) + check_entropy(lzud, len_pos_header + NUM_CHARS); + return 0; /* accept the match */ +} + +static void +lzx_output_literal(lz_info *lzi, u_char ch) +{ + lzx_data *lzud = (lzx_data *)lzi->user_data; + +#ifndef OLDFRAMING + lzud->left_in_block--; +#endif + *lzud->block_codesp++ = ch; +#ifdef DEBUG_LZ + fprintf(stderr, "%c", ch); +#endif + lzud->main_freq_table[ch]++; + if (lzud->subdivide) + check_entropy(lzud, ch); +} + +static void lzx_write_bits(lzx_data *lzxd, int nbits, uint32_t bits) +{ + int cur_bits; + int shift_bits; + int rshift_bits; + uint16_t mask_bits; + +#ifdef DEBUG_BITBUF + fprintf(stderr, "WB: %2d %08x\n", nbits, bits); +#endif + cur_bits = lzxd->bits_in_buf; + while ((cur_bits + nbits) >= 16) { + shift_bits = 16 - cur_bits; + rshift_bits = nbits - shift_bits; + if (shift_bits == 16) { + lzxd->bit_buf = (bits>>rshift_bits) & 0xFFFF; + } + else { + mask_bits = (1U << shift_bits) - 1; + lzxd->bit_buf <<= shift_bits; + lzxd->bit_buf |= (bits>>rshift_bits) & mask_bits; + } +#ifdef DEBUG_BITBUF + fprintf(stderr, "WBB: %04x\n", lzxd->bit_buf); +#endif +#ifdef LZX_BIG_ENDIAN + lzxd->bit_buf = ((lzxd->bit_buf & 0xFF)<<8) | (lzxd->bit_buf >> 8); +#endif + lzxd->put_bytes(lzxd->out_arg, sizeof(lzxd->bit_buf), &lzxd->bit_buf); + lzxd->len_compressed_output += sizeof(lzxd->bit_buf); + lzxd->bit_buf = 0; + nbits -= shift_bits; + cur_bits = 0; + } + /* (cur_bits + nbits) < 16. If nbits = 0, we're done. 
+ otherwise move bits in */ + shift_bits = nbits; + mask_bits = (1U << shift_bits) - 1; + lzxd->bit_buf <<= shift_bits; + lzxd->bit_buf |= bits & mask_bits; + cur_bits += nbits; + +#ifdef DEBUG_BITBUF + fprintf(stderr, "OBB: %2d %04x\n", cur_bits, lzxd->bit_buf); +#endif + lzxd->bits_in_buf = cur_bits; +} + +static void lzx_align_output(lzx_data *lzxd) +{ + if (lzxd->bits_in_buf) { + lzx_write_bits(lzxd, 16 - lzxd->bits_in_buf, 0); + } + if (lzxd->mark_frame) + lzxd->mark_frame(lzxd->mark_frame_arg, lzxd->len_uncompressed_input, lzxd->len_compressed_output); +} + +static void +lzx_write_compressed_literals(lzx_data *lzxd, int block_type) +{ + uint32_t *cursor = lzxd->block_codes; + uint32_t *endp = lzxd->block_codesp; + uint16_t position_slot; + uint32_t position_footer; + uint32_t match_len_m2; /* match length minus 2, which is MIN_MATCH */ + uint32_t verbatim_bits; + uint32_t block_code; + uint16_t length_header; + uint16_t length_footer; + uint16_t len_pos_header; + huff_entry *huffe; + int frame_count = (lzxd->len_uncompressed_input % LZX_FRAME_SIZE); + + lzxd->len_uncompressed_input -= frame_count; /* will be added back in later */ + while (cursor < endp) { + block_code = *cursor++; + if (block_code & 0x80000000) { + /* + * 0x80000000 | bit 31 in intelligent bit ordering + * (position_slot << 25) | bits 30-25 + * (position_footer << 8) | bits 8-24 + * (match_len - MIN_MATCH); bits 0-7 + * + */ + + match_len_m2 = block_code & 0xFF; /* 8 bits */ + position_footer = (block_code >> 8)& 0x1FFFF; /* 17 bits */ + position_slot = (block_code >> 25) & 0x3F; /* 6 bits */ + +#ifdef DEBUG_MATCHES_2 + fprintf(stderr, "%08x, %3d %2d %d\n", lzxd->len_uncompressed_input + frame_count, match_len_m2, position_slot, position_footer); +#endif + if (match_len_m2 < NUM_PRIMARY_LENGTHS) { + length_header = match_len_m2; + length_footer = 255; /* personal encoding for NULL */ + } + else { + length_header = NUM_PRIMARY_LENGTHS; + length_footer = match_len_m2 - NUM_PRIMARY_LENGTHS; + } + len_pos_header = (position_slot << 3) | length_header; + huffe = &lzxd->main_tree[len_pos_header+NUM_CHARS]; + lzx_write_bits(lzxd, huffe->codelength, huffe->code); + if (length_footer != 255) { + huffe = &lzxd->length_tree[length_footer]; + lzx_write_bits(lzxd, huffe->codelength, huffe->code); + } + if ((block_type == LZX_ALIGNED_OFFSET_BLOCK) && (extra_bits[position_slot] >= 3)) { + /* aligned offset block and code */ + verbatim_bits = position_footer >> 3; + lzx_write_bits(lzxd, extra_bits[position_slot] - 3, verbatim_bits); + huffe = &lzxd->aligned_tree[position_footer&7]; + lzx_write_bits(lzxd, huffe->codelength, huffe->code); + } + else { + verbatim_bits = position_footer; + lzx_write_bits(lzxd, extra_bits[position_slot], verbatim_bits); + } + frame_count += match_len_m2 + 2; + } + else { + /* literal */ + assert(block_code < NUM_CHARS); + huffe = &lzxd->main_tree[block_code]; + lzx_write_bits(lzxd, huffe->codelength, huffe->code); + frame_count++; + } + if (frame_count == LZX_FRAME_SIZE) { + lzxd->len_uncompressed_input += frame_count; + lzx_align_output(lzxd); + frame_count = 0; + } +#ifdef DEBUG_MATCHES_2 + if (frame_count > LZX_FRAME_SIZE) { + fprintf(stderr, "uncomp_len = %x, frame_count = %x, block_code = %08x, match_len_m2 = %d", lzxd->len_uncompressed_input, frame_count, block_code, match_len_m2); + } +#endif + assert (frame_count < LZX_FRAME_SIZE); + } + lzxd->len_uncompressed_input += frame_count; +} + +static int +lzx_write_compressed_tree(struct lzx_data *lzxd, + struct huff_entry *tree, uint8_t *prevlengths, 
+ int treesize) +{ + u_char *codes; + u_char *runs; + int freqs[LZX_PRETREE_SIZE]; + int cur_run; + int last_len; + huff_entry pretree[20]; + u_char *codep; + u_char *codee; + u_char *runp; + int excess; + int i; + int cur_code; + + codep = codes = malloc(treesize*sizeof(char)); + runp = runs = malloc(treesize*sizeof(char)); + memset(freqs, 0, sizeof(freqs)); + cur_run = 1; + last_len = tree[0].codelength; + for (i = 1; i <= treesize; i++) { + if ((i == treesize) || (tree[i].codelength != last_len)) { + if (last_len == 0) { + while (cur_run >= 20) { + excess = cur_run - 20; + if (excess > 31) excess = 31; + *codep++ = 18; + *runp++ = excess; + cur_run -= excess + 20; + freqs[18]++; + } + while (cur_run >= 4) { + excess = cur_run - 4; + if (excess > 15) excess = 15; + *codep++ = 17; + *runp++ = excess; + cur_run -= excess + 4; + freqs[17]++; + } + while (cur_run > 0) { + *codep = prevlengths[i - cur_run]; + freqs[*codep++]++; + *runp++ = 0; /* not necessary */ + cur_run--; + } + } + else { + while (cur_run >= 4) { + if (cur_run == 4) excess = 0; + else excess = 1; + *codep++ = 19; + *runp++ = excess; + freqs[19]++; + /* right, MS lies again. Code is NOT + prev_len + len (mod 17), it's prev_len - len (mod 17)*/ + *codep = prevlengths[i-cur_run] - last_len; + if (*codep > 16) *codep += 17; + freqs[*codep++]++; + *runp++ = 0; /* not necessary */ + cur_run -= excess+4; + } + while (cur_run > 0) { + *codep = prevlengths[i-cur_run] - last_len; + if (*codep > 16) *codep += 17; + *runp++ = 0; /* not necessary */ + cur_run--; + freqs[*codep++]++; + } + } + if (i != treesize) + last_len = tree[i].codelength; + cur_run = 0; + } + cur_run++; + } + codee = codep; +#ifdef DEBUG_TREE_COMPRESSION + *codep++ = 255; + *runp++ = 255; + fprintf(stderr, "num: len code run\n"); + for (i = 0; i < treesize; i++) { + fprintf(stderr, "%3d: %2d %2d %2d\n", i, tree[i].codelength, codes[i], runs[i]); + } +#endif + /* now create the huffman table and write out the pretree */ + build_huffman_tree(LZX_PRETREE_SIZE, 16, freqs, pretree); + for (i = 0; i < LZX_PRETREE_SIZE; i++) { + lzx_write_bits(lzxd, 4, pretree[i].codelength); + } + codep = codes; + runp = runs; + cur_run = 0; + while (codep < codee) { + cur_code = *codep++; + lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code); + if (cur_code == 17) { + cur_run += *runp + 4; + lzx_write_bits(lzxd, 4, *runp); + } + else if (cur_code == 18) { + cur_run += *runp + 20; + lzx_write_bits(lzxd, 5, *runp); + } + else if (cur_code == 19) { + cur_run += *runp + 4; + lzx_write_bits(lzxd, 1, *runp); + cur_code = *codep++; + lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code); + runp++; + } + else { + cur_run++; + } + runp++; + } + free(codes); + free(runs); + return 0; +} + +void +lzx_reset(lzx_data *lzxd) +{ + lzxd->need_1bit_header = 1; + lzxd->R0 = lzxd->R1 = lzxd->R2 = 1; + memset(lzxd->prev_main_treelengths, 0, lzxd->main_tree_size * sizeof(uint8_t)); + memset(lzxd->prev_length_treelengths, 0, NUM_SECONDARY_LENGTHS * sizeof(uint8_t)); + lz_reset(lzxd->lzi); +} + +int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide) +{ + int i; + uint32_t written_sofar = 0; + int block_type; + long uncomp_bits; + long comp_bits; + long comp_bits_ovh; + long uncomp_length; + + if ((lzxd->block_size != block_size) || (lzxd->block_codes == NULL)) { + if (lzxd->block_codes != NULL) free(lzxd->block_codes); + lzxd->block_size = block_size; + lzxd->block_codes = malloc(block_size * sizeof(uint32_t)); + } + lzxd->subdivide = subdivide?1:0; 
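  /*
   * The assignments below (re)initialise the per-block state: block_codes
   * holds one 32-bit code per emitted literal or match (the block_size
   * allocation above is an upper bound of one slot per input byte),
   * left_in_block counts down the uncompressed bytes still owed to this
   * block, left_in_frame tracks the 0x8000-byte LZX frame boundary, and
   * main_entropy / last_ratio seed the running estimate that check_entropy()
   * consults when deciding whether to stop early and subdivide the block
   * with fresh huffman tables.
   */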
+ + lzxd->left_in_block = block_size; + lzxd->left_in_frame = LZX_FRAME_SIZE; + lzxd->main_entropy = 0.0; + lzxd->last_ratio = 9999999.0; + lzxd->block_codesp = lzxd->block_codes; + + memset(lzxd->length_freq_table, 0, NUM_SECONDARY_LENGTHS * sizeof(int)); + memset(lzxd->main_freq_table, 0, lzxd->main_tree_size * sizeof(int)); + memset(lzxd->aligned_freq_table, 0, LZX_ALIGNED_SIZE * sizeof(int)); + do { + lz_compress(lzxd->lzi, lzxd->left_in_block); + if (lzxd->left_in_frame == 0) + lzxd->left_in_frame = LZX_FRAME_SIZE; + + if ((lzxd->subdivide<0) || !lzxd->left_in_block || + (!lz_left_to_process(lzxd->lzi) && lzxd->at_eof(lzxd->in_arg))) { + /* now one block is LZ-analyzed. */ + /* time to write it out */ + uncomp_length = lzxd->block_size - lzxd->left_in_block - written_sofar; + /* uncomp_length will sometimes be 0 when input length is + an exact multiple of frame size */ + if (uncomp_length == 0) + continue; + if (lzxd->subdivide < 0) { +#ifdef DEBUG_ENTROPY + fprintf(stderr, "subdivided\n"); +#endif + lzxd->subdivide = 1; + } + + if (lzxd->need_1bit_header) { + /* one bit Intel preprocessing header */ + /* always 0 because this implementation doesn't do Intel preprocessing */ + lzx_write_bits(lzxd, 1, 0); + lzxd->need_1bit_header = 0; + } + + /* handle extra bits */ + uncomp_bits = comp_bits = 0; + build_huffman_tree(LZX_ALIGNED_SIZE, 7, lzxd->aligned_freq_table, lzxd->aligned_tree); + for (i = 0; i < LZX_ALIGNED_SIZE; i++) { + uncomp_bits += lzxd->aligned_freq_table[i]* 3; + comp_bits += lzxd->aligned_freq_table[i]* lzxd->aligned_tree[i].codelength; + } + comp_bits_ovh = comp_bits + LZX_ALIGNED_SIZE * 3; + if (comp_bits_ovh < uncomp_bits) + block_type = LZX_ALIGNED_OFFSET_BLOCK; + else + block_type = LZX_VERBATIM_BLOCK; + +#ifdef DEBUG_EXTRA_BITS + fprintf(stderr, "Extra bits uncompressed: %5d compressed: %5d compressed w/overhead %5d gain/loss %5d\n", uncomp_bits, comp_bits, comp_bits_ovh, uncomp_bits - comp_bits_ovh); +#endif + + /* block type */ + lzx_write_bits(lzxd, 3, block_type); + /* uncompressed length */ + lzx_write_bits(lzxd, 24, uncomp_length); + + written_sofar = lzxd->block_size - lzxd->left_in_block; + + /* now write out the aligned offset trees if present */ + if (block_type == LZX_ALIGNED_OFFSET_BLOCK) { + for (i = 0; i < LZX_ALIGNED_SIZE; i++) { + lzx_write_bits(lzxd, 3, lzxd->aligned_tree[i].codelength); + } + } + /* end extra bits */ + build_huffman_tree(lzxd->main_tree_size, LZX_MAX_CODE_LENGTH, + lzxd->main_freq_table, lzxd->main_tree); + build_huffman_tree(NUM_SECONDARY_LENGTHS, 16, + lzxd->length_freq_table, lzxd->length_tree); + + + + /* now write the pre-tree and tree for main 1 */ + lzx_write_compressed_tree(lzxd, lzxd->main_tree, lzxd->prev_main_treelengths, NUM_CHARS); + + /* now write the pre-tree and tree for main 2*/ + lzx_write_compressed_tree(lzxd, lzxd->main_tree + NUM_CHARS, + lzxd->prev_main_treelengths + NUM_CHARS, + lzxd->main_tree_size - NUM_CHARS); + + /* now write the pre tree and tree for length */ + lzx_write_compressed_tree(lzxd, lzxd->length_tree, lzxd->prev_length_treelengths, + NUM_SECONDARY_LENGTHS); + + /* now write literals */ + lzx_write_compressed_literals(lzxd, block_type); + + /* copy treelengths somewhere safe to do delta compression */ + for (i = 0; i < lzxd->main_tree_size; i++) { + lzxd->prev_main_treelengths[i] = lzxd->main_tree[i].codelength; + } + for (i = 0; i < NUM_SECONDARY_LENGTHS; i++) { + lzxd->prev_length_treelengths[i] = lzxd->length_tree[i].codelength; + } + lzxd->main_entropy = 0.0; + lzxd->last_ratio = 9999999.0; 
+ lzxd->block_codesp = lzxd->block_codes; + + memset(lzxd->length_freq_table, 0, NUM_SECONDARY_LENGTHS * sizeof(int)); + memset(lzxd->main_freq_table, 0, lzxd->main_tree_size * sizeof(int)); + memset(lzxd->aligned_freq_table, 0, LZX_ALIGNED_SIZE * sizeof(int)); + } + } + while (lzxd->left_in_block && (lz_left_to_process(lzxd->lzi) || !lzxd->at_eof(lzxd->in_arg))); + return 0; +} + +int lzx_init(struct lzx_data **lzxdp, int wsize_code, + lzx_get_bytes_t get_bytes, void *get_bytes_arg, + lzx_at_eof_t at_eof, + lzx_put_bytes_t put_bytes, void *put_bytes_arg, + lzx_mark_frame_t mark_frame, void *mark_frame_arg) +{ + int wsize; + struct lzx_data *lzxd; + + if ((wsize_code < 15) || (wsize_code > 21)) { + return -1; + } + lzx_init_static(); + + *lzxdp = lzxd = malloc(sizeof(*lzxd)); + if (lzxd == 0) + return -2; + + lzxd->in_arg = get_bytes_arg; + lzxd->out_arg = put_bytes_arg; + lzxd->mark_frame_arg = mark_frame_arg; + lzxd->get_bytes = get_bytes; + lzxd->put_bytes = put_bytes; + lzxd->at_eof = at_eof; + lzxd->mark_frame = mark_frame; + + wsize = 1 << (wsize_code); + + lzxd->bits_in_buf = 0; + lzxd->block_size = 0; + lzxd->block_codes = NULL; + lzxd->num_position_slots = num_position_slots[wsize_code-15]; + lzxd->main_tree_size = (NUM_CHARS + 8 * lzxd->num_position_slots); + + lzxd->main_freq_table = malloc(sizeof(int) * lzxd->main_tree_size); + lzxd->main_tree = malloc(sizeof(huff_entry)* lzxd->main_tree_size); + lzxd->prev_main_treelengths = malloc(sizeof(uint8_t)*lzxd->main_tree_size); + + lzxd->lzi = malloc(sizeof (*lzxd->lzi)); + /* the -3 prevents matches at wsize, wsize-1, wsize-2, all of which are illegal */ + lz_init(lzxd->lzi, wsize, wsize - 3, MAX_MATCH, MIN_MATCH, LZX_FRAME_SIZE, + lzx_get_chars, lzx_output_match, lzx_output_literal,lzxd); + lzxd->len_uncompressed_input = 0; + lzxd->len_compressed_output = 0; + lzx_reset(lzxd); + return 0; +} + +int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr) +{ + /* lzx_align_output(lzxd); Not needed as long as frame padding is in place */ + if (lzxr) { + lzxr->len_compressed_output = lzxd->len_compressed_output; + lzxr->len_uncompressed_input = lzxd->len_uncompressed_input; + } + lz_release(lzxd->lzi); + free(lzxd->lzi); + free(lzxd->prev_main_treelengths); + free(lzxd->main_tree); + free(lzxd->main_freq_table); + if (lzxd->block_codes) { + free(lzxd->block_codes); + } + free(lzxd); + return 0; +} + diff --git a/src/calibre/utils/lzx/lzxc.h b/src/calibre/utils/lzx/lzxc.h new file mode 100644 index 0000000000..32cb1f721a --- /dev/null +++ b/src/calibre/utils/lzx/lzxc.h @@ -0,0 +1,57 @@ +/* + File lzx_compress.h, part of lzxcomp library + Copyright (C) 2002 Matthew T. Russotto + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; version 2.1 only + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#if BYTE_ORDER == BIG_ENDIAN +# define LZX_BIG_ENDIAN +#endif + +/* the names of these constants are specific to this library */ +#define LZX_MAX_CODE_LENGTH 16 +#define LZX_FRAME_SIZE 32768 +#define LZX_PRETREE_SIZE 20 +#define LZX_ALIGNED_BITS 3 +#define LZX_ALIGNED_SIZE 8 + +#define LZX_VERBATIM_BLOCK 1 +#define LZX_ALIGNED_OFFSET_BLOCK 2 + +typedef struct lzx_data lzx_data; +typedef int (*lzx_get_bytes_t)(void *arg, int n, void *buf); +typedef int (*lzx_put_bytes_t)(void *arg, int n, void *buf); +typedef void (*lzx_mark_frame_t)(void *arg, uint32_t uncomp, uint32_t comp); +typedef int (*lzx_at_eof_t)(void *arg); + +typedef struct lzx_results +{ + /* add more here? Error codes, # blocks, # frames, etc? */ + long len_compressed_output; + long len_uncompressed_input; +} lzx_results; + +int lzx_init(struct lzx_data **lzxdp, int wsize_code, + lzx_get_bytes_t get_bytes, void *get_bytes_arg, + lzx_at_eof_t at_eof, + lzx_put_bytes_t put_bytes, void *put_bytes_arg, + lzx_mark_frame_t mark_frame, void *mark_frame_arg); + +void lzx_reset(lzx_data *lzxd); + +int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide); + +int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr); + diff --git a/src/calibre/utils/lzx/lzxd.c b/src/calibre/utils/lzx/lzxd.c index 337af441fd..e683a9ec23 100644 --- a/src/calibre/utils/lzx/lzxd.c +++ b/src/calibre/utils/lzx/lzxd.c @@ -18,7 +18,7 @@ #include #include -#include +#include /* Microsoft's LZX document and their implementation of the * com.ms.util.cab Java package do not concur. diff --git a/src/calibre/utils/lzx/lzx.h b/src/calibre/utils/lzx/lzxd.h similarity index 100% rename from src/calibre/utils/lzx/lzx.h rename to src/calibre/utils/lzx/lzxd.h diff --git a/src/calibre/utils/lzx/lzxmodule.c b/src/calibre/utils/lzx/lzxmodule.c index c45bb22c95..2f72b58ae7 100644 --- a/src/calibre/utils/lzx/lzxmodule.c +++ b/src/calibre/utils/lzx/lzxmodule.c @@ -4,14 +4,15 @@ * Python module C glue code. */ - #include #include -#include +#include +#include static char lzx_doc[] = -"Provide basic LZX decompression using the code from libmspack."; + "Provide basic LZX compression and decompression using the code from\n" + "liblzxcomp and libmspack respectively."; static PyObject *LzxError = NULL; @@ -214,6 +215,15 @@ initlzx(void) LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL); Py_INCREF(LzxError); PyModule_AddObject(m, "LzxError", LzxError); + + PyModule_AddObject(m, "_lzxc_init", + Py_BuildValue("k", (unsigned long)lzx_init)); + PyModule_AddObject(m, "_lzxc_reset", + Py_BuildValue("k", (unsigned long)lzx_reset)); + PyModule_AddObject(m, "_lzxc_compress_block", + Py_BuildValue("k", (unsigned long)lzx_compress_block)); + PyModule_AddObject(m, "_lzxc_finish", + Py_BuildValue("k", (unsigned long)lzx_finish)); return; } From 210ad8d20ada2e13b927af9fbe43e4911cd31fe3 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 10 Dec 2008 00:56:10 -0500 Subject: [PATCH 04/15] Implement "ugly-printing" for LIT markup. 
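
The point of "ugly-printing" is to stop depending on the parser stripping blank
text (remove_blank_text is dropped from the lxml XMLParser below) and instead
collapse insignificant whitespace at serialization time: runs of whitespace
become a single space unless the computed CSS 'white-space' is pre/pre-wrap or
an xml:space attribute overrides it, and whitespace-only tails between
block-level siblings are discarded. A minimal sketch of the collapsing rule
(illustrative only; the helper name and the exact COLLAPSE pattern are
assumptions, and the real logic lives in ReBinary.tree_to_binary in the hunk
below):

    import re

    # Assumed pattern for a run of whitespace; the writer's COLLAPSE regex
    # is defined elsewhere in writer.py and is not shown in this patch.
    COLLAPSE = re.compile(r'[ \t\r\n\v]+')

    def collapse(text, css_white_space='normal', xml_space=None):
        """Collapse whitespace runs unless CSS or xml:space says to preserve."""
        preserve = css_white_space in ('pre', 'pre-wrap')
        if xml_space == 'preserve':
            preserve = True
        elif xml_space == 'normal':
            preserve = False
        return text if preserve else COLLAPSE.sub(' ', text)

    # collapse('foo \t\n bar') == 'foo bar'
    # collapse('foo \t\n bar', css_white_space='pre') is returned unchanged
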
--- src/calibre/ebooks/lit/html.css | 8 ++++- src/calibre/ebooks/lit/oeb.py | 5 +-- src/calibre/ebooks/lit/reader.py | 2 +- src/calibre/ebooks/lit/stylizer.py | 11 +++--- src/calibre/ebooks/lit/writer.py | 56 +++++++++++++++++++++--------- 5 files changed, 56 insertions(+), 26 deletions(-) diff --git a/src/calibre/ebooks/lit/html.css b/src/calibre/ebooks/lit/html.css index 5b75ea6649..9401b19cf2 100644 --- a/src/calibre/ebooks/lit/html.css +++ b/src/calibre/ebooks/lit/html.css @@ -410,7 +410,7 @@ tr:focus, tt:focus, u:focus, ul:focus, var:focus { /* hidden elements */ area, base, basefont, head, meta, script, style, title, -noembed, param { +noembed, param, link { display: none; } @@ -418,3 +418,9 @@ noembed, param { body { page-break-before: always; } + +/* Explicit line-breaks are blocks, sure... */ +br { + display: block; +} + diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/lit/oeb.py index d3773a61f1..ae2e6136b7 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/lit/oeb.py @@ -8,8 +8,8 @@ from urlparse import urldefrag, urlparse, urlunparse from urllib import unquote as urlunquote from lxml import etree -XML_PARSER = etree.XMLParser( - remove_blank_text=True, recover=True, resolve_entities=False) +XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False) +XML_NS = 'http://www.w3.org/XML/1998/namespace' XHTML_NS = 'http://www.w3.org/1999/xhtml' OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/' OPF2_NS = 'http://www.idpf.org/2007/opf' @@ -23,6 +23,7 @@ XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS, 'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS, 'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS} +def XML(name): return '{%s}%s' % (XML_NS, name) def XHTML(name): return '{%s}%s' % (XHTML_NS, name) def OPF(name): return '{%s}%s' % (OPF2_NS, name) def DC(name): return '{%s}%s' % (DC11_NS, name) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index c04a845d69..71e5b081b8 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -387,7 +387,7 @@ def preserve(function): class LitReader(object): PIECE_SIZE = 16 XML_PARSER = etree.XMLParser( - remove_blank_text=True, resolve_entities=False) + recover=True, resolve_entities=False) def magic(): @preserve diff --git a/src/calibre/ebooks/lit/stylizer.py b/src/calibre/ebooks/lit/stylizer.py index 97b7e2d91d..1986f6a2ed 100644 --- a/src/calibre/ebooks/lit/stylizer.py +++ b/src/calibre/ebooks/lit/stylizer.py @@ -14,7 +14,8 @@ import cssutils from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \ CSSValueList, cssproperties from lxml import etree -from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES, barename +from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES +from calibre.ebooks.lit.oeb import barename, urlnormalize from calibre.resources import html_css HTML_CSS_STYLESHEET = cssutils.parseString(html_css) @@ -125,7 +126,7 @@ class Stylizer(object): elif tag == 'link' \ and elem.get('rel', 'stylesheet') == 'stylesheet' \ and elem.get('type', CSS_MIME) in OEB_STYLES: - href = elem.attrib['href'] + href = urlnormalize(elem.attrib['href']) path = os.path.join(base, href) path = os.path.normpath(path).replace('\\', '/') if path in self.STYLESHEETS: @@ -275,13 +276,13 @@ class Style(object): if name1 != name2: return False elif item.type == 'id': - name1 = item.value[1:].lower() - name2 = element.attrib.get('id', '').lower().split() + name1 = item.value[1:] + name2 = element.get('id', '') if 
name1 != name2: return False elif item.type == 'class': name = item.value[1:].lower() - classes = element.attrib.get('class', '').lower().split() + classes = element.get('class', '').lower().split() if name not in classes: return False elif item.type == 'child': diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index 62c3877785..e1b6b645d0 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -3,7 +3,7 @@ import sys import os from cStringIO import StringIO from struct import pack, unpack -from itertools import izip, count +from itertools import izip, count, chain import time import random import re @@ -15,7 +15,7 @@ from urllib import unquote as urlunquote from lxml import etree from calibre.ebooks.lit.reader import msguid, DirectoryEntry import calibre.ebooks.lit.maps as maps -from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME +from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME, XML_NS, XML from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize from calibre.ebooks.lit.oeb import Oeb from calibre.ebooks.lit.stylizer import Stylizer @@ -116,6 +116,8 @@ def randbytes(n): return ''.join(chr(random.randint(0, 255)) for x in xrange(n)) class ReBinary(object): + NSRMAP = {'': None, XML_NS: 'xml'} + def __init__(self, root, path, oeb, map=HTML_MAP): self.dir = os.path.dirname(path) self.manifest = oeb.manifest @@ -135,8 +137,11 @@ class ReBinary(object): if isinstance(value, (int, long)): value = unichr(value) self.buf.write(value.encode('utf-8')) - - def tree_to_binary(self, elem, nsrmap={'': None}, parents=[], + + def is_block(self, style): + return style['display'] not in ('inline', 'inline-block') + + def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=[], inhead=False, preserve=False): if not isinstance(elem.tag, basestring): self.write(etree.tostring(elem)) @@ -158,7 +163,7 @@ class ReBinary(object): flags |= FLAG_CLOSING if inhead: flags |= FLAG_HEAD - if style and style['display'] in ('block', 'table'): + if style and self.is_block(style): flags |= FLAG_BLOCK self.write(0, flags) tattrs = self.tattrs[0] @@ -198,24 +203,41 @@ class ReBinary(object): except ValueError: self.write(len(value)+1, value) self.write(0) + old_preserve = preserve + if style: + preserve = (style['white-space'] in ('pre', 'pre-wrap')) + xml_space = elem.get(XML('space')) + if xml_space == 'preserve': + preserve = True + elif xml_space == 'normal': + preserve = False if elem.text: - text = elem.text - if style and style['white-space'] == 'pre': - preserve = True - if elem.get('xml:space') == 'preserve': - preserve = True - if not preserve: - text = COLLAPSE.sub(' ', text) - self.write(text) + if preserve: + self.write(elem.text) + elif len(elem) > 0 or not elem.text.isspace(): + self.write(COLLAPSE.sub(' ', elem.text)) parents.append(tag_offset) - for child in elem: - self.tree_to_binary(child, nsrmap, parents, inhead, preserve) + child = cstyle = nstyle = None + for next in chain(elem, [None]): + if self.stylizer: + nstyle = self.stylizer.style(next) \ + if (next is not None) else None + if child is not None: + if not preserve \ + and (inhead or not nstyle + or self.is_block(cstyle) + or self.is_block(nstyle)) \ + and child.tail and child.tail.isspace(): + child.tail = None + self.tree_to_binary(child, nsrmap, parents, inhead, preserve) + child, cstyle = next, nstyle parents.pop() + preserve = old_preserve if not flags & FLAG_CLOSING: self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0) - if elem.tail: + if elem.tail and tag 
!= 'html': tail = elem.tail - if tag != 'pre': + if not preserve: tail = COLLAPSE.sub(' ', tail) self.write(tail) if style and style['page-break-after'] not in ('avoid', 'auto'): From 475a5eb899ddc6a8a5ce1f63d00c2cdaf0fa7387 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 10 Dec 2008 08:29:55 -0500 Subject: [PATCH 05/15] Fix bracket-fixup error. --- src/calibre/ebooks/lit/reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 71e5b081b8..c4f854ae10 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -112,7 +112,7 @@ class UnBinary(object): AMPERSAND_RE = re.compile( r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)') OPEN_ANGLE_RE = re.compile(r'<<(?![!]--)') - CLOSE_ANGLE_RE = re.compile(r'(?