From da29a58363f770f38f01e02e3cb4221331666c0a Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Fri, 11 Jul 2008 14:37:27 -0400 Subject: [PATCH 01/19] Integrated own cleanup patch --- src/calibre/ebooks/mobi/reader.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index dea87dbd8c..05093f3c1a 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -13,7 +13,7 @@ except ImportError: import Image as PILImage from calibre import __appname__ -from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi.huffcdic import HuffReader from calibre.ebooks.mobi.palmdoc import decompress_doc @@ -165,13 +165,14 @@ class MobiReader(object): self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore') self.extract_images(processed_records, output_dir) self.replace_page_breaks() - self.cleanup() + self.cleanup_html() self.processed_html = re.compile('', re.IGNORECASE).sub( '\n\n', self.processed_html) soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<')) + self.cleanup_soup(soup) guide = soup.find('guide') for elem in soup.findAll(['metadata', 'guide']): elem.extract() @@ -192,10 +193,29 @@ class MobiReader(object): if ncx: open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx) - def cleanup(self): + def cleanup_html(self): self.processed_html = re.sub(r'
', '', self.processed_html) - self.processed_html = re.sub(r'<([^>]*) height="([^"]*)"', r'<\1 style="margin-top: \2"', self.processed_html) - self.processed_html = re.sub(r'<([^>]*) width="([^"]*)"', r'<\1 style="text-indent: \2"', self.processed_html) + + def cleanup_soup(self, soup): + for tag in soup.recursiveChildGenerator(): + if not isinstance(tag, Tag): continue + styles = [] + try: + styles.append(tag['style']) + except KeyError: + pass + try: + styles.append('margin-top: %s' % tag['height']) + del tag['height'] + except KeyError: + pass + try: + styles.append('text-indent: %s' % tag['width']) + del tag['width'] + except KeyError: + pass + if styles: + tag['style'] = '; '.join(styles) def create_opf(self, htmlfile, guide=None): mi = self.book_header.exth.mi From 615d5ea2795563f8af9dc34c2c2c03c84c9c9714 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 16 Jul 2008 10:00:49 -0400 Subject: [PATCH 02/19] Checkpoint state to move to office --- src/calibre/ebooks/lit/maps/__init__.py | 7 +- src/calibre/ebooks/lit/maps/html.py | 1568 +++++++++++------------ src/calibre/ebooks/lit/maps/opf.py | 54 +- src/calibre/ebooks/lit/mssha1.py | 343 +++++ src/calibre/ebooks/lit/reader.py | 418 +++--- 5 files changed, 1352 insertions(+), 1038 deletions(-) create mode 100644 src/calibre/ebooks/lit/mssha1.py diff --git a/src/calibre/ebooks/lit/maps/__init__.py b/src/calibre/ebooks/lit/maps/__init__.py index eb99464d9b..2abab3efe9 100644 --- a/src/calibre/ebooks/lit/maps/__init__.py +++ b/src/calibre/ebooks/lit/maps/__init__.py @@ -1,5 +1,2 @@ -import calibre.ebooks.maps.opf as opf -import calibre.ebooks.maps.html as html - -OPF_MAP = opf.MAP -HTML_MAP = html.MAP +from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP +from calibre.ebooks.lit.maps.html import MAP as HTML_MAP diff --git a/src/calibre/ebooks/lit/maps/html.py b/src/calibre/ebooks/lit/maps/html.py index 095b0bcc3e..de0286c764 100644 --- a/src/calibre/ebooks/lit/maps/html.py +++ b/src/calibre/ebooks/lit/maps/html.py @@ -1,786 +1,3 @@ -ATTRS0 = { - 0x8010 => "tabindex", - 0x8046 => "title", - 0x804b => "style", - 0x804d => "disabled", - 0x83ea => "class", - 0x83eb => "id", - 0x83fe => "datafld", - 0x83ff => "datasrc", - 0x8400 => "dataformatas", - 0x87d6 => "accesskey", - 0x9392 => "lang", - 0x93ed => "language", - 0x93fe => "dir", - 0x9771 => "onmouseover", - 0x9772 => "onmouseout", - 0x9773 => "onmousedown", - 0x9774 => "onmouseup", - 0x9775 => "onmousemove", - 0x9776 => "onkeydown", - 0x9777 => "onkeyup", - 0x9778 => "onkeypress", - 0x9779 => "onclick", - 0x977a => "ondblclick", - 0x977e => "onhelp", - 0x977f => "onfocus", - 0x9780 => "onblur", - 0x9783 => "onrowexit", - 0x9784 => "onrowenter", - 0x9786 => "onbeforeupdate", - 0x9787 => "onafterupdate", - 0x978a => "onreadystatechange", - 0x9790 => "onscroll", - 0x9794 => "ondragstart", - 0x9795 => "onresize", - 0x9796 => "onselectstart", - 0x9797 => "onerrorupdate", - 0x9799 => "ondatasetchanged", - 0x979a => "ondataavailable", - 0x979b => "ondatasetcomplete", - 0x979c => "onfilterchange", - 0x979f => "onlosecapture", - 0x97a0 => "onpropertychange", - 0x97a2 => "ondrag", - 0x97a3 => "ondragend", - 0x97a4 => "ondragenter", - 0x97a5 => "ondragover", - 0x97a6 => "ondragleave", - 0x97a7 => "ondrop", - 0x97a8 => "oncut", - 0x97a9 => "oncopy", - 0x97aa => "onpaste", - 0x97ab => "onbeforecut", - 0x97ac => "onbeforecopy", - 0x97ad => "onbeforepaste", - 0x97af => "onrowsdelete", - 0x97b0 => "onrowsinserted", - 0x97b1 => "oncellchange", - 0x97b2 => "oncontextmenu", - 0x97b6 => 
"onbeforeeditfocus", - } -ATTRS3 = { - 0x0001 => "href", - 0x03ec => "target", - 0x03ee => "rel", - 0x03ef => "rev", - 0x03f0 => "urn", - 0x03f1 => "methods", - 0x8001 => "name", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS5 = { - 0x9399 => "clear", - } -ATTRS6 = { - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x8bbb => "classid", - 0x8bbc => "data", - 0x8bbf => "codebase", - 0x8bc0 => "codetype", - 0x8bc1 => "code", - 0x8bc2 => "type", - 0x8bc5 => "vspace", - 0x8bc6 => "hspace", - 0x978e => "onerror", - } -ATTRS7 = { - 0x0001 => "href", - 0x03ea => "shape", - 0x03eb => "coords", - 0x03ed => "target", - 0x03ee => "alt", - 0x03ef => "nohref", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS8 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS9 = { - 0x03ec => "href", - 0x03ed => "target", - } -ATTRS10 = { - 0x938b => "color", - 0x939b => "face", - 0x93a3 => "size", - } -ATTRS12 = { - 0x03ea => "src", - 0x03eb => "loop", - 0x03ec => "volume", - 0x03ed => "balance", - } -ATTRS13 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS15 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS16 = { - 0x07db => "link", - 0x07dc => "alink", - 0x07dd => "vlink", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x938b => "text", - 0x938e => "nowrap", - 0x93ae => "topmargin", - 0x93af => "rightmargin", - 0x93b0 => "bottommargin", - 0x93b1 => "leftmargin", - 0x93b6 => "bgproperties", - 0x93d8 => "scroll", - 0x977b => "onselect", - 0x9791 => "onload", - 0x9792 => "onunload", - 0x9798 => "onbeforeunload", - 0x97b3 => "onbeforeprint", - 0x97b4 => "onafterprint", - 0xfe0c => "bgcolor", - } -ATTRS17 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS18 = { - 0x07d1 => "type", - 0x8001 => "name", - } -ATTRS19 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x93a8 => "valign", - } -ATTRS20 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS21 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS22 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS23 = { - 0x03ea => "span", - 0x8006 => "width", - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS24 = { - 0x03ea => "span", - 0x8006 => "width", - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS27 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938e => "nowrap", - } -ATTRS29 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS31 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938e => "nowrap", - } -ATTRS32 = { - 0x03ea => "compact", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS33 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938e => "nowrap", - } -ATTRS34 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS35 = { - 0x8001 => "name", - 
0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x8bbd => "palette", - 0x8bbe => "pluginspage", - 0x8bbf => "codebase", - 0x8bbf => "src", - 0x8bc1 => "units", - 0x8bc2 => "type", - 0x8bc3 => "hidden", - } -ATTRS36 = { - 0x804a => "align", - } -ATTRS37 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938b => "color", - 0x939b => "face", - 0x939c => "size", - } -ATTRS38 = { - 0x03ea => "action", - 0x03ec => "enctype", - 0x03ed => "method", - 0x03ef => "target", - 0x03f4 => "accept-charset", - 0x8001 => "name", - 0x977c => "onsubmit", - 0x977d => "onreset", - } -ATTRS39 = { - 0x8000 => "align", - 0x8001 => "name", - 0x8bb9 => "src", - 0x8bbb => "border", - 0x8bbc => "frameborder", - 0x8bbd => "framespacing", - 0x8bbe => "marginwidth", - 0x8bbf => "marginheight", - 0x8bc0 => "noresize", - 0x8bc1 => "scrolling", - 0x8fa2 => "bordercolor", - } -ATTRS40 = { - 0x03e9 => "rows", - 0x03ea => "cols", - 0x03eb => "border", - 0x03ec => "bordercolor", - 0x03ed => "frameborder", - 0x03ee => "framespacing", - 0x8001 => "name", - 0x9791 => "onload", - 0x9792 => "onunload", - 0x9798 => "onbeforeunload", - 0x97b3 => "onbeforeprint", - 0x97b4 => "onafterprint", - } -ATTRS42 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS43 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS44 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS45 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS46 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS47 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS49 = { - 0x03ea => "noshade", - 0x8006 => "width", - 0x8007 => "size", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938b => "color", - } -ATTRS51 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS52 = { - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x8bb9 => "src", - 0x8bbb => "border", - 0x8bbc => "frameborder", - 0x8bbd => "framespacing", - 0x8bbe => "marginwidth", - 0x8bbf => "marginheight", - 0x8bc0 => "noresize", - 0x8bc1 => "scrolling", - 0x8fa2 => "vspace", - 0x8fa3 => "hspace", - } -ATTRS53 = { - 0x03eb => "alt", - 0x03ec => "src", - 0x03ed => "border", - 0x03ee => "vspace", - 0x03ef => "hspace", - 0x03f0 => "lowsrc", - 0x03f1 => "vrml", - 0x03f2 => "dynsrc", - 0x03f4 => "loop", - 0x03f6 => "start", - 0x07d3 => "ismap", - 0x07d9 => "usemap", - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x804a => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x978d => "onabort", - 0x978e => "onerror", - 0x9791 => "onload", - } -ATTRS54 = { - 0x07d1 => "type", - 0x07d3 => "size", - 0x07d4 => "maxlength", - 0x07d6 => "readonly", - 0x07d8 => "indeterminate", - 0x07da => "checked", - 0x07db => "alt", - 0x07dc => "src", - 0x07dd => "border", - 0x07de => "vspace", - 0x07df => "hspace", - 0x07e0 => "lowsrc", - 0x07e1 => "vrml", - 0x07e2 => "dynsrc", - 0x07e4 => "loop", - 
0x07e5 => "start", - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x93ee => "value", - 0x977b => "onselect", - 0x978d => "onabort", - 0x978e => "onerror", - 0x978f => "onchange", - 0x9791 => "onload", - } -ATTRS56 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS57 = { - 0x03e9 => "for", - } -ATTRS58 = { - 0x804a => "align", - } -ATTRS59 = { - 0x03ea => "value", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x939a => "type", - } -ATTRS60 = { - 0x03ee => "href", - 0x03ef => "rel", - 0x03f0 => "rev", - 0x03f1 => "type", - 0x03f9 => "media", - 0x03fa => "target", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x978e => "onerror", - 0x9791 => "onload", - } -ATTRS61 = { - 0x9399 => "clear", - } -ATTRS62 = { - 0x8001 => "name", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS63 = { - 0x1771 => "scrolldelay", - 0x1772 => "direction", - 0x1773 => "behavior", - 0x1774 => "scrollamount", - 0x1775 => "loop", - 0x1776 => "vspace", - 0x1777 => "hspace", - 0x1778 => "truespeed", - 0x8006 => "width", - 0x8007 => "height", - 0x9785 => "onbounce", - 0x978b => "onfinish", - 0x978c => "onstart", - 0xfe0c => "bgcolor", - } -ATTRS65 = { - 0x03ea => "http-equiv", - 0x03eb => "content", - 0x03ec => "url", - 0x03f6 => "charset", - 0x8001 => "name", - } -ATTRS66 = { - 0x03f5 => "n", - } -ATTRS71 = { - 0x8000 => "border", - 0x8000 => "usemap", - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x804a => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x8bbb => "classid", - 0x8bbc => "data", - 0x8bbf => "codebase", - 0x8bc0 => "codetype", - 0x8bc1 => "code", - 0x8bc2 => "type", - 0x8bc5 => "vspace", - 0x8bc6 => "hspace", - 0x978e => "onerror", - } -ATTRS72 = { - 0x03eb => "compact", - 0x03ec => "start", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x939a => "type", - } -ATTRS73 = { - 0x03ea => "selected", - 0x03eb => "value", - } -ATTRS74 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS75 = { - 0x8000 => "name", - 0x8000 => "value", - 0x8000 => "type", - } -ATTRS76 = { - 0x9399 => "clear", - } -ATTRS77 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS78 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS82 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS83 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS84 = { - 0x03ea => "src", - 0x03ed => "for", - 0x03ee => "event", - 0x03f0 => "defer", - 0x03f2 => "type", - 0x978e => "onerror", - } -ATTRS85 = { - 0x03eb => "size", - 0x03ec => "multiple", - 0x8000 => "align", - 0x8001 => "name", - 0x978f => "onchange", - } -ATTRS86 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS87 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS88 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS89 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS90 = { - 0x03eb => "type", - 0x03ef => "media", - 0x8046 => "title", - 0x978e => 
"onerror", - 0x9791 => "onload", - } -ATTRS91 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS92 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS93 = { - 0x03ea => "cols", - 0x03eb => "border", - 0x03ec => "rules", - 0x03ed => "frame", - 0x03ee => "cellspacing", - 0x03ef => "cellpadding", - 0x03fa => "datapagesize", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x804a => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0xfe0c => "bgcolor", - } -ATTRS94 = { - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS95 = { - 0x8049 => "align", - 0x93a8 => "valign", - } -ATTRS96 = { - 0x07d2 => "rowspan", - 0x07d3 => "colspan", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x938e => "nowrap", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS97 = { - 0x1b5a => "rows", - 0x1b5b => "cols", - 0x1b5c => "wrap", - 0x1b5d => "readonly", - 0x8001 => "name", - 0x977b => "onselect", - 0x978f => "onchange", - } -ATTRS98 = { - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS99 = { - 0x07d2 => "rowspan", - 0x07d3 => "colspan", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x938e => "nowrap", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS100 = { - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS102 = { - 0x8007 => "height", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS103 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS104 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS105 = { - 0x03eb => "compact", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x939a => "type", - } -ATTRS106 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS108 = { - 0x9399 => "clear", - } - TAGS = [ None, None, @@ -893,6 +110,789 @@ TAGS = [ None, ] +ATTRS0 = { + 0x8010: "tabindex", + 0x8046: "title", + 0x804b: "style", + 0x804d: "disabled", + 0x83ea: "class", + 0x83eb: "id", + 0x83fe: "datafld", + 0x83ff: "datasrc", + 0x8400: "dataformatas", + 0x87d6: "accesskey", + 0x9392: "lang", + 0x93ed: "language", + 0x93fe: "dir", + 0x9771: "onmouseover", + 0x9772: "onmouseout", + 0x9773: "onmousedown", + 0x9774: "onmouseup", + 0x9775: "onmousemove", + 0x9776: "onkeydown", + 0x9777: "onkeyup", + 0x9778: "onkeypress", + 0x9779: "onclick", + 0x977a: "ondblclick", + 0x977e: "onhelp", + 0x977f: "onfocus", + 0x9780: "onblur", + 0x9783: "onrowexit", + 0x9784: "onrowenter", + 0x9786: "onbeforeupdate", + 0x9787: "onafterupdate", + 0x978a: "onreadystatechange", + 0x9790: "onscroll", + 0x9794: "ondragstart", + 0x9795: "onresize", 
+ 0x9796: "onselectstart", + 0x9797: "onerrorupdate", + 0x9799: "ondatasetchanged", + 0x979a: "ondataavailable", + 0x979b: "ondatasetcomplete", + 0x979c: "onfilterchange", + 0x979f: "onlosecapture", + 0x97a0: "onpropertychange", + 0x97a2: "ondrag", + 0x97a3: "ondragend", + 0x97a4: "ondragenter", + 0x97a5: "ondragover", + 0x97a6: "ondragleave", + 0x97a7: "ondrop", + 0x97a8: "oncut", + 0x97a9: "oncopy", + 0x97aa: "onpaste", + 0x97ab: "onbeforecut", + 0x97ac: "onbeforecopy", + 0x97ad: "onbeforepaste", + 0x97af: "onrowsdelete", + 0x97b0: "onrowsinserted", + 0x97b1: "oncellchange", + 0x97b2: "oncontextmenu", + 0x97b6: "onbeforeeditfocus", + } +ATTRS3 = { + 0x0001: "href", + 0x03ec: "target", + 0x03ee: "rel", + 0x03ef: "rev", + 0x03f0: "urn", + 0x03f1: "methods", + 0x8001: "name", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS5 = { + 0x9399: "clear", + } +ATTRS6 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bbb: "classid", + 0x8bbc: "data", + 0x8bbf: "codebase", + 0x8bc0: "codetype", + 0x8bc1: "code", + 0x8bc2: "type", + 0x8bc5: "vspace", + 0x8bc6: "hspace", + 0x978e: "onerror", + } +ATTRS7 = { + 0x0001: "href", + 0x03ea: "shape", + 0x03eb: "coords", + 0x03ed: "target", + 0x03ee: "alt", + 0x03ef: "nohref", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS8 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS9 = { + 0x03ec: "href", + 0x03ed: "target", + } +ATTRS10 = { + 0x938b: "color", + 0x939b: "face", + 0x93a3: "size", + } +ATTRS12 = { + 0x03ea: "src", + 0x03eb: "loop", + 0x03ec: "volume", + 0x03ed: "balance", + } +ATTRS13 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS15 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS16 = { + 0x07db: "link", + 0x07dc: "alink", + 0x07dd: "vlink", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938b: "text", + 0x938e: "nowrap", + 0x93ae: "topmargin", + 0x93af: "rightmargin", + 0x93b0: "bottommargin", + 0x93b1: "leftmargin", + 0x93b6: "bgproperties", + 0x93d8: "scroll", + 0x977b: "onselect", + 0x9791: "onload", + 0x9792: "onunload", + 0x9798: "onbeforeunload", + 0x97b3: "onbeforeprint", + 0x97b4: "onafterprint", + 0xfe0c: "bgcolor", + } +ATTRS17 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS18 = { + 0x07d1: "type", + 0x8001: "name", + } +ATTRS19 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x93a8: "valign", + } +ATTRS20 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS21 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS22 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS23 = { + 0x03ea: "span", + 0x8006: "width", + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS24 = { + 0x03ea: "span", + 0x8006: "width", + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS27 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS29 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS31 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS32 = { + 
0x03ea: "compact", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS33 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS34 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS35 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bbd: "palette", + 0x8bbe: "pluginspage", + 0x8bbf: "codebase", + 0x8bbf: "src", + 0x8bc1: "units", + 0x8bc2: "type", + 0x8bc3: "hidden", + } +ATTRS36 = { + 0x804a: "align", + } +ATTRS37 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938b: "color", + 0x939b: "face", + 0x939c: "size", + } +ATTRS38 = { + 0x03ea: "action", + 0x03ec: "enctype", + 0x03ed: "method", + 0x03ef: "target", + 0x03f4: "accept-charset", + 0x8001: "name", + 0x977c: "onsubmit", + 0x977d: "onreset", + } +ATTRS39 = { + 0x8000: "align", + 0x8001: "name", + 0x8bb9: "src", + 0x8bbb: "border", + 0x8bbc: "frameborder", + 0x8bbd: "framespacing", + 0x8bbe: "marginwidth", + 0x8bbf: "marginheight", + 0x8bc0: "noresize", + 0x8bc1: "scrolling", + 0x8fa2: "bordercolor", + } +ATTRS40 = { + 0x03e9: "rows", + 0x03ea: "cols", + 0x03eb: "border", + 0x03ec: "bordercolor", + 0x03ed: "frameborder", + 0x03ee: "framespacing", + 0x8001: "name", + 0x9791: "onload", + 0x9792: "onunload", + 0x9798: "onbeforeunload", + 0x97b3: "onbeforeprint", + 0x97b4: "onafterprint", + } +ATTRS42 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS43 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS44 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS45 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS46 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS47 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS49 = { + 0x03ea: "noshade", + 0x8006: "width", + 0x8007: "size", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938b: "color", + } +ATTRS51 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS52 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bb9: "src", + 0x8bbb: "border", + 0x8bbc: "frameborder", + 0x8bbd: "framespacing", + 0x8bbe: "marginwidth", + 0x8bbf: "marginheight", + 0x8bc0: "noresize", + 0x8bc1: "scrolling", + 0x8fa2: "vspace", + 0x8fa3: "hspace", + } +ATTRS53 = { + 0x03eb: "alt", + 0x03ec: "src", + 0x03ed: "border", + 0x03ee: "vspace", + 0x03ef: "hspace", + 0x03f0: "lowsrc", + 0x03f1: "vrml", + 0x03f2: "dynsrc", + 0x03f4: "loop", + 0x03f6: "start", + 0x07d3: "ismap", + 0x07d9: "usemap", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x978d: "onabort", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS54 = { + 0x07d1: "type", + 0x07d3: "size", + 0x07d4: "maxlength", + 0x07d6: "readonly", + 0x07d8: "indeterminate", + 0x07da: "checked", + 0x07db: "alt", + 0x07dc: "src", + 0x07dd: "border", + 0x07de: "vspace", + 0x07df: "hspace", + 0x07e0: "lowsrc", + 0x07e1: "vrml", + 0x07e2: 
"dynsrc", + 0x07e4: "loop", + 0x07e5: "start", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x93ee: "value", + 0x977b: "onselect", + 0x978d: "onabort", + 0x978e: "onerror", + 0x978f: "onchange", + 0x9791: "onload", + } +ATTRS56 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS57 = { + 0x03e9: "for", + } +ATTRS58 = { + 0x804a: "align", + } +ATTRS59 = { + 0x03ea: "value", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS60 = { + 0x03ee: "href", + 0x03ef: "rel", + 0x03f0: "rev", + 0x03f1: "type", + 0x03f9: "media", + 0x03fa: "target", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS61 = { + 0x9399: "clear", + } +ATTRS62 = { + 0x8001: "name", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS63 = { + 0x1771: "scrolldelay", + 0x1772: "direction", + 0x1773: "behavior", + 0x1774: "scrollamount", + 0x1775: "loop", + 0x1776: "vspace", + 0x1777: "hspace", + 0x1778: "truespeed", + 0x8006: "width", + 0x8007: "height", + 0x9785: "onbounce", + 0x978b: "onfinish", + 0x978c: "onstart", + 0xfe0c: "bgcolor", + } +ATTRS65 = { + 0x03ea: "http-equiv", + 0x03eb: "content", + 0x03ec: "url", + 0x03f6: "charset", + 0x8001: "name", + } +ATTRS66 = { + 0x03f5: "n", + } +ATTRS71 = { + 0x8000: "border", + 0x8000: "usemap", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x8bbb: "classid", + 0x8bbc: "data", + 0x8bbf: "codebase", + 0x8bc0: "codetype", + 0x8bc1: "code", + 0x8bc2: "type", + 0x8bc5: "vspace", + 0x8bc6: "hspace", + 0x978e: "onerror", + } +ATTRS72 = { + 0x03eb: "compact", + 0x03ec: "start", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS73 = { + 0x03ea: "selected", + 0x03eb: "value", + } +ATTRS74 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS75 = { + 0x8000: "name", + 0x8000: "value", + 0x8000: "type", + } +ATTRS76 = { + 0x9399: "clear", + } +ATTRS77 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS78 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS82 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS83 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS84 = { + 0x03ea: "src", + 0x03ed: "for", + 0x03ee: "event", + 0x03f0: "defer", + 0x03f2: "type", + 0x978e: "onerror", + } +ATTRS85 = { + 0x03eb: "size", + 0x03ec: "multiple", + 0x8000: "align", + 0x8001: "name", + 0x978f: "onchange", + } +ATTRS86 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS87 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS88 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS89 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS90 = { + 0x03eb: "type", + 0x03ef: "media", + 0x8046: "title", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS91 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS92 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS93 = { + 0x03ea: "cols", + 0x03eb: "border", + 
0x03ec: "rules", + 0x03ed: "frame", + 0x03ee: "cellspacing", + 0x03ef: "cellpadding", + 0x03fa: "datapagesize", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0xfe0c: "bgcolor", + } +ATTRS94 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS95 = { + 0x8049: "align", + 0x93a8: "valign", + } +ATTRS96 = { + 0x07d2: "rowspan", + 0x07d3: "colspan", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938e: "nowrap", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS97 = { + 0x1b5a: "rows", + 0x1b5b: "cols", + 0x1b5c: "wrap", + 0x1b5d: "readonly", + 0x8001: "name", + 0x977b: "onselect", + 0x978f: "onchange", + } +ATTRS98 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS99 = { + 0x07d2: "rowspan", + 0x07d3: "colspan", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938e: "nowrap", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS100 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS102 = { + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS103 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS104 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS105 = { + 0x03eb: "compact", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS106 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS108 = { + 0x9399: "clear", + } + TAGS_ATTRS = [ None, None, @@ -1005,4 +1005,4 @@ TAGS_ATTRS = [ None, ] -MAP = (TAGS, TAGS_ATTRS, ATTRS0) +MAP = (TAGS, ATTRS0, TAGS_ATTRS) diff --git a/src/calibre/ebooks/lit/maps/opf.py b/src/calibre/ebooks/lit/maps/opf.py index a39e6bf8e8..cc1acc4dfa 100644 --- a/src/calibre/ebooks/lit/maps/opf.py +++ b/src/calibre/ebooks/lit/maps/opf.py @@ -1,28 +1,3 @@ -ATTRS = { - 0x0001 => "href", - 0x0002 => "%never-used", - 0x0003 => "%guid", - 0x0004 => "%minimum_level", - 0x0005 => "%attr5", - 0x0006 => "id", - 0x0007 => "href", - 0x0008 => "media-type", - 0x0009 => "fallback", - 0x000A => "idref", - 0x000B => "xmlns:dc", - 0x000C => "xmlns:oebpackage", - 0x000D => "role", - 0x000E => "file-as", - 0x000F => "event", - 0x0010 => "scheme", - 0x0011 => "title", - 0x0012 => "type", - 0x0013 => "unique-identifier", - 0x0014 => "name", - 0x0015 => "content", - 0x0016 => "xml:lang", - } - TAGS = [ None, "package", @@ -69,6 +44,31 @@ TAGS = [ None, ] -TAGS_ATTR = [{} for i in xrange(43)] +ATTRS = { + 0x0001: "href", + 0x0002: "%never-used", + 0x0003: "%guid", + 0x0004: "%minimum_level", + 0x0005: "%attr5", + 0x0006: "id", + 0x0007: "href", + 0x0008: "media-type", + 0x0009: "fallback", + 0x000A: "idref", + 0x000B: "xmlns:dc", + 0x000C: "xmlns:oebpackage", + 0x000D: "role", + 0x000E: "file-as", + 0x000F: "event", + 
0x0010: "scheme", + 0x0011: "title", + 0x0012: "type", + 0x0013: "unique-identifier", + 0x0014: "name", + 0x0015: "content", + 0x0016: "xml:lang", + } -MAP = (TAGS, TAGS_ATTRS, ATTRS0) +TAGS_ATTRS = [{} for i in xrange(43)] + +MAP = (TAGS, ATTRS, TAGS_ATTRS) diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py new file mode 100644 index 0000000000..f6f7c33444 --- /dev/null +++ b/src/calibre/ebooks/lit/mssha1.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 + +"""A sample implementation of SHA-1 in pure Python. + + Framework adapted from Dinu Gherman's MD5 implementation by + J. Hallén and L. Creighton. SHA-1 implementation based directly on + the text of the NIST standard FIPS PUB 180-1. +""" + + +__date__ = '2004-11-17' +__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy + + +import struct, copy + + +# ====================================================================== +# Bit-Manipulation helpers +# +# _long2bytes() was contributed by Barry Warsaw +# and is reused here with tiny modifications. +# ====================================================================== + +def _long2bytesBigEndian(n, blocksize=0): + """Convert a long integer to a byte string. + + If optional blocksize is given and greater than zero, pad the front + of the byte string with binary zeros so that the length is a multiple + of blocksize. + """ + + # After much testing, this algorithm was deemed to be the fastest. + s = '' + pack = struct.pack + while n > 0: + s = pack('>I', n & 0xffffffffL) + s + n = n >> 32 + + # Strip off leading zeros. + for i in range(len(s)): + if s[i] != '\000': + break + else: + # Only happens when n == 0. + s = '\000' + i = 0 + + s = s[i:] + + # Add back some pad bytes. This could be done more efficiently + # w.r.t. the de-padding being done above, but sigh... + if blocksize > 0 and len(s) % blocksize: + s = (blocksize - len(s) % blocksize) * '\000' + s + + return s + + +def _bytelist2longBigEndian(list): + "Transform a list of characters into a list of longs." + + imax = len(list)/4 + hl = [0L] * imax + + j = 0 + i = 0 + while i < imax: + b0 = long(ord(list[j])) << 24 + b1 = long(ord(list[j+1])) << 16 + b2 = long(ord(list[j+2])) << 8 + b3 = long(ord(list[j+3])) + hl[i] = b0 | b1 | b2 | b3 + i = i+1 + j = j+4 + + return hl + + +def _rotateLeft(x, n): + "Rotate x (32 bit) left n bits circularly." + + return (x << n) | (x >> (32-n)) + + +# ====================================================================== +# The SHA transformation functions +# +# ====================================================================== + +def f0_19(B, C, D): + return (B & (C ^ D)) ^ D + +def f20_39(B, C, D): + return B ^ C ^ D + +def f40_59(B, C, D): + return ((B | C) & D) | (B & C) + +def f60_79(B, C, D): + return B ^ C ^ D + +def f6_42(B, C, D): + return (B + C) ^ C + +f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20 +f[3] = f20_39 +f[6] = f6_42 +f[10] = f20_39 +f[15] = f20_39 +f[26] = f0_19 +f[31] = f40_59 +f[42] = f6_42 +f[51] = f20_39 +f[68] = f0_19 + + +# Constants to be used +K = [ + 0x5A827999L, # ( 0 <= t <= 19) + 0x6ED9EBA1L, # (20 <= t <= 39) + 0x8F1BBCDCL, # (40 <= t <= 59) + 0xCA62C1D6L # (60 <= t <= 79) + ] + +class sha: + "An implementation of the MD5 hash function in pure Python." + + def __init__(self): + "Initialisation." + + # Initial message length in bits(!). + self.length = 0L + self.count = [0, 0] + + # Initial empty message as a sequence of bytes (8 bit characters). 
+ self.input = [] + + # Call a separate init function, that can be used repeatedly + # to start from scratch on the same object. + self.init() + + + def init(self): + "Initialize the message-digest and set all fields to zero." + + self.length = 0L + self.input = [] + + # Initial 160 bit message digest (5 times 32 bit). + self.H0 = 0x32107654L + self.H1 = 0x23016745L + self.H2 = 0xC4E680A2L + self.H3 = 0xDC679823L + self.H4 = 0xD0857A34L + + def _transform(self, W): + for t in range(16, 80): + W.append(_rotateLeft( + W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL) + + A = self.H0 + B = self.H1 + C = self.H2 + D = self.H3 + E = self.H4 + + for t in xrange(0, 80): + TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20] + E = D + D = C + C = _rotateLeft(B, 30) & 0xffffffffL + B = A + A = TEMP & 0xffffffffL + + self.H0 = (self.H0 + A) & 0xffffffffL + self.H1 = (self.H1 + B) & 0xffffffffL + self.H2 = (self.H2 + C) & 0xffffffffL + self.H3 = (self.H3 + D) & 0xffffffffL + self.H4 = (self.H4 + E) & 0xffffffffL + + + # Down from here all methods follow the Python Standard Library + # API of the sha module. + + def update(self, inBuf): + """Add to the current message. + + Update the sha object with the string arg. Repeated calls + are equivalent to a single call with the concatenation of all + the arguments, i.e. s.update(a); s.update(b) is equivalent + to s.update(a+b). + + The hash is immediately calculated for all full blocks. The final + calculation is made in digest(). It will calculate 1-2 blocks, + depending on how much padding we have to add. This allows us to + keep an intermediate value for the hash, so that we only need to + make minimal recalculation if we call update() to add more data + to the hashed string. + """ + + leninBuf = long(len(inBuf)) + + # Compute number of bytes mod 64. + index = (self.count[1] >> 3) & 0x3FL + + # Update number of bits. + self.count[1] = self.count[1] + (leninBuf << 3) + if self.count[1] < (leninBuf << 3): + self.count[0] = self.count[0] + 1 + self.count[0] = self.count[0] + (leninBuf >> 29) + + partLen = 64 - index + + if leninBuf >= partLen: + self.input[index:] = list(inBuf[:partLen]) + self._transform(_bytelist2longBigEndian(self.input)) + i = partLen + while i + 63 < leninBuf: + self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64]))) + i = i + 64 + else: + self.input = list(inBuf[i:leninBuf]) + else: + i = 0 + self.input = self.input + list(inBuf) + + + def digest(self): + """Terminate the message-digest computation and return digest. + + Return the digest of the strings passed to the update() + method so far. This is a 16-byte string which may contain + non-ASCII characters, including null bytes. + """ + + H0 = self.H0 + H1 = self.H1 + H2 = self.H2 + H3 = self.H3 + H4 = self.H4 + input = [] + self.input + count = [] + self.count + + index = (self.count[1] >> 3) & 0x3fL + + if index < 56: + padLen = 56 - index + else: + padLen = 120 - index + + padding = ['\200'] + ['\000'] * 63 + self.update(padding[:padLen]) + + # Append length (before padding). + bits = _bytelist2longBigEndian(self.input[:56]) + count + + self._transform(bits) + + # Store state in digest. 
+ digest = _long2bytesBigEndian(self.H0, 4) + \ + _long2bytesBigEndian(self.H1, 4) + \ + _long2bytesBigEndian(self.H2, 4) + \ + _long2bytesBigEndian(self.H3, 4) + \ + _long2bytesBigEndian(self.H4, 4) + + self.H0 = H0 + self.H1 = H1 + self.H2 = H2 + self.H3 = H3 + self.H4 = H4 + self.input = input + self.count = count + + return digest + + + def hexdigest(self): + """Terminate and return digest in HEX form. + + Like digest() except the digest is returned as a string of + length 32, containing only hexadecimal digits. This may be + used to exchange the value safely in email or other non- + binary environments. + """ + return ''.join(['%02x' % ord(c) for c in self.digest()]) + + def copy(self): + """Return a clone object. + + Return a copy ('clone') of the md5 object. This can be used + to efficiently compute the digests of strings that share + a common initial substring. + """ + + return copy.deepcopy(self) + + +# ====================================================================== +# Mimic Python top-level functions from standard library API +# for consistency with the md5 module of the standard library. +# ====================================================================== + +# These are mandatory variables in the module. They have constant values +# in the SHA standard. + +digest_size = digestsize = 20 +blocksize = 1 + +def new(arg=None): + """Return a new sha crypto object. + + If arg is present, the method call update(arg) is made. + """ + + crypto = sha() + if arg: + crypto.update(arg) + + return crypto + +if __name__ == '__main__': + def main(): + import sys + file = None + if len(sys.argv) > 2: + print "usage: %s [FILE]" % sys.argv[0] + return + elif len(sys.argv) < 2: + file = sys.stdin + else: + file = open(sys.argv[1], 'rb') + context = new() + data = file.read(16384) + while data: + context.update(data) + data = file.read(16384) + file.close() + digest = context.hexdigest().upper() + for i in xrange(0, 40, 8): + print digest[i:i+8], + print + main() diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 1a0f42f8db..711aef6586 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -5,6 +5,7 @@ Support for reading the metadata from a lit file. 
''' import sys, struct, cStringIO, os +import functools from itertools import repeat from calibre import relpath @@ -13,6 +14,31 @@ from calibre.ebooks.metadata.opf import OPFReader from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP +OPF_DECL = """" + +""" +XHTML_DECL = """ + +""" + +class DirectoryEntry(object): + def __init__(self, name, section, offset, size): + self.name = name + self.section = section + self.offset = offset + self.size = size + + def __repr__(self): + return "" \ + % (self.name, self.section, self.offset, self.size) + + def __str__(self): + return repr(self) + def u32(bytes): return struct.unpack('') index = self.binary_to_text(base=index, depth=depth+1) - is_goingdown = 0 + is_goingdown = False if not tag_name: raise LitError('Tag ends before it begins.') self.buf.write('') @@ -222,7 +248,7 @@ class UnBinary(object): if not in_censorship: self.buf.write(c) count -= 1 - elif count == 0: + if count == 0: if not in_censorship: self.buf.write('"') in_censorship = False @@ -268,7 +294,7 @@ class UnBinary(object): href += c count -= 1 if count == 0: - doc, m, frag = href.partition('#') + doc, m, frag = href[1:].partition('#') path = self.item_path(doc) if m and frag: path += m + frag @@ -297,100 +323,74 @@ class ManifestItem(object): def __repr__(self): return self.internal + u'->' + self.path +def preserve(function): + def wrapper(self, *args, **kwargs): + opos = self._stream.tell() + try: + return function(self, *args, **kwargs) + finally: + self._stream.seek(opos) + functools.update_wrapper(wrapper, function) + return wrapper + class LitFile(object): PIECE_SIZE = 16 def magic(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(0) - val = self._stream.read(8) - finally: - self._stream.seek(opos) - return val + self._stream.seek(0) + return self._stream.read(8) return property(fget=fget) magic = magic() def version(): def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(8) - val = u32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(8) + return u32(self._stream.read(4)) return property(fget=fget) version = version() def hdr_len(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(12) - val = int32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(12) + return int32(self._stream.read(4)) return property(fget=fget) hdr_len = hdr_len() def num_pieces(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(16) - val = int32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(16) + return int32(self._stream.read(4)) return property(fget=fget) num_pieces = num_pieces() def sec_hdr_len(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(20) - val = int32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(20) + return int32(self._stream.read(4)) return property(fget=fget) sec_hdr_len = sec_hdr_len() def guid(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(24) - val = self._stream.read(16) - finally: - self._stream.seek(opos) - return val + self._stream.seek(24) + return self._stream.read(16) return property(fget=fget) guid = guid() def header(): + @preserve def fget(self): - val = None - opos = 
self._stream.tell() - try: - size = self.hdr_len \ - + (self.num_pieces * self.PIECE_SIZE) \ - + self.sec_hdr_len - self._stream.seek(0) - val = self._stream.read(size) - finally: - self._stream.seek(opos) - return val + size = self.hdr_len \ + + (self.num_pieces * self.PIECE_SIZE) \ + + self.sec_hdr_len + self._stream.seek(0) + return self._stream.read(size) return property(fget=fget) header = header() @@ -402,70 +402,64 @@ class LitFile(object): raise LitError('Unknown LIT version %d'%(self.version,)) self.read_secondary_header() self.read_header_pieces() - - def read_secondary_header(self): - opos = self._stream.tell() - try: - self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) - bytes = self._stream.read(self.sec_hdr_len) - offset = int32(bytes[4:]) - while offset < len(bytes): - blocktype = bytes[offset:offset+4] - blockver = u32(bytes[offset+4:]) - if blocktype == 'CAOL': - if blockver != 2: - raise LitError( - 'Unknown CAOL block format %d' % blockver) - self.creator_id = u32(bytes[offset+12:]) - self.entry_chunklen = u32(bytes[offset+20:]) - self.count_chunklen = u32(bytes[offset+24:]) - self.entry_unknown = u32(bytes[offset+28:]) - self.count_unknown = u32(bytes[offset+32:]) - offset += 48 - elif blocktype == 'ITSF': - if blockver != 4: - raise LitError( - 'Unknown ITSF block format %d' % blockver) - if u32(bytes[offset+4+16:]): - raise LitError('This file has a 64bit content offset') - self.content_offset = u32(bytes[offset+16:]) - self.timestamp = u32(bytes[offset+24:]) - self.language_id = u32(bytes[offset+28:]) - offset += 48 - if not hasattr(self, 'content_offset'): - raise LitError('Could not figure out the content offset') - finally: - self._stream.seek(opos) + @preserve + def read_secondary_header(self): + self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) + bytes = self._stream.read(self.sec_hdr_len) + offset = int32(bytes[4:]) + while offset < len(bytes): + blocktype = bytes[offset:offset+4] + blockver = u32(bytes[offset+4:]) + if blocktype == 'CAOL': + if blockver != 2: + raise LitError( + 'Unknown CAOL block format %d' % blockver) + self.creator_id = u32(bytes[offset+12:]) + self.entry_chunklen = u32(bytes[offset+20:]) + self.count_chunklen = u32(bytes[offset+24:]) + self.entry_unknown = u32(bytes[offset+28:]) + self.count_unknown = u32(bytes[offset+32:]) + offset += 48 + elif blocktype == 'ITSF': + if blockver != 4: + raise LitError( + 'Unknown ITSF block format %d' % blockver) + if u32(bytes[offset+4+16:]): + raise LitError('This file has a 64bit content offset') + self.content_offset = u32(bytes[offset+16:]) + self.timestamp = u32(bytes[offset+24:]) + self.language_id = u32(bytes[offset+28:]) + offset += 48 + if not hasattr(self, 'content_offset'): + raise LitError('Could not figure out the content offset') + + @preserve def read_header_pieces(self): - opos = self._stream.tell() - try: - src = self.header[self.hdr_len:] - for i in range(self.num_pieces): - piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE] - if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: - raise LitError('Piece %s has 64bit value' % repr(piece)) - offset, size = u32(piece), int32(piece[8:]) - self._stream.seek(offset) - piece = self._stream.read(size) - if i == 0: - continue # Dont need this piece - elif i == 1: - if u32(piece[8:]) != self.entry_chunklen or \ - u32(piece[12:]) != self.entry_unknown: - raise LitError('Secondary header does not match piece') - self.read_directory(piece) - elif i == 2: - if u32(piece[8:]) != self.count_chunklen or \ - u32(piece[12:]) != 
self.count_unknown: - raise LitError('Secondary header does not match piece') - continue # No data needed from this piece - elif i == 3: - self.piece3_guid = piece - elif i == 4: - self.piece4_guid = piece - finally: - self._stream.seek(opos) + src = self.header[self.hdr_len:] + for i in range(self.num_pieces): + piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE] + if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: + raise LitError('Piece %s has 64bit value' % repr(piece)) + offset, size = u32(piece), int32(piece[8:]) + self._stream.seek(offset) + piece = self._stream.read(size) + if i == 0: + continue # Dont need this piece + elif i == 1: + if u32(piece[8:]) != self.entry_chunklen or \ + u32(piece[12:]) != self.entry_unknown: + raise LitError('Secondary header does not match piece') + self.read_directory(piece) + elif i == 2: + if u32(piece[8:]) != self.count_chunklen or \ + u32(piece[12:]) != self.count_unknown: + raise LitError('Secondary header does not match piece') + continue # No data needed from this piece + elif i == 3: + self.piece3_guid = piece + elif i == 4: + self.piece4_guid = piece def read_directory(self, piece): self.entries = [] @@ -521,108 +515,88 @@ class LitFile(object): if not hasattr(self, 'manifest'): raise LitError('Lit file does not have a valid manifest') - - def read_section_names(self, entry): - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - if len(raw) < 4: - raise LitError('Invalid Namelist section') - pos = 4 - self.num_sections = u16(raw[2:pos]) - - self.sections = {} - for section in range(self.num_sections): - size = u16(raw[pos:pos+2]) - pos += 2 - size = size*2 + 2 - if pos + size > len(raw): - raise LitError('Invalid Namelist section') - self.sections[section] = raw[pos:pos+size].decode('utf-16-le') - pos += size - finally: - self._stream.seek(opos) - - def read_manifest(self, entry): - opos = self._stream.tell() - try: - self.manifest = [] - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - pos = 0 - while pos < len(raw): - size = ord(raw[pos]) - if size == 0: break - pos += 1 - root = raw[pos:pos+size].decode('utf8') - pos += size - if pos >= len(raw): - raise LitError('Truncated manifest.') - for state in ['spine', 'not spine', 'css', 'images']: - num_files = int32(raw[pos:pos+4]) - pos += 4 - if num_files == 0: continue - - i = 0 - while i < num_files: - if pos+5 >= len(raw): - raise LitError('Truncated manifest.') - offset = u32(raw[pos:pos+4]) - pos += 4 - - slen = ord(raw[pos]) - pos += 1 - internal = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - original = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - mime_type = raw[pos:pos+slen].decode('utf8') - pos += slen + 1 - - self.manifest.append( - ManifestItem(original, internal, mime_type, - offset, root, state)) - i += 1 - finally: - self._stream.seek(opos) - - def read_meta(self, entry): - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - xml = \ -'''\ - - -'''+\ - unicode(UnBinary(raw, self.manifest)) - self.meta = xml - finally: - self._stream.seek(opos) - + @preserve + def read_section_names(self, entry): + self._stream.seek(self.content_offset + entry.offset) + raw = self._stream.read(entry.size) + if len(raw) < 4: + raise LitError('Invalid Namelist section') + pos = 4 + self.num_sections = u16(raw[2:pos]) + + 
self.sections = {} + for section in range(self.num_sections): + size = u16(raw[pos:pos+2]) + pos += 2 + size = size*2 + 2 + if pos + size > len(raw): + raise LitError('Invalid Namelist section') + self.sections[section] = raw[pos:pos+size].decode('utf-16-le') + pos += size + + @preserve + def read_manifest(self, entry): + self.manifest = [] + self._stream.seek(self.content_offset + entry.offset) + raw = self._stream.read(entry.size) + pos = 0 + while pos < len(raw): + size = ord(raw[pos]) + if size == 0: break + pos += 1 + root = raw[pos:pos+size].decode('utf8') + pos += size + if pos >= len(raw): + raise LitError('Truncated manifest.') + for state in ['spine', 'not spine', 'css', 'images']: + num_files = int32(raw[pos:pos+4]) + pos += 4 + if num_files == 0: continue + + i = 0 + while i < num_files: + if pos+5 >= len(raw): + raise LitError('Truncated manifest.') + offset = u32(raw[pos:pos+4]) + pos += 4 + + slen = ord(raw[pos]) + pos += 1 + internal = raw[pos:pos+slen].decode('utf8') + pos += slen + + slen = ord(raw[pos]) + pos += 1 + original = raw[pos:pos+slen].decode('utf8') + pos += slen + + slen = ord(raw[pos]) + pos += 1 + mime_type = raw[pos:pos+slen].decode('utf8') + pos += slen + 1 + + self.manifest.append( + ManifestItem(original, internal, mime_type, + offset, root, state)) + i += 1 + + @preserve + def read_meta(self, entry): + self._stream.seek(self.content_offset + entry.offset) + raw = self._stream.read(entry.size) + xml = OPF_DECL + unicode(UnBinary(raw, self.manifest)) + self.meta = xml + + @preserve def read_image(self, internal_name): cover_entry = None for entry in self.entries: if internal_name in entry.name: cover_entry = entry break - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + cover_entry.offset) - return self._stream.read(cover_entry.size) - finally: - self._stream.seek(opos) + self._stream.seek(self.content_offset + cover_entry.offset) + return self._stream.read(cover_entry.size) def get_metadata(stream): try: From a48282500fb831dc8019a69068700a98c6d8a90d Mon Sep 17 00:00:00 2001 From: "Marshall T. 
Vandegrift" Date: Wed, 16 Jul 2008 15:00:47 -0400 Subject: [PATCH 03/19] Checkpoint for changing computers --- src/calibre/ebooks/lit/reader.py | 65 ++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 711aef6586..4d149042cc 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -25,20 +25,6 @@ XHTML_DECL = """ "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd"> """ -class DirectoryEntry(object): - def __init__(self, name, section, offset, size): - self.name = name - self.section = section - self.offset = offset - self.size = size - - def __repr__(self): - return "" \ - % (self.name, self.section, self.offset, self.size) - - def __str__(self): - return repr(self) - def u32(bytes): return struct.unpack('' + self.path + return "ManifestItem(internal='%s', path='%s')" \ + % (repr(self.internal), repr(self.path)) def preserve(function): def wrapper(self, *args, **kwargs): @@ -382,6 +382,7 @@ class LitFile(object): return self._stream.read(16) return property(fget=fget) guid = guid() + def header(): @preserve @@ -403,6 +404,19 @@ class LitFile(object): self.read_secondary_header() self.read_header_pieces() + @preserve + def __len__(self): + self._stream.seek(0, 2) + return self._stream.tell() + + @preserve + def _read_raw(self, offset, size): + self._stream.seek(offset) + return self._stream.read(size) + + def _read_content(self, offset, size): + return self._read_raw(self.content_offset + offset, size) + @preserve def read_secondary_header(self): self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) @@ -462,7 +476,7 @@ class LitFile(object): self.piece4_guid = piece def read_directory(self, piece): - self.entries = [] + self.entries = {} if not piece.startswith('IFCM'): raise LitError('Header piece #1 is not main directory.') chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28]) @@ -507,7 +521,7 @@ class LitFile(object): self.read_manifest(entry) elif name == '/meta': self.read_meta(entry) - self.entries.append(entry) + self.entries[name] = entry i += 1 if not hasattr(self, 'sections'): @@ -590,14 +604,17 @@ class LitFile(object): @preserve def read_image(self, internal_name): - cover_entry = None - for entry in self.entries: - if internal_name in entry.name: - cover_entry = entry - break + cover_entry = self.entries[internal_name] self._stream.seek(self.content_offset + cover_entry.offset) return self._stream.read(cover_entry.size) + def get_file(self, name): + entry = self.entries[name] + if entry.section == 0: + return self._read_content(entry.offset, entry.size) + section = self.get_section(entry.section) + return section[entry.offset:entry.offset+entry.size] + def get_metadata(stream): try: litfile = LitFile(stream) From 9cf4508547a499d7174dfb90cabd5945ba3b356d Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Thu, 17 Jul 2008 19:33:30 -0400 Subject: [PATCH 04/19] Checkpoint for switching computers --- src/calibre/ebooks/lit/msdes.py | 481 +++++++++++++++++++++++++++++++ src/calibre/ebooks/lit/mssha1.py | 8 +- src/calibre/ebooks/lit/reader.py | 118 ++++++-- 3 files changed, 579 insertions(+), 28 deletions(-) create mode 100644 src/calibre/ebooks/lit/msdes.py diff --git a/src/calibre/ebooks/lit/msdes.py b/src/calibre/ebooks/lit/msdes.py new file mode 100644 index 0000000000..5bc67b09bb --- /dev/null +++ b/src/calibre/ebooks/lit/msdes.py @@ -0,0 +1,481 @@ +# Re-modified for use in MS LIT decryption. 
Un-reversed the bytebit[] array. +# Substituted Microsoft's absurd modified S-boxes. Modified the encrypt/decrypt +# methods to handle more than one block at a time. +# +# And lo, all the previous notices follow: + +# Modified DES encryption for VNC password authentication. +# Ported from realvnc's java viewer by +# I chose this package name because it is not compatible with the +# original DES algorithm, e.g. found pycrypto. +# +# (C) 2003 chris +# Released as free software under the Python License. +# +# You're free to use it for commercial and noncommercial +# application, modify and redistribute it as long as the +# copyright notices are intact. There are no warranties, not +# even that it does what it says to do ;-) +# +# Original notice following: + +# This DES class has been extracted from package Acme.Crypto for use in VNC. +# The bytebit[] array has been reversed so that the most significant bit +# in each byte of the key is ignored, not the least significant. Also the +# unnecessary odd parity code has been removed. +# +# These changes are: +# Copyright (C) 1999 AT&T Laboratories Cambridge. All Rights Reserved. +# +# This software is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# + +# DesCipher - the DES encryption method +# +# The meat of this code is by Dave Zimmerman , and is: +# +# Copyright (c) 1996 Widget Workshop, Inc. All Rights Reserved. +# +# Permission to use, copy, modify, and distribute this software +# and its documentation for NON-COMMERCIAL or COMMERCIAL purposes and +# without fee is hereby granted, provided that this copyright notice is kept +# intact. +# +# WIDGET WORKSHOP MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY +# OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +# TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE, OR NON-INFRINGEMENT. WIDGET WORKSHOP SHALL NOT BE LIABLE +# FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR +# DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. +# +# THIS SOFTWARE IS NOT DESIGNED OR INTENDED FOR USE OR RESALE AS ON-LINE +# CONTROL EQUIPMENT IN HAZARDOUS ENVIRONMENTS REQUIRING FAIL-SAFE +# PERFORMANCE, SUCH AS IN THE OPERATION OF NUCLEAR FACILITIES, AIRCRAFT +# NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL, DIRECT LIFE +# SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH THE FAILURE OF THE +# SOFTWARE COULD LEAD DIRECTLY TO DEATH, PERSONAL INJURY, OR SEVERE +# PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH RISK ACTIVITIES"). WIDGET WORKSHOP +# SPECIFICALLY DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR +# HIGH RISK ACTIVITIES. +# +# +# The rest is: +# +# Copyright (C) 1996 by Jef Poskanzer . All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# Visit the ACME Labs Java page for up-to-date versions of this and other +# fine Java utilities: http://www.acme.com/java/ + + +#/ The DES encryption method. +#

+# This is surprisingly fast, for pure Java. On a SPARC 20, wrapped +# in Acme.Crypto.EncryptedOutputStream or Acme.Crypto.EncryptedInputStream, +# it does around 7000 bytes/second. +#

+# Most of this code is by Dave Zimmerman , and is +# Copyright (c) 1996 Widget Workshop, Inc. See the source file for details. +#

+# Fetch the software.
+# Fetch the entire Acme package. +#

+# @see Des3Cipher +# @see EncryptedOutputStream +# @see EncryptedInputStream + +import struct + +class DesCipher: + # Constructor, byte-array key. + def __init__(self, key): + self.setKey(key) + + #/ Set the key. + def setKey(self, key): + self.encryptKeys = self.deskey([ord(x) for x in key], 1) + self.decryptKeys = self.deskey([ord(x) for x in key], 0) + + # Turn an 8-byte key into internal keys. + def deskey(self, keyBlock, encrypting): + #~ int i, j, l, m, n; + pc1m = [0]*56 #new int[56]; + pcr = [0]*56 #new int[56]; + kn = [0]*32 #new int[32]; + + for j in range(56): + l = pc1[j] + m = l & 07 + pc1m[j] = ((keyBlock[l >> 3] & bytebit[m]) != 0) + for i in range(16): + if encrypting: + m = i << 1 + else: + m = (15-i) << 1 + n = m + 1 + kn[m] = kn[n] = 0 + for j in range(28): + l = j + totrot[i] + if l < 28: + pcr[j] = pc1m[l] + else: + pcr[j] = pc1m[l - 28] + for j in range(28, 56): + l = j + totrot[i] + if l < 56: + pcr[j] = pc1m[l] + else: + pcr[j] = pc1m[l - 28] + for j in range(24): + if pcr[pc2[j]] != 0: + kn[m] |= bigbyte[j] + if pcr[pc2[j+24]] != 0: + kn[n] |= bigbyte[j] + return self.cookey(kn) + + def cookey(self, raw): + #~ int raw0, raw1; + #~ int rawi, KnLi; + #~ int i; + KnL = [0]*32 + + rawi = 0 + KnLi = 0 + for i in range(16): + raw0 = raw[rawi] + rawi += 1 + raw1 = raw[rawi] + rawi += 1 + KnL[KnLi] = (raw0 & 0x00fc0000L) << 6 + KnL[KnLi] |= (raw0 & 0x00000fc0L) << 10 + KnL[KnLi] |= (raw1 & 0x00fc0000L) >> 10 + KnL[KnLi] |= (raw1 & 0x00000fc0L) >> 6 + KnLi += 1 + KnL[KnLi] = (raw0 & 0x0003f000L) << 12 + KnL[KnLi] |= (raw0 & 0x0000003fL) << 16 + KnL[KnLi] |= (raw1 & 0x0003f000L) >> 4 + KnL[KnLi] |= (raw1 & 0x0000003fL) + KnLi += 1 + return KnL + + # Block encryption routines. + + #/ Encrypt a block of eight bytes. + def encrypt(self, clearText): + if len(clearText) % 8 != 0: + raise TypeError, "length must be multiple of block size" + result = [] + while clearText: + result.append(struct.pack( + ">LL", *self.des(struct.unpack(">LL", clearText[:8]), + self.encryptKeys))) + clearText = clearText[8:] + return ''.join(result) + + #/ Decrypt a block of eight bytes. + def decrypt(self, cipherText): + if len(cipherText) % 8 != 0: + raise TypeError, "length must be multiple of block size" + result = [] + while cipherText: + result.append(struct.pack( + ">LL", *self.des(struct.unpack(">LL", cipherText[:8]), + self.decryptKeys))) + cipherText = cipherText[8:] + return ''.join(result) + + # The DES function. 
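A note on the encrypt()/decrypt() methods just above (the des() round function they call follows below): both walk their input eight bytes at a time and refuse anything that is not a multiple of the block size, which is why the reader's calculate_deskey(), later in this patch, folds its hash down to exactly eight key bytes. A short round-trip sketch, assuming the module is importable the same way the reader imports it; the key is the one from this file's own __main__ test and the plaintext is simply two blocks long:

    # Round-trip sketch for the multi-block encrypt()/decrypt() defined above.
    # Assumes calibre.ebooks.lit.msdes is importable, as in the reader patch below.
    import calibre.ebooks.lit.msdes as msdes

    key = "\x01\x23\x45\x67\x89\xab\xcd\xef"   # setKey() expects exactly 8 bytes
    des = msdes.new(key)

    plain = "Now is the time!"                 # 16 bytes = two 8-byte DES blocks
    cipher = des.encrypt(plain)
    assert len(cipher) == len(plain)           # block cipher, no length change
    assert des.decrypt(cipher) == plain        # decrypt keys are the reversed schedule
    # Inputs that are not a multiple of 8 bytes raise TypeError, so callers pad first.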
+ def des(self, (leftt, right), keys): + #~ int fval, work, right, leftt; + #~ int round + keysi = 0 + + work = ((leftt >> 4) ^ right) & 0x0f0f0f0fL + right ^= work + leftt ^= (work << 4) & 0xffffffffL + + work = ((leftt >> 16) ^ right) & 0x0000ffffL + right ^= work + leftt ^= (work << 16) & 0xffffffffL + + work = ((right >> 2) ^ leftt) & 0x33333333L + leftt ^= work + right ^= (work << 2) & 0xffffffffL + + work = ((right >> 8) ^ leftt) & 0x00ff00ffL + leftt ^= work + right ^= (work << 8) & 0xffffffffL + right = ((right << 1) | ((right >> 31) & 1)) & 0xffffffffL + + work = (leftt ^ right) & 0xaaaaaaaaL + leftt ^= work + right ^= work + leftt = ((leftt << 1) | ((leftt >> 31) & 1)) & 0xffffffffL + + for round in range(8): + work = ((right << 28) | (right >> 4)) & 0xffffffffL + work ^= keys[keysi] + keysi += 1 + fval = SP7[ work & 0x0000003fL ] + fval |= SP5[(work >> 8) & 0x0000003fL ] + fval |= SP3[(work >> 16) & 0x0000003fL ] + fval |= SP1[(work >> 24) & 0x0000003fL ] + work = right ^ keys[keysi] + keysi += 1 + fval |= SP8[ work & 0x0000003fL ] + fval |= SP6[(work >> 8) & 0x0000003fL ] + fval |= SP4[(work >> 16) & 0x0000003fL ] + fval |= SP2[(work >> 24) & 0x0000003fL ] + leftt ^= fval + work = ((leftt << 28) | (leftt >> 4)) & 0xffffffffL + work ^= keys[keysi] + keysi += 1 + fval = SP7[ work & 0x0000003fL ] + fval |= SP5[(work >> 8) & 0x0000003fL ] + fval |= SP3[(work >> 16) & 0x0000003fL ] + fval |= SP1[(work >> 24) & 0x0000003fL ] + work = leftt ^ keys[keysi] + keysi += 1 + fval |= SP8[ work & 0x0000003fL ] + fval |= SP6[(work >> 8) & 0x0000003fL ] + fval |= SP4[(work >> 16) & 0x0000003fL ] + fval |= SP2[(work >> 24) & 0x0000003fL ] + right ^= fval + + right = ((right << 31) | (right >> 1)) & 0xffffffffL + work = (leftt ^ right) & 0xaaaaaaaaL + leftt ^= work + right ^= work + leftt = ((leftt << 31) | (leftt >> 1)) & 0xffffffffL + work = ((leftt >> 8) ^ right) & 0x00ff00ffL + right ^= work + leftt ^= (work << 8) & 0xffffffffL + work = ((leftt >> 2) ^ right) & 0x33333333L + right ^= work + leftt ^= (work << 2) & 0xffffffffL + work = ((right >> 16) ^ leftt) & 0x0000ffffL + leftt ^= work + right ^= (work << 16) & 0xffffffffL + work = ((right >> 4) ^ leftt) & 0x0f0f0f0fL + leftt ^= work + right ^= (work << 4) & 0xffffffffL + return right, leftt + +# Tables, permutations, S-boxes, etc. 
+ +bytebit = [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01] + +bigbyte = [ + 0x800000, 0x400000, 0x200000, 0x100000, + 0x080000, 0x040000, 0x020000, 0x010000, + 0x008000, 0x004000, 0x002000, 0x001000, + 0x000800, 0x000400, 0x000200, 0x000100, + 0x000080, 0x000040, 0x000020, 0x000010, + 0x000008, 0x000004, 0x000002, 0x000001 +] + +pc1 = [ + 56, 48, 40, 32, 24, 16, 8, + 0, 57, 49, 41, 33, 25, 17, + 9, 1, 58, 50, 42, 34, 26, + 18, 10, 2, 59, 51, 43, 35, + 62, 54, 46, 38, 30, 22, 14, + 6, 61, 53, 45, 37, 29, 21, + 13, 5, 60, 52, 44, 36, 28, + 20, 12, 4, 27, 19, 11, 3 +] + +totrot = [ + 1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28 +] + +pc2 = [ + 13, 16, 10, 23, 0, 4, + 2, 27, 14, 5, 20, 9, + 22, 18, 11, 3 , 25, 7, + 15, 6, 26, 19, 12, 1, + 40, 51, 30, 36, 46, 54, + 29, 39, 50, 44, 32, 47, + 43, 48, 38, 55, 33, 52, + 45, 41, 49, 35, 28, 31, +] + +SP1 = [ +0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, +0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L, +0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L, +0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L, +0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L, +0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L, +0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L, +0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L, +0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L, +0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L, +0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L, +0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L, +0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L, +0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L, +0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L, +0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L +] +SP2 = [ +0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L, +0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L, +0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L, +0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L, +0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L, +0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L, +0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L, +0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L, +0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L, +0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L, +0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L, +0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L, +0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L, +0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L, +0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L, +0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L +] +SP3 = [ +0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L, +0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L, +0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L, +0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L, +0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L, +0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L, +0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L, +0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L, +0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L, +0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L, +0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L, +0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L, +0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L, +0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L, +0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L, +0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L +] +SP4 = [ +0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L, +0x00000000L, 0x10400000L, 0x10001008L, 
0x00400008L, +0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L, +0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L, +0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L, +0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L, +0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L, +0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L, +0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L, +0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L, +0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L, +0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L, +0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L, +0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L, +0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L, +0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L +] +SP5 = [ +0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L, +0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L, +0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L, +0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L, +0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L, +0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L, +0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L, +0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L, +0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L, +0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L, +0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L, +0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L, +0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L, +0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L, +0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L, +0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L +] +SP6 = [ +0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L, +0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L, +0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L, +0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L, +0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L, +0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L, +0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L, +0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L, +0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L, +0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L, +0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L, +0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L, +0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L, +0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L, +0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L, +0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L, +] +SP7 = [ +0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L, +0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L, +0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L, +0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L, +0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L, +0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L, +0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L, +0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L, +0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L, +0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L, +0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L, +0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L, +0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L, +0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L, +0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L, +0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L, +] +SP8 = [ +0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L, +0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L, +0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L, +0x00820080L, 0x20020080L, 0x20000080L, 
0x20800000L, +0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L, +0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L, +0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L, +0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L, +0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L, +0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L, +0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L, +0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L, +0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L, +0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L, +0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L, +0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L, +] + +def new(key): + return DesCipher(key) + +block_size = 8 +key_size = 8 + +#test only: +if __name__ == '__main__': + des = DesCipher("\x01\x23\x45\x67\x89\xab\xcd\xef") + print ''.join( + "%02x" % ord(x) for x in des.encrypt("Now is t")) + diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py index f6f7c33444..d61bd39094 100644 --- a/src/calibre/ebooks/lit/mssha1.py +++ b/src/calibre/ebooks/lit/mssha1.py @@ -123,7 +123,7 @@ K = [ 0xCA62C1D6L # (60 <= t <= 79) ] -class sha: +class mssha1(object): "An implementation of the MD5 hash function in pure Python." def __init__(self): @@ -186,7 +186,7 @@ class sha: def update(self, inBuf): """Add to the current message. - Update the sha object with the string arg. Repeated calls + Update the mssha1 object with the string arg. Repeated calls are equivalent to a single call with the concatenation of all the arguments, i.e. s.update(a); s.update(b) is equivalent to s.update(a+b). @@ -308,12 +308,12 @@ digest_size = digestsize = 20 blocksize = 1 def new(arg=None): - """Return a new sha crypto object. + """Return a new mssha1 crypto object. If arg is present, the method call update(arg) is made. 
""" - crypto = sha() + crypto = mssha1() if arg: crypto.update(arg) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 4d149042cc..2608d63399 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -13,6 +13,8 @@ from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf import OPFReader from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP +import calibre.ebooks.lit.mssha1 as mssha1 +import calibre.ebooks.lit.msdes as msdes OPF_DECL = """" "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd"> """ +DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" +LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" + def u32(bytes): return struct.unpack(' len(raw): raise LitError('Invalid Namelist section') - self.sections[section] = raw[pos:pos+size].decode('utf-16-le') - pos += size + self.section_names[section] = \ + raw[pos:pos+size].decode('utf-16-le').rstrip('\000') + pos += size - @preserve def read_manifest(self, entry): self.manifest = [] - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) + raw = self._read_content(entry.offset, entry.size) pos = 0 while pos < len(raw): size = ord(raw[pos]) @@ -595,19 +598,52 @@ class LitFile(object): offset, root, state)) i += 1 - @preserve def read_meta(self, entry): - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) + raw = self._read_content(entry.offset, entry.size) xml = OPF_DECL + unicode(UnBinary(raw, self.manifest)) self.meta = xml - @preserve - def read_image(self, internal_name): - cover_entry = self.entries[internal_name] - self._stream.seek(self.content_offset + cover_entry.offset) - return self._stream.read(cover_entry.size) + def read_drm(self): + def exists_file(name): + try: self.get_file(name) + except KeyError: return False + return True + self.drmlevel = 0 + if exists_file('/DRMStorage/Licenses/EUL'): + self.drmlevel = 5 + elif exists_file('/DRMStorage/DRMBookplate'): + self.drmlevel = 3 + elif exists_file('/DRMStorage/DRMSealed'): + self.drmlevel = 1 + else: + return + des = msdes.new(self.calculate_deskey()) + bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed')) + if bookkey[0] != '\000': + raise LitError('Unable to decrypt title key!') + self.bookkey = bookkey[1:9] + def calculate_deskey(self): + hashfiles = ['/meta', '/DRMStorage/DRMSource'] + if self.drmlevel == 3: + hashfiles.append('/DRMStorage/DRMBookplate') + prepad = 2 + hash = mssha1.new() + for name in hashfiles: + data = self.get_file(name) + if prepad > 0: + data = ("\000" * prepad) + data + prepad = 0 + postpad = 64 - (len(data) % 64) + if postpad < 64: + data = data + ("\000" * postpad) + hash.update(data) + digest = hash.digest() + key = [0] * 8 + for i in xrange(0, len(digest)): + key[i % 8] ^= ord(digest[i]) + return ''.join(chr(x) for x in key) + def get_file(self, name): entry = self.entries[name] if entry.section == 0: @@ -615,6 +651,40 @@ class LitFile(object): section = self.get_section(entry.section) return section[entry.offset:entry.offset+entry.size] + def get_section(self, section): + data = self.section_data[section] + if not data: + data = self._get_section(section) + self.section_data[section] = data + return data + + def _get_section(self, section): + name = self.section_names[section] + path = '::DataSpace/Storage/' + name + transform = self.get_file(path + '/Transform/List') + content = self.get_file(path + '/Content') 
+ control = self.get_file(path + '/ControlData') + idx_transform = idx_control = 0 + while (len(transform) - idx_transform) >= 16: + ndwords = int32(control[idx_control:]) + 1 + if (idx_control + (ndwords * 4)) > len(control) or ndwords <= 0: + raise LitError("ControlData is too short") + guid = msguid(transform[idx_transform:]) + if guid == DESENCRYPT_GUID: + content = self._decrypt(content) + idx_control += ndwords * 4 + elif guid == LZXCOMPRESS_GUID: + raise LitError("LZX decompression not implemented") + else: + raise LitError("Unrecognized transform: %s." % repr(guid)) + idx_transform += 16 + return content + + def _decrypt(self, content): + if self.drmlevel == 5: + raise LitError('Cannot extract content from a DRM protected ebook') + return msdes.new(self.bookkey).decrypt(content) + def get_metadata(stream): try: litfile = LitFile(stream) @@ -632,7 +702,7 @@ def get_metadata(stream): ext = 'jpg' else: ext = ext.lower() - cd = litfile.read_image(cover_item) + cd = litfile.get_file(cover_item) mi.cover_data = (ext, cd) if cd else (None, None) except: title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown' From 4eeae13b3508d743fcb2f007fe3b352b87c9acc5 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Thu, 17 Jul 2008 23:14:59 -0400 Subject: [PATCH 05/19] Checkpoint before sleep --- src/calibre/ebooks/lit/lzxd.py | 138 +++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 src/calibre/ebooks/lit/lzxd.py diff --git a/src/calibre/ebooks/lit/lzxd.py b/src/calibre/ebooks/lit/lzxd.py new file mode 100644 index 0000000000..a09daf012b --- /dev/null +++ b/src/calibre/ebooks/lit/lzxd.py @@ -0,0 +1,138 @@ +import copy + +# some constants defined by the LZX specification +MIN_MATCH = 2 +MAX_MATCH = 257 +NUM_CHARS = 256 +BLOCKTYPE_INVALID = 0 # also blocktypes 4-7 invalid +BLOCKTYPE_VERBATIM = 1 +BLOCKTYPE_ALIGNED = 2 +BLOCKTYPE_UNCOMPRESSED = 3 +PRETREE_NUM_ELEMENTS = 20 +ALIGNED_NUM_ELEMENTS = 8 # aligned offset tree #elements +NUM_PRIMARY_LENGTHS = 7 # this one missing from spec! 
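Back in reader.py, calculate_deskey() (added earlier in this patch) feeds /meta, /DRMStorage/DRMSource and, at DRM level 3, the bookplate through the modified SHA-1: the first file gets two leading zero bytes, every file is zero-padded out to a 64-byte boundary, and the resulting 20-byte digest is XOR-folded into the 8-byte DES key that unseals /DRMStorage/DRMSealed. A compact restatement of just the fold, assuming a digest string is already in hand:

    # Restatement of the digest -> 8-byte key fold inside calculate_deskey().
    # 'digest' stands in for the 20-byte output of calibre.ebooks.lit.mssha1.
    def fold_digest_to_deskey(digest):
        key = [0] * 8
        for i in xrange(len(digest)):
            key[i % 8] ^= ord(digest[i])       # XOR each digest byte into one of 8 slots
        return ''.join(chr(x) for x in key)

    assert fold_digest_to_deskey('\x01' * 8 + '\x00' * 12) == '\x01' * 8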
+NUM_SECONDARY_LENGTHS = 249 # length tree #elements + +# LZX huffman defines: tweak tablebits as desired +PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS +PRETREE_TABLEBITS = 6 +MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50*8 +MAINTREE_TABLEBITS = 12 +LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS+1 +LENGTH_TABLEBITS = 12 +ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS +ALIGNED_TABLEBITS = 7 +LENTABLE_SAFETY = 64 # table decoding overruns are allowed + +FRAME_SIZE = 32768 # the size of a frame in LZX + + +class BitReader(object): + def __init__(self, data): + self.data, self.pos, self.nbits = \ + data + "\x00\x00\x00\x00", 0, len(data) * 8 + + def peek(self, n): + r, g = 0, 0 + while g < n: + r = (r << 8) | ord(self.data[(self.pos + g) >> 3]) + g = g + 8 - ((self.pos + g) & 7) + return (r >> (g - n)) & ((1 << n) - 1) + + def remove(self, n): + self.pos += n + return self.pos <= self.nbits + + def left(self): + return self.nbits - self.pos + + def read(self, n): + val = self.peek(n) + self.remove(n) + return val + +class LzxError(Exception): + pass + +POSITION_BASE = [0]*51 +EXTRA_BITS = [0]*51 + +def _static_init(): + j = 0 + for i in xrange(0, 51, 2): + EXTRA_BITS[i] = j + EXTRA_BITS[i + 1] = j + if i != 0 or j < 17): j += 1 + j = 0 + for i in xrange(0, 51, 1): + POSITION_BASE[i] = j + j += 1 << extra_bits[i] +_static_init() + +class LzxDecompressor(object): + def __init__(self, window_bits, reset_interval=0x7fff): + # LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) + if window_bits < 15 or window_bits > 21: + raise LzxError("Invalid window size") + + self.window_size = 1 << window_bits + self.window_posn = 0 + self.frame_posn = 0 + self.frame = 0 + self.reset_interval = reset_interval + self.intel_filesize = 0 + self.intel_curpos = 0 + + # window bits: 15 16 17 18 19 20 21 + # position slots: 30 32 34 36 38 42 50 + self.posn_solts = 50 if window_bits == 21 \ + else 42 if window_bits == 20 else window_bits << 1 + self.intel_started = 0 + self.input_end = 0 + + # huffman code lengths + self.PRETREE_len = [0] * (PRETREE_MAXSYMBOLS + LENTABLE_SAFETY) + self.MAINTREE_len = [0] * (MAINTREE_MAXSYMBOLS + LENTABLE_SAFETY) + self.LENGTH_len = [0] * (LENGTH_MAXSYMBOLS + LENTABLE_SAFETY) + self.ALIGNED_len = [0] * (ALIGNED_MAXSYMBOLS + LENTABLE_SAFETY) + + # huffman decoding tables + self.PRETREE_table = \ + [0] * ((1 << PRETREE_TABLEBITS) + (PRETREE_MAXSYMBOLS * 2)) + self.MAINTREE_table = \ + [0] * ((1 << MAINTREE_TABLEBITS) + (MAINTREE_MAXSYMBOLS * 2)) + self.LENGTH_table = \ + [0] * ((1 << LENGTH_TABLEBITS) + (LENGTH_MAXSYMBOLS * 2)) + self.ALIGNED_table = \ + [0] * ((1 << ALIGNED_TABLEBITS) + (ALIGNED_MAXSYMBOLS * 2)) + + self.o_buf = self.i_buf = '' + + self._reset_state() + + def _reset_state(self): + self.R0 = 1 + self.R1 = 1 + self.R2 = 1 + self.header_read = 0 + self.block_remaining = 0 + self.block_type = BLOCKTYPE_INVALID + + # initialise tables to 0 (because deltas will be applied to them) + for i in xrange(MAINTREE_MAXSYMBOLS): self.MAINTREE_len[i] = 0 + for i in xrange(LENGTH_MAXSYMBOLS): self.LENGTH_len[i] = 0 + + def decompress(self, data, out_bytes): + return ''.join(self._decompress(data, out_bytes)) + + def _decompress(self, data, out_bytes): + # easy answers + if out_bytes < 0: + raise LzxError('Negative desired output bytes') + + # Initialize input and output + input = BitReader(data) + output = [] + + + From 11c6b0a44d6c819634594eb538d3d4feff7632fe Mon Sep 17 00:00:00 2001 From: "Marshall T. 
Vandegrift" Date: Fri, 18 Jul 2008 00:15:13 -0400 Subject: [PATCH 06/19] Fixed trailing space issue --- src/calibre/ebooks/lrf/html/convert_from.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 15eede6d6c..17ffd05ee2 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -222,6 +222,7 @@ class HTMLConverter(object, LoggingInterface): self.memory = [] #: Used to ensure that duplicate CSS unhandled erros are not reported self.tops = {} #: element representing the top of each HTML file in the LRF file self.previous_text = '' #: Used to figure out when to lstrip + self.stripped_space = '' self.preserve_block_style = False #: Used so that

tags in

elements are handled properly self.avoid_page_break = False self.current_page = book.create_page() @@ -864,11 +865,15 @@ class HTMLConverter(object, LoggingInterface): if collapse_whitespace: src = re.sub(r'\s{1,}', ' ', src) + if self.stripped_space and len(src) == len(src.lstrip(u' \n\r\t')): + src = self.stripped_space + src + src, orig = src.rstrip(u' \n\r\t'), src + self.stripped_space = orig[len(src):] if len(self.previous_text) != len(self.previous_text.rstrip(u' \n\r\t')): src = src.lstrip(u' \n\r\t') if len(src): self.previous_text = src - append_text(src) + append_text(src) else: srcs = src.split('\n') for src in srcs[:-1]: From bc6f3ab5de22ca0fdb70369e54c081f01b78e2fa Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Fri, 18 Jul 2008 00:20:01 -0400 Subject: [PATCH 07/19] Reverted incorrect branch change --- src/calibre/ebooks/lrf/html/convert_from.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 17ffd05ee2..15eede6d6c 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -222,7 +222,6 @@ class HTMLConverter(object, LoggingInterface): self.memory = [] #: Used to ensure that duplicate CSS unhandled erros are not reported self.tops = {} #: element representing the top of each HTML file in the LRF file self.previous_text = '' #: Used to figure out when to lstrip - self.stripped_space = '' self.preserve_block_style = False #: Used so that

tags in

elements are handled properly self.avoid_page_break = False self.current_page = book.create_page() @@ -865,15 +864,11 @@ class HTMLConverter(object, LoggingInterface): if collapse_whitespace: src = re.sub(r'\s{1,}', ' ', src) - if self.stripped_space and len(src) == len(src.lstrip(u' \n\r\t')): - src = self.stripped_space + src - src, orig = src.rstrip(u' \n\r\t'), src - self.stripped_space = orig[len(src):] if len(self.previous_text) != len(self.previous_text.rstrip(u' \n\r\t')): src = src.lstrip(u' \n\r\t') if len(src): self.previous_text = src - append_text(src) + append_text(src) else: srcs = src.split('\n') for src in srcs[:-1]: From 1e78860f4f3b414a70cfdc04b0dcb1435fea22f8 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Fri, 18 Jul 2008 16:34:41 -0400 Subject: [PATCH 08/19] Switched LZX to C extension --- src/calibre/ebooks/lit/lzxd.py | 138 --- src/calibre/utils/lzx-setup.py | 5 + src/calibre/utils/lzx/lzx.h | 169 ++++ src/calibre/utils/lzx/lzxd.c | 905 ++++++++++++++++++ src/calibre/utils/lzx/lzxglue.c | 172 ++++ src/calibre/utils/lzx/lzxmodule.c | 206 ++++ src/calibre/utils/lzx/mspack.h | 1482 +++++++++++++++++++++++++++++ src/calibre/utils/lzx/system.h | 66 ++ 8 files changed, 3005 insertions(+), 138 deletions(-) delete mode 100644 src/calibre/ebooks/lit/lzxd.py create mode 100644 src/calibre/utils/lzx-setup.py create mode 100644 src/calibre/utils/lzx/lzx.h create mode 100644 src/calibre/utils/lzx/lzxd.c create mode 100644 src/calibre/utils/lzx/lzxglue.c create mode 100644 src/calibre/utils/lzx/lzxmodule.c create mode 100644 src/calibre/utils/lzx/mspack.h create mode 100644 src/calibre/utils/lzx/system.h diff --git a/src/calibre/ebooks/lit/lzxd.py b/src/calibre/ebooks/lit/lzxd.py deleted file mode 100644 index a09daf012b..0000000000 --- a/src/calibre/ebooks/lit/lzxd.py +++ /dev/null @@ -1,138 +0,0 @@ -import copy - -# some constants defined by the LZX specification -MIN_MATCH = 2 -MAX_MATCH = 257 -NUM_CHARS = 256 -BLOCKTYPE_INVALID = 0 # also blocktypes 4-7 invalid -BLOCKTYPE_VERBATIM = 1 -BLOCKTYPE_ALIGNED = 2 -BLOCKTYPE_UNCOMPRESSED = 3 -PRETREE_NUM_ELEMENTS = 20 -ALIGNED_NUM_ELEMENTS = 8 # aligned offset tree #elements -NUM_PRIMARY_LENGTHS = 7 # this one missing from spec! 
-NUM_SECONDARY_LENGTHS = 249 # length tree #elements - -# LZX huffman defines: tweak tablebits as desired -PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS -PRETREE_TABLEBITS = 6 -MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50*8 -MAINTREE_TABLEBITS = 12 -LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS+1 -LENGTH_TABLEBITS = 12 -ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS -ALIGNED_TABLEBITS = 7 -LENTABLE_SAFETY = 64 # table decoding overruns are allowed - -FRAME_SIZE = 32768 # the size of a frame in LZX - - -class BitReader(object): - def __init__(self, data): - self.data, self.pos, self.nbits = \ - data + "\x00\x00\x00\x00", 0, len(data) * 8 - - def peek(self, n): - r, g = 0, 0 - while g < n: - r = (r << 8) | ord(self.data[(self.pos + g) >> 3]) - g = g + 8 - ((self.pos + g) & 7) - return (r >> (g - n)) & ((1 << n) - 1) - - def remove(self, n): - self.pos += n - return self.pos <= self.nbits - - def left(self): - return self.nbits - self.pos - - def read(self, n): - val = self.peek(n) - self.remove(n) - return val - -class LzxError(Exception): - pass - -POSITION_BASE = [0]*51 -EXTRA_BITS = [0]*51 - -def _static_init(): - j = 0 - for i in xrange(0, 51, 2): - EXTRA_BITS[i] = j - EXTRA_BITS[i + 1] = j - if i != 0 or j < 17): j += 1 - j = 0 - for i in xrange(0, 51, 1): - POSITION_BASE[i] = j - j += 1 << extra_bits[i] -_static_init() - -class LzxDecompressor(object): - def __init__(self, window_bits, reset_interval=0x7fff): - # LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) - if window_bits < 15 or window_bits > 21: - raise LzxError("Invalid window size") - - self.window_size = 1 << window_bits - self.window_posn = 0 - self.frame_posn = 0 - self.frame = 0 - self.reset_interval = reset_interval - self.intel_filesize = 0 - self.intel_curpos = 0 - - # window bits: 15 16 17 18 19 20 21 - # position slots: 30 32 34 36 38 42 50 - self.posn_solts = 50 if window_bits == 21 \ - else 42 if window_bits == 20 else window_bits << 1 - self.intel_started = 0 - self.input_end = 0 - - # huffman code lengths - self.PRETREE_len = [0] * (PRETREE_MAXSYMBOLS + LENTABLE_SAFETY) - self.MAINTREE_len = [0] * (MAINTREE_MAXSYMBOLS + LENTABLE_SAFETY) - self.LENGTH_len = [0] * (LENGTH_MAXSYMBOLS + LENTABLE_SAFETY) - self.ALIGNED_len = [0] * (ALIGNED_MAXSYMBOLS + LENTABLE_SAFETY) - - # huffman decoding tables - self.PRETREE_table = \ - [0] * ((1 << PRETREE_TABLEBITS) + (PRETREE_MAXSYMBOLS * 2)) - self.MAINTREE_table = \ - [0] * ((1 << MAINTREE_TABLEBITS) + (MAINTREE_MAXSYMBOLS * 2)) - self.LENGTH_table = \ - [0] * ((1 << LENGTH_TABLEBITS) + (LENGTH_MAXSYMBOLS * 2)) - self.ALIGNED_table = \ - [0] * ((1 << ALIGNED_TABLEBITS) + (ALIGNED_MAXSYMBOLS * 2)) - - self.o_buf = self.i_buf = '' - - self._reset_state() - - def _reset_state(self): - self.R0 = 1 - self.R1 = 1 - self.R2 = 1 - self.header_read = 0 - self.block_remaining = 0 - self.block_type = BLOCKTYPE_INVALID - - # initialise tables to 0 (because deltas will be applied to them) - for i in xrange(MAINTREE_MAXSYMBOLS): self.MAINTREE_len[i] = 0 - for i in xrange(LENGTH_MAXSYMBOLS): self.LENGTH_len[i] = 0 - - def decompress(self, data, out_bytes): - return ''.join(self._decompress(data, out_bytes)) - - def _decompress(self, data, out_bytes): - # easy answers - if out_bytes < 0: - raise LzxError('Negative desired output bytes') - - # Initialize input and output - input = BitReader(data) - output = [] - - - diff --git a/src/calibre/utils/lzx-setup.py b/src/calibre/utils/lzx-setup.py new file mode 100644 index 0000000000..87e523b9c3 --- /dev/null +++ 
b/src/calibre/utils/lzx-setup.py @@ -0,0 +1,5 @@ +from distutils.core import setup, Extension + +setup(name="lzx", version="1.0", + ext_modules=[Extension('lzx', sources=['lzx/lzxmodule.c', 'lzx/lzxd.c'], + include_dirs=['lzx'])]) diff --git a/src/calibre/utils/lzx/lzx.h b/src/calibre/utils/lzx/lzx.h new file mode 100644 index 0000000000..15ae17c0aa --- /dev/null +++ b/src/calibre/utils/lzx/lzx.h @@ -0,0 +1,169 @@ +/* This file is part of libmspack. + * (C) 2003-2004 Stuart Caie. + * + * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted + * by Microsoft Corporation. + * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +#include + +#ifndef MSPACK_LZX_H +#define MSPACK_LZX_H 1 + +/* LZX compression / decompression definitions */ + +/* some constants defined by the LZX specification */ +#define LZX_MIN_MATCH (2) +#define LZX_MAX_MATCH (257) +#define LZX_NUM_CHARS (256) +#define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */ +#define LZX_BLOCKTYPE_VERBATIM (1) +#define LZX_BLOCKTYPE_ALIGNED (2) +#define LZX_BLOCKTYPE_UNCOMPRESSED (3) +#define LZX_PRETREE_NUM_ELEMENTS (20) +#define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */ +#define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! */ +#define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements */ + +/* LZX huffman defines: tweak tablebits as desired */ +#define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS) +#define LZX_PRETREE_TABLEBITS (6) +#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8) +#define LZX_MAINTREE_TABLEBITS (12) +#define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1) +#define LZX_LENGTH_TABLEBITS (12) +#define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS) +#define LZX_ALIGNED_TABLEBITS (7) +#define LZX_LENTABLE_SAFETY (64) /* table decoding overruns are allowed */ + +#define LZX_FRAME_SIZE (32768) /* the size of a frame in LZX */ + +struct lzxd_stream { + struct mspack_system *sys; /* I/O routines */ + struct mspack_file *input; /* input file handle */ + struct mspack_file *output; /* output file handle */ + + off_t offset; /* number of bytes actually output */ + off_t length; /* overall decompressed length of stream */ + + unsigned char *window; /* decoding window */ + unsigned int window_size; /* window size */ + unsigned int window_posn; /* decompression offset within window */ + unsigned int frame_posn; /* current frame offset within in window */ + unsigned int frame; /* the number of 32kb frames processed */ + unsigned int reset_interval; /* which frame do we reset the compressor? */ + + unsigned int R0, R1, R2; /* for the LRU offset system */ + unsigned int block_length; /* uncompressed length of this LZX block */ + unsigned int block_remaining; /* uncompressed bytes still left to decode */ + + signed int intel_filesize; /* magic header value used for transform */ + signed int intel_curpos; /* current offset in transform space */ + + unsigned char intel_started; /* has intel E8 decoding started? */ + unsigned char block_type; /* type of the current block */ + unsigned char header_read; /* have we started decoding at all yet? */ + unsigned char posn_slots; /* how many posn slots in stream? */ + unsigned char input_end; /* have we reached the end of input? 
*/ + + int error; + + /* I/O buffering */ + unsigned char *inbuf, *i_ptr, *i_end, *o_ptr, *o_end; + unsigned int bit_buffer, bits_left, inbuf_size; + + /* huffman code lengths */ + unsigned char PRETREE_len [LZX_PRETREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; + unsigned char MAINTREE_len [LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; + unsigned char LENGTH_len [LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; + unsigned char ALIGNED_len [LZX_ALIGNED_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; + + /* huffman decoding tables */ + unsigned short PRETREE_table [(1 << LZX_PRETREE_TABLEBITS) + + (LZX_PRETREE_MAXSYMBOLS * 2)]; + unsigned short MAINTREE_table[(1 << LZX_MAINTREE_TABLEBITS) + + (LZX_MAINTREE_MAXSYMBOLS * 2)]; + unsigned short LENGTH_table [(1 << LZX_LENGTH_TABLEBITS) + + (LZX_LENGTH_MAXSYMBOLS * 2)]; + unsigned short ALIGNED_table [(1 << LZX_ALIGNED_TABLEBITS) + + (LZX_ALIGNED_MAXSYMBOLS * 2)]; + + /* this is used purely for doing the intel E8 transform */ + unsigned char e8_buf[LZX_FRAME_SIZE]; +}; + +/* allocates LZX decompression state for decoding the given stream. + * + * - returns NULL if window_bits is outwith the range 15 to 21 (inclusive). + * + * - uses system->alloc() to allocate memory + * + * - returns NULL if not enough memory + * + * - window_bits is the size of the LZX window, from 32Kb (15) to 2Mb (21). + * + * - reset_interval is how often the bitstream is reset, measured in + * multiples of 32Kb bytes output. For CAB LZX streams, this is always 0 + * (does not occur). + * + * - input_buffer_size is how many bytes to use as an input bitstream buffer + * + * - output_length is the length in bytes of the entirely decompressed + * output stream, if known in advance. It is used to correctly perform + * the Intel E8 transformation, which must stop 6 bytes before the very + * end of the decompressed stream. It is not otherwise used or adhered + * to. If the full decompressed length is known in advance, set it here. + * If it is NOT known, use the value 0, and call lzxd_set_output_length() + * once it is known. If never set, 4 of the final 6 bytes of the output + * stream may be incorrect. + */ +extern struct lzxd_stream *lzxd_init(struct mspack_system *system, + struct mspack_file *input, + struct mspack_file *output, + int window_bits, + int reset_interval, + int input_buffer_size, + off_t output_length); + +/* see description of output_length in lzxd_init() */ +extern void lzxd_set_output_length(struct lzxd_stream *lzx, + off_t output_length); + +/* decompresses, or decompresses more of, an LZX stream. + * + * - out_bytes of data will be decompressed and the function will return + * with an MSPACK_ERR_OK return code. + * + * - decompressing will stop as soon as out_bytes is reached. if the true + * amount of bytes decoded spills over that amount, they will be kept for + * a later invocation of lzxd_decompress(). + * + * - the output bytes will be passed to the system->write() function given in + * lzxd_init(), using the output file handle given in lzxd_init(). More + * than one call may be made to system->write(). + * + * - LZX will read input bytes as necessary using the system->read() function + * given in lzxd_init(), using the input file handle given in lzxd_init(). + * This will continue until system->read() returns 0 bytes, or an error. + * input streams should convey an "end of input stream" by refusing to + * supply all the bytes that LZX asks for when they reach the end of the + * stream, rather than return an error code. 
+ * + * - if an error code other than MSPACK_ERR_OK is returned, the stream should + * be considered unusable and lzxd_decompress() should not be called again + * on this stream. + */ +extern int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes); + +/* frees all state associated with an LZX data stream + * + * - calls system->free() using the system pointer given in lzxd_init() + */ +void lzxd_free(struct lzxd_stream *lzx); + +#endif diff --git a/src/calibre/utils/lzx/lzxd.c b/src/calibre/utils/lzx/lzxd.c new file mode 100644 index 0000000000..337af441fd --- /dev/null +++ b/src/calibre/utils/lzx/lzxd.c @@ -0,0 +1,905 @@ +/* This file is part of libmspack. + * (C) 2003-2004 Stuart Caie. + * + * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted + * by Microsoft Corporation. + * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +/* LZX decompression implementation */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +/* Microsoft's LZX document and their implementation of the + * com.ms.util.cab Java package do not concur. + * + * In the LZX document, there is a table showing the correlation between + * window size and the number of position slots. It states that the 1MB + * window = 40 slots and the 2MB window = 42 slots. In the implementation, + * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the + * first slot whose position base is equal to or more than the required + * window size'. This would explain why other tables in the document refer + * to 50 slots rather than 42. + * + * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode + * is not defined in the specification. + * + * The LZX document does not state the uncompressed block has an + * uncompressed length field. Where does this length field come from, so + * we can know how large the block is? The implementation has it as the 24 + * bits following after the 3 blocktype bits, before the alignment + * padding. + * + * The LZX document states that aligned offset blocks have their aligned + * offset huffman tree AFTER the main and length trees. The implementation + * suggests that the aligned offset tree is BEFORE the main and length + * trees. + * + * The LZX document decoding algorithm states that, in an aligned offset + * block, if an extra_bits value is 1, 2 or 3, then that number of bits + * should be read and the result added to the match offset. This is + * correct for 1 and 2, but not 3, where just a huffman symbol (using the + * aligned tree) should be read. + * + * Regarding the E8 preprocessing, the LZX document states 'No translation + * may be performed on the last 6 bytes of the input block'. This is + * correct. However, the pseudocode provided checks for the *E8 leader* + * up to the last 6 bytes. If the leader appears between -10 and -7 bytes + * from the end, this would cause the next four bytes to be modified, at + * least one of which would be in the last 6 bytes, which is not allowed + * according to the spec. + * + * The specification states that the huffman trees must always contain at + * least one element. However, many CAB files contain blocks where the + * length tree is completely empty (because there are no matches), and + * this is expected to succeed. 
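The ENSURE_BITS/PEEK_BITS/REMOVE_BITS macros defined just below keep a sliding bit window over the input, much like the pure-Python BitReader from the lzxd.py checkpoint this patch deletes (with one difference: the C code consumes the input as 16-bit little-endian words, so the byte order it sees is swapped). The checkpoint's reader is small enough to serve as a reference while following the macros:

    # The BitReader from the deleted pure-Python checkpoint, kept here as a
    # readable counterpart to the bitstream macros below (byte order aside).
    class BitReader(object):
        def __init__(self, data):
            # pad with zero bytes so peek() can always look ahead safely
            self.data, self.pos, self.nbits = \
                data + "\x00\x00\x00\x00", 0, len(data) * 8

        def peek(self, n):
            r, g = 0, 0
            while g < n:
                r = (r << 8) | ord(self.data[(self.pos + g) >> 3])
                g = g + 8 - ((self.pos + g) & 7)
            return (r >> (g - n)) & ((1 << n) - 1)

        def remove(self, n):
            self.pos += n
            return self.pos <= self.nbits

        def read(self, n):
            val = self.peek(n)
            self.remove(n)
            return val

    bits = BitReader("\xAB\xCD")                 # 10101011 11001101
    assert (bits.read(4), bits.read(8), bits.read(4)) == (0xA, 0xBC, 0xD)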
+ */ + + +/* LZX decompressor input macros + * + * STORE_BITS stores bitstream state in lzxd_stream structure + * RESTORE_BITS restores bitstream state from lzxd_stream structure + * READ_BITS(var,n) takes N bits from the buffer and puts them in var + * ENSURE_BITS(n) ensures there are at least N bits in the bit buffer. + * PEEK_BITS(n) extracts without removing N bits from the bit buffer + * REMOVE_BITS(n) removes N bits from the bit buffer + * + * These bit access routines work by using the area beyond the MSB and the + * LSB as a free source of zeroes when shifting. This avoids having to + * mask any bits. So we have to know the bit width of the bit buffer + * variable. + * + * The bit buffer datatype should be at least 32 bits wide: it must be + * possible to ENSURE_BITS(16), so it must be possible to add 16 new bits + * to the bit buffer when the bit buffer already has 1 to 15 bits left. + */ + +#if HAVE_LIMITS_H +# include +#endif +#ifndef CHAR_BIT +# define CHAR_BIT (8) +#endif +#define BITBUF_WIDTH (sizeof(bit_buffer) * CHAR_BIT) + +#define STORE_BITS do { \ + lzx->i_ptr = i_ptr; \ + lzx->i_end = i_end; \ + lzx->bit_buffer = bit_buffer; \ + lzx->bits_left = bits_left; \ +} while (0) + +#define RESTORE_BITS do { \ + i_ptr = lzx->i_ptr; \ + i_end = lzx->i_end; \ + bit_buffer = lzx->bit_buffer; \ + bits_left = lzx->bits_left; \ +} while (0) + +#define ENSURE_BITS(nbits) \ + while (bits_left < (nbits)) { \ + if (i_ptr >= i_end) { \ + if (lzxd_read_input(lzx)) return lzx->error; \ + i_ptr = lzx->i_ptr; \ + i_end = lzx->i_end; \ + } \ + bit_buffer |= ((i_ptr[1] << 8) | i_ptr[0]) \ + << (BITBUF_WIDTH - 16 - bits_left); \ + bits_left += 16; \ + i_ptr += 2; \ + } + +#define PEEK_BITS(nbits) (bit_buffer >> (BITBUF_WIDTH - (nbits))) + +#define REMOVE_BITS(nbits) ((bit_buffer <<= (nbits)), (bits_left -= (nbits))) + +#define READ_BITS(val, nbits) do { \ + ENSURE_BITS(nbits); \ + (val) = PEEK_BITS(nbits); \ + REMOVE_BITS(nbits); \ +} while (0) + +static int lzxd_read_input(struct lzxd_stream *lzx) { + int read = lzx->sys->read(lzx->input, &lzx->inbuf[0], (int)lzx->inbuf_size); + if (read < 0) return lzx->error = MSPACK_ERR_READ; + + /* huff decode's ENSURE_BYTES(16) might overrun the input stream, even + * if those bits aren't used, so fake 2 more bytes */ + if (read == 0) { + if (lzx->input_end) { + D(("out of input bytes")) + return lzx->error = MSPACK_ERR_READ; + } + else { + read = 2; + lzx->inbuf[0] = lzx->inbuf[1] = 0; + lzx->input_end = 1; + } + } + + lzx->i_ptr = &lzx->inbuf[0]; + lzx->i_end = &lzx->inbuf[read]; + + return MSPACK_ERR_OK; +} + +/* Huffman decoding macros */ + +/* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the + * bitstream using the stated table and puts it in var. + */ +#define READ_HUFFSYM(tbl, var) do { \ + /* huffman symbols can be up to 16 bits long */ \ + ENSURE_BITS(16); \ + /* immediate table lookup of [tablebits] bits of the code */ \ + sym = lzx->tbl##_table[PEEK_BITS(LZX_##tbl##_TABLEBITS)]; \ + /* is the symbol is longer than [tablebits] bits? (i=node index) */ \ + if (sym >= LZX_##tbl##_MAXSYMBOLS) { \ + /* decode remaining bits by tree traversal */ \ + i = 1 << (BITBUF_WIDTH - LZX_##tbl##_TABLEBITS); \ + do { \ + /* one less bit. error if we run out of bits before decode */ \ + i >>= 1; \ + if (i == 0) { \ + D(("out of bits in huffman decode")) \ + return lzx->error = MSPACK_ERR_DECRUNCH; \ + } \ + /* double node index and add 0 (left branch) or 1 (right) */ \ + sym <<= 1; sym |= (bit_buffer & i) ? 
1 : 0; \ + /* hop to next node index / decoded symbol */ \ + sym = lzx->tbl##_table[sym]; \ + /* while we are still in node indicies, not decoded symbols */ \ + } while (sym >= LZX_##tbl##_MAXSYMBOLS); \ + } \ + /* result */ \ + (var) = sym; \ + /* look up the code length of that symbol and discard those bits */ \ + i = lzx->tbl##_len[sym]; \ + REMOVE_BITS(i); \ +} while (0) + +/* BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */ +#define BUILD_TABLE(tbl) \ + if (make_decode_table(LZX_##tbl##_MAXSYMBOLS, LZX_##tbl##_TABLEBITS, \ + &lzx->tbl##_len[0], &lzx->tbl##_table[0])) \ + { \ + D(("failed to build %s table", #tbl)) \ + return lzx->error = MSPACK_ERR_DECRUNCH; \ + } + +/* make_decode_table(nsyms, nbits, length[], table[]) + * + * This function was coded by David Tritscher. It builds a fast huffman + * decoding table from a canonical huffman code lengths table. + * + * nsyms = total number of symbols in this huffman tree. + * nbits = any symbols with a code length of nbits or less can be decoded + * in one lookup of the table. + * length = A table to get code lengths from [0 to syms-1] + * table = The table to fill up with decoded symbols and pointers. + * + * Returns 0 for OK or 1 for error + */ + +static int make_decode_table(unsigned int nsyms, unsigned int nbits, + unsigned char *length, unsigned short *table) +{ + register unsigned short sym; + register unsigned int leaf, fill; + register unsigned char bit_num; + unsigned int pos = 0; /* the current position in the decode table */ + unsigned int table_mask = 1 << nbits; + unsigned int bit_mask = table_mask >> 1; /* don't do 0 length codes */ + unsigned int next_symbol = bit_mask; /* base of allocation for long codes */ + + /* fill entries for codes short enough for a direct mapping */ + for (bit_num = 1; bit_num <= nbits; bit_num++) { + for (sym = 0; sym < nsyms; sym++) { + if (length[sym] != bit_num) continue; + leaf = pos; + if((pos += bit_mask) > table_mask) return 1; /* table overrun */ + /* fill all possible lookups of this symbol with the symbol itself */ + for (fill = bit_mask; fill-- > 0;) table[leaf++] = sym; + } + bit_mask >>= 1; + } + + /* full table already? */ + if (pos == table_mask) return 0; + + /* clear the remainder of the table */ + for (sym = pos; sym < table_mask; sym++) table[sym] = 0xFFFF; + + /* allow codes to be up to nbits+16 long, instead of nbits */ + pos <<= 16; + table_mask <<= 16; + bit_mask = 1 << 15; + + for (bit_num = nbits+1; bit_num <= 16; bit_num++) { + for (sym = 0; sym < nsyms; sym++) { + if (length[sym] != bit_num) continue; + + leaf = pos >> 16; + for (fill = 0; fill < bit_num - nbits; fill++) { + /* if this path hasn't been taken yet, 'allocate' two entries */ + if (table[leaf] == 0xFFFF) { + table[(next_symbol << 1)] = 0xFFFF; + table[(next_symbol << 1) + 1] = 0xFFFF; + table[leaf] = next_symbol++; + } + /* follow the path and select either left or right for next bit */ + leaf = table[leaf] << 1; + if ((pos >> (15-fill)) & 1) leaf++; + } + table[leaf] = sym; + + if ((pos += bit_mask) > table_mask) return 1; /* table overflow */ + } + bit_mask >>= 1; + } + + /* full table? */ + if (pos == table_mask) return 0; + + /* either erroneous table, or all elements are 0 - let's find out. */ + for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1; + return 0; +} + + +/* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols + * first to last in the given table. The code lengths are stored in their + * own special LZX way. 
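make_decode_table() above fills the table in two passes: codes no longer than nbits are written directly, with every table slot that shares the code's prefix pointing at the symbol, and longer codes get a small tree grafted on past the direct-mapped area. A Python sketch of just the direct-mapped pass, under the same canonical-code assumption (the overflow-tree half is omitted):

    # Python sketch of the direct-mapped fill in make_decode_table() above;
    # codes longer than 'nbits' (the tree overflow path) are not handled here.
    def direct_decode_table(lengths, nbits):
        table = [None] * (1 << nbits)
        pos = 0                                   # next free slot in the table
        bit_mask = (1 << nbits) >> 1              # slots consumed by a 1-bit code
        for bit_num in range(1, nbits + 1):
            for sym, length in enumerate(lengths):
                if length != bit_num:
                    continue
                if pos + bit_mask > (1 << nbits):
                    raise ValueError('table overrun: lengths not canonical')
                for leaf in range(pos, pos + bit_mask):
                    table[leaf] = sym             # every prefix match decodes to sym
                pos += bit_mask
            bit_mask >>= 1
        return table

    # Symbols 0..2 with code lengths 1, 2, 2 get canonical codes 0, 10, 11;
    # a 2-bit lookup table then reads 00->0, 01->0, 10->1, 11->2.
    assert direct_decode_table([1, 2, 2], 2) == [0, 0, 1, 2]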
+ */ +#define READ_LENGTHS(tbl, first, last) do { \ + STORE_BITS; \ + if (lzxd_read_lens(lzx, &lzx->tbl##_len[0], (first), \ + (unsigned int)(last))) return lzx->error; \ + RESTORE_BITS; \ +} while (0) + +static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens, + unsigned int first, unsigned int last) +{ + /* bit buffer and huffman symbol decode variables */ + register unsigned int bit_buffer; + register int bits_left, i; + register unsigned short sym; + unsigned char *i_ptr, *i_end; + + unsigned int x, y; + int z; + + RESTORE_BITS; + + /* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */ + for (x = 0; x < 20; x++) { + READ_BITS(y, 4); + lzx->PRETREE_len[x] = y; + } + BUILD_TABLE(PRETREE); + + for (x = first; x < last; ) { + READ_HUFFSYM(PRETREE, z); + if (z == 17) { + /* code = 17, run of ([read 4 bits]+4) zeros */ + READ_BITS(y, 4); y += 4; + while (y--) lens[x++] = 0; + } + else if (z == 18) { + /* code = 18, run of ([read 5 bits]+20) zeros */ + READ_BITS(y, 5); y += 20; + while (y--) lens[x++] = 0; + } + else if (z == 19) { + /* code = 19, run of ([read 1 bit]+4) [read huffman symbol] */ + READ_BITS(y, 1); y += 4; + READ_HUFFSYM(PRETREE, z); + z = lens[x] - z; if (z < 0) z += 17; + while (y--) lens[x++] = z; + } + else { + /* code = 0 to 16, delta current length entry */ + z = lens[x] - z; if (z < 0) z += 17; + lens[x++] = z; + } + } + + STORE_BITS; + + return MSPACK_ERR_OK; +} + +/* LZX static data tables: + * + * LZX uses 'position slots' to represent match offsets. For every match, + * a small 'position slot' number and a small offset from that slot are + * encoded instead of one large offset. + * + * position_base[] is an index to the position slot bases + * + * extra_bits[] states how many bits of offset-from-base data is needed. + */ +static unsigned int position_base[51]; +static unsigned char extra_bits[51]; + +static void lzxd_static_init(void) { + int i, j; + + for (i = 0, j = 0; i < 51; i += 2) { + extra_bits[i] = j; /* 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7... */ + extra_bits[i+1] = j; + if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */ + } + + for (i = 0, j = 0; i < 51; i++) { + position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */ + j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... 
*/ + } +} + +static void lzxd_reset_state(struct lzxd_stream *lzx) { + int i; + + lzx->R0 = 1; + lzx->R1 = 1; + lzx->R2 = 1; + lzx->header_read = 0; + lzx->block_remaining = 0; + lzx->block_type = LZX_BLOCKTYPE_INVALID; + + /* initialise tables to 0 (because deltas will be applied to them) */ + for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) lzx->MAINTREE_len[i] = 0; + for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) lzx->LENGTH_len[i] = 0; +} + +/*-------- main LZX code --------*/ + +struct lzxd_stream *lzxd_init(struct mspack_system *system, + struct mspack_file *input, + struct mspack_file *output, + int window_bits, + int reset_interval, + int input_buffer_size, + off_t output_length) +{ + unsigned int window_size = 1 << window_bits; + struct lzxd_stream *lzx; + + if (!system) return NULL; + + /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */ + if (window_bits < 15 || window_bits > 21) return NULL; + + input_buffer_size = (input_buffer_size + 1) & -2; + if (!input_buffer_size) return NULL; + + /* initialise static data */ + lzxd_static_init(); + + /* allocate decompression state */ + if (!(lzx = system->alloc(system, sizeof(struct lzxd_stream)))) { + return NULL; + } + + /* allocate decompression window and input buffer */ + lzx->window = system->alloc(system, (size_t) window_size); + lzx->inbuf = system->alloc(system, (size_t) input_buffer_size); + if (!lzx->window || !lzx->inbuf) { + system->free(lzx->window); + system->free(lzx->inbuf); + system->free(lzx); + return NULL; + } + + /* initialise decompression state */ + lzx->sys = system; + lzx->input = input; + lzx->output = output; + lzx->offset = 0; + lzx->length = output_length; + + lzx->inbuf_size = input_buffer_size; + lzx->window_size = 1 << window_bits; + lzx->window_posn = 0; + lzx->frame_posn = 0; + lzx->frame = 0; + lzx->reset_interval = reset_interval; + lzx->intel_filesize = 0; + lzx->intel_curpos = 0; + + /* window bits: 15 16 17 18 19 20 21 + * position slots: 30 32 34 36 38 42 50 */ + lzx->posn_slots = ((window_bits == 21) ? 50 : + ((window_bits == 20) ? 
42 : (window_bits << 1))); + lzx->intel_started = 0; + lzx->input_end = 0; + + lzx->error = MSPACK_ERR_OK; + + lzx->i_ptr = lzx->i_end = &lzx->inbuf[0]; + lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0]; + lzx->bit_buffer = lzx->bits_left = 0; + + lzxd_reset_state(lzx); + return lzx; +} + +void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) { + if (lzx) lzx->length = out_bytes; +} + +int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { + /* bitstream reading and huffman variables */ + register unsigned int bit_buffer; + register int bits_left, i=0; + register unsigned short sym; + unsigned char *i_ptr, *i_end; + + int match_length, length_footer, extra, verbatim_bits, bytes_todo; + int this_run, main_element, aligned_bits, j; + unsigned char *window, *runsrc, *rundest, buf[12]; + unsigned int frame_size=0, end_frame, match_offset, window_posn; + unsigned int R0, R1, R2; + + /* easy answers */ + if (!lzx || (out_bytes < 0)) return MSPACK_ERR_ARGS; + if (lzx->error) return lzx->error; + + /* flush out any stored-up bytes before we begin */ + i = lzx->o_end - lzx->o_ptr; + if ((off_t) i > out_bytes) i = (int) out_bytes; + if (i) { + if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) { + return lzx->error = MSPACK_ERR_WRITE; + } + lzx->o_ptr += i; + lzx->offset += i; + out_bytes -= i; + } + if (out_bytes == 0) return MSPACK_ERR_OK; + + /* restore local state */ + RESTORE_BITS; + window = lzx->window; + window_posn = lzx->window_posn; + R0 = lzx->R0; + R1 = lzx->R1; + R2 = lzx->R2; + + end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1; + + while (lzx->frame < end_frame) { + /* have we reached the reset interval? (if there is one?) */ + if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) { + if (lzx->block_remaining) { + D(("%d bytes remaining at reset interval", lzx->block_remaining)) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + + /* re-read the intel header and reset the huffman lengths */ + lzxd_reset_state(lzx); + } + + /* read header if necessary */ + if (!lzx->header_read) { + /* read 1 bit. if bit=0, intel filesize = 0. + * if bit=1, read intel filesize (32 bits) */ + j = 0; READ_BITS(i, 1); if (i) { READ_BITS(i, 16); READ_BITS(j, 16); } + lzx->intel_filesize = (i << 16) | j; + lzx->header_read = 1; + } + + /* calculate size of frame: all frames are 32k except the final frame + * which is 32kb or less. this can only be calculated when lzx->length + * has been filled in. 
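+       *
+       * As a worked example (with illustrative numbers, not values from
+       * the original comment): if LZX_FRAME_SIZE is 32768, lzx->length is
+       * 100000 and lzx->offset is 98304, the final frame is
+       * 100000 - 98304 = 1696 bytes instead of a full 32768-byte frame.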
*/ + frame_size = LZX_FRAME_SIZE; + if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) { + frame_size = lzx->length - lzx->offset; + } + + /* decode until one more frame is available */ + bytes_todo = lzx->frame_posn + frame_size - window_posn; + while (bytes_todo > 0) { + /* initialise new block, if one is needed */ + if (lzx->block_remaining == 0) { + /* realign if previous block was an odd-sized UNCOMPRESSED block */ + if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) && + (lzx->block_length & 1)) + { + if (i_ptr == i_end) { + if (lzxd_read_input(lzx)) return lzx->error; + i_ptr = lzx->i_ptr; + i_end = lzx->i_end; + } + i_ptr++; + } + + /* read block type (3 bits) and block length (24 bits) */ + READ_BITS(lzx->block_type, 3); + READ_BITS(i, 16); READ_BITS(j, 8); + lzx->block_remaining = lzx->block_length = (i << 8) | j; + /*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/ + + /* read individual block headers */ + switch (lzx->block_type) { + case LZX_BLOCKTYPE_ALIGNED: + /* read lengths of and build aligned huffman decoding tree */ + for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; } + BUILD_TABLE(ALIGNED); + /* no break -- rest of aligned header is same as verbatim */ + case LZX_BLOCKTYPE_VERBATIM: + /* read lengths of and build main huffman decoding tree */ + READ_LENGTHS(MAINTREE, 0, 256); + READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + (lzx->posn_slots << 3)); + BUILD_TABLE(MAINTREE); + /* if the literal 0xE8 is anywhere in the block... */ + if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1; + /* read lengths of and build lengths huffman decoding tree */ + READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS); + BUILD_TABLE(LENGTH); + break; + + case LZX_BLOCKTYPE_UNCOMPRESSED: + /* because we can't assume otherwise */ + lzx->intel_started = 1; + + /* read 1-16 (not 0-15) bits to align to bytes */ + ENSURE_BITS(16); + if (bits_left > 16) i_ptr -= 2; + bits_left = 0; bit_buffer = 0; + + /* read 12 bytes of stored R0 / R1 / R2 values */ + for (rundest = &buf[0], i = 0; i < 12; i++) { + if (i_ptr == i_end) { + if (lzxd_read_input(lzx)) return lzx->error; + i_ptr = lzx->i_ptr; + i_end = lzx->i_end; + } + *rundest++ = *i_ptr++; + } + R0 = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); + R1 = buf[4] | (buf[5] << 8) | (buf[6] << 16) | (buf[7] << 24); + R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24); + break; + + default: + D(("bad block type")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + } + + /* decode more of the block: + * run = min(what's available, what's needed) */ + this_run = lzx->block_remaining; + if (this_run > bytes_todo) this_run = bytes_todo; + + /* assume we decode exactly this_run bytes, for now */ + bytes_todo -= this_run; + lzx->block_remaining -= this_run; + + /* decode at least this_run bytes */ + switch (lzx->block_type) { + case LZX_BLOCKTYPE_VERBATIM: + while (this_run > 0) { + READ_HUFFSYM(MAINTREE, main_element); + if (main_element < LZX_NUM_CHARS) { + /* literal: 0 to LZX_NUM_CHARS-1 */ + window[window_posn++] = main_element; + this_run--; + } + else { + /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ + main_element -= LZX_NUM_CHARS; + + /* get match length */ + match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; + if (match_length == LZX_NUM_PRIMARY_LENGTHS) { + READ_HUFFSYM(LENGTH, length_footer); + match_length += length_footer; + } + match_length += LZX_MIN_MATCH; + + /* get match offset */ + switch ((match_offset = (main_element >> 3))) { + case 0: 
match_offset = R0; break; + case 1: match_offset = R1; R1=R0; R0 = match_offset; break; + case 2: match_offset = R2; R2=R0; R0 = match_offset; break; + case 3: match_offset = 1; R2=R1; R1=R0; R0 = match_offset; break; + default: + extra = extra_bits[match_offset]; + READ_BITS(verbatim_bits, extra); + match_offset = position_base[match_offset] - 2 + verbatim_bits; + R2 = R1; R1 = R0; R0 = match_offset; + } + + if ((window_posn + match_length) > lzx->window_size) { + D(("match ran over window wrap")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + + /* copy match */ + rundest = &window[window_posn]; + i = match_length; + /* does match offset wrap the window? */ + if (match_offset > window_posn) { + /* j = length from match offset to end of window */ + j = match_offset - window_posn; + if (j > (int) lzx->window_size) { + D(("match offset beyond window boundaries")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + runsrc = &window[lzx->window_size - j]; + if (j < i) { + /* if match goes over the window edge, do two copy runs */ + i -= j; while (j-- > 0) *rundest++ = *runsrc++; + runsrc = window; + } + while (i-- > 0) *rundest++ = *runsrc++; + } + else { + runsrc = rundest - match_offset; + while (i-- > 0) *rundest++ = *runsrc++; + } + + this_run -= match_length; + window_posn += match_length; + } + } /* while (this_run > 0) */ + break; + + case LZX_BLOCKTYPE_ALIGNED: + while (this_run > 0) { + READ_HUFFSYM(MAINTREE, main_element); + if (main_element < LZX_NUM_CHARS) { + /* literal: 0 to LZX_NUM_CHARS-1 */ + window[window_posn++] = main_element; + this_run--; + } + else { + /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ + main_element -= LZX_NUM_CHARS; + + /* get match length */ + match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; + if (match_length == LZX_NUM_PRIMARY_LENGTHS) { + READ_HUFFSYM(LENGTH, length_footer); + match_length += length_footer; + } + match_length += LZX_MIN_MATCH; + + /* get match offset */ + switch ((match_offset = (main_element >> 3))) { + case 0: match_offset = R0; break; + case 1: match_offset = R1; R1 = R0; R0 = match_offset; break; + case 2: match_offset = R2; R2 = R0; R0 = match_offset; break; + default: + extra = extra_bits[match_offset]; + match_offset = position_base[match_offset] - 2; + if (extra > 3) { + /* verbatim and aligned bits */ + extra -= 3; + READ_BITS(verbatim_bits, extra); + match_offset += (verbatim_bits << 3); + READ_HUFFSYM(ALIGNED, aligned_bits); + match_offset += aligned_bits; + } + else if (extra == 3) { + /* aligned bits only */ + READ_HUFFSYM(ALIGNED, aligned_bits); + match_offset += aligned_bits; + } + else if (extra > 0) { /* extra==1, extra==2 */ + /* verbatim bits only */ + READ_BITS(verbatim_bits, extra); + match_offset += verbatim_bits; + } + else /* extra == 0 */ { + /* ??? not defined in LZX specification! */ + match_offset = 1; + } + /* update repeated offset LRU queue */ + R2 = R1; R1 = R0; R0 = match_offset; + } + + if ((window_posn + match_length) > lzx->window_size) { + D(("match ran over window wrap")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + + /* copy match */ + rundest = &window[window_posn]; + i = match_length; + /* does match offset wrap the window? 
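+                 * (an illustrative case with made-up numbers: if
+                 * window_posn is 10 and match_offset is 100, then j is 90,
+                 * so the copy starts 90 bytes before the end of the window
+                 * and, if the match runs past the window edge, continues
+                 * from the start of the window)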
*/ + if (match_offset > window_posn) { + /* j = length from match offset to end of window */ + j = match_offset - window_posn; + if (j > (int) lzx->window_size) { + D(("match offset beyond window boundaries")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + runsrc = &window[lzx->window_size - j]; + if (j < i) { + /* if match goes over the window edge, do two copy runs */ + i -= j; while (j-- > 0) *rundest++ = *runsrc++; + runsrc = window; + } + while (i-- > 0) *rundest++ = *runsrc++; + } + else { + runsrc = rundest - match_offset; + while (i-- > 0) *rundest++ = *runsrc++; + } + + this_run -= match_length; + window_posn += match_length; + } + } /* while (this_run > 0) */ + break; + + case LZX_BLOCKTYPE_UNCOMPRESSED: + /* as this_run is limited not to wrap a frame, this also means it + * won't wrap the window (as the window is a multiple of 32k) */ + rundest = &window[window_posn]; + window_posn += this_run; + while (this_run > 0) { + if ((i = i_end - i_ptr)) { + if (i > this_run) i = this_run; + lzx->sys->copy(i_ptr, rundest, (size_t) i); + rundest += i; + i_ptr += i; + this_run -= i; + } + else { + if (lzxd_read_input(lzx)) return lzx->error; + i_ptr = lzx->i_ptr; + i_end = lzx->i_end; + } + } + break; + + default: + D(("Default Here.")); + return lzx->error = MSPACK_ERR_DECRUNCH; /* might as well */ + } + + /* did the final match overrun our desired this_run length? */ + if (this_run < 0) { + if ((unsigned int)(-this_run) > lzx->block_remaining) { + D(("overrun went past end of block by %d (%d remaining)", + -this_run, lzx->block_remaining )) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + lzx->block_remaining -= -this_run; + } + } /* while (bytes_todo > 0) */ + + /* streams don't extend over frame boundaries */ + if ((window_posn - lzx->frame_posn) != frame_size) { + D(("decode beyond output frame limits! %d != %d", + window_posn - lzx->frame_posn, frame_size)) + /* Ignored */ +#if 0 + return lzx->error = MSPACK_ERR_DECRUNCH; +#endif + } + + /* re-align input bitstream */ + if (bits_left > 0) ENSURE_BITS(16); + if (bits_left & 15) REMOVE_BITS(bits_left & 15); + + /* check that we've used all of the previous frame first */ + if (lzx->o_ptr != lzx->o_end) { + D(("%d avail bytes, new %d frame", lzx->o_end-lzx->o_ptr, frame_size)) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + + /* does this intel block _really_ need decoding? */ + if (lzx->intel_started && lzx->intel_filesize && + (lzx->frame <= 32768) && (frame_size > 10)) + { + unsigned char *data = &lzx->e8_buf[0]; + unsigned char *dataend = &lzx->e8_buf[frame_size - 10]; + signed int curpos = lzx->intel_curpos; + signed int filesize = lzx->intel_filesize; + signed int abs_off, rel_off; + + /* copy e8 block to the e8 buffer and tweak if needed */ + lzx->o_ptr = data; + lzx->sys->copy(&lzx->window[lzx->frame_posn], data, frame_size); + + while (data < dataend) { + if (*data++ != 0xE8) { curpos++; continue; } + abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24); + if ((abs_off >= -curpos) && (abs_off < filesize)) { + rel_off = (abs_off >= 0) ? 
abs_off - curpos : abs_off + filesize; + data[0] = (unsigned char) rel_off; + data[1] = (unsigned char) (rel_off >> 8); + data[2] = (unsigned char) (rel_off >> 16); + data[3] = (unsigned char) (rel_off >> 24); + } + data += 4; + curpos += 5; + } + lzx->intel_curpos += frame_size; + } + else { + lzx->o_ptr = &lzx->window[lzx->frame_posn]; + if (lzx->intel_filesize) lzx->intel_curpos += frame_size; + } + lzx->o_end = &lzx->o_ptr[frame_size]; + + /* write a frame */ + i = (out_bytes < (off_t)frame_size) ? (unsigned int)out_bytes : frame_size; + if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) { + return lzx->error = MSPACK_ERR_WRITE; + } + lzx->o_ptr += i; + lzx->offset += i; + out_bytes -= i; + + /* advance frame start position */ + lzx->frame_posn += frame_size; + lzx->frame++; + + /* wrap window / frame position pointers */ + if (window_posn == lzx->window_size) window_posn = 0; + if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0; + + } /* while (lzx->frame < end_frame) */ + + if (out_bytes) { + D(("bytes left to output")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + + /* store local state */ + STORE_BITS; + lzx->window_posn = window_posn; + lzx->R0 = R0; + lzx->R1 = R1; + lzx->R2 = R2; + + return MSPACK_ERR_OK; +} + +void lzxd_free(struct lzxd_stream *lzx) { + struct mspack_system *sys; + if (lzx) { + sys = lzx->sys; + sys->free(lzx->inbuf); + sys->free(lzx->window); + sys->free(lzx); + } +} diff --git a/src/calibre/utils/lzx/lzxglue.c b/src/calibre/utils/lzx/lzxglue.c new file mode 100644 index 0000000000..7820c68cbf --- /dev/null +++ b/src/calibre/utils/lzx/lzxglue.c @@ -0,0 +1,172 @@ +/*--[lzxglue.c]---------------------------------------------------------------- + | Copyright (C) 2004 DRS + | + | This file is part of the "openclit" library for processing .LIT files. + | + | "Openclit" is free software; you can redistribute it and/or modify + | it under the terms of the GNU General Public License as published by + | the Free Software Foundation; either version 2 of the License, or + | (at your option) any later version. + | + | This program is distributed in the hope that it will be useful, + | but WITHOUT ANY WARRANTY; without even the implied warranty of + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + | GNU General Public License for more details. + | + | You should have received a copy of the GNU General Public License + | along with this program; if not, write to the Free Software + | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + | + | The GNU General Public License may also be available at the following + | URL: http://www.gnu.org/licenses/gpl.html +*/ + +/* This provides a "glue" between Stuart Caie's libmspack library and the + * Openclit calls to the earlier LZX library. + * + * This way, I should be able to use the files unmodified. 
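+ *
+ * A minimal usage sketch of the glue interface defined below (window_bits,
+ * the buffer names and the error handling are placeholders, not part of
+ * the original code):
+ *
+ *   LZXinit(window_bits);
+ *   if (LZXdecompress(compressed, plain, compressed_len, plain_len) != 0)
+ *       ...handle the error...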
+ */ +#include +#include +#include "litlib.h" +#include "mspack.h" +#include "lzx.h" + +typedef struct memory_file +{ + unsigned int magic; /* 0xB5 */ + void * buffer; + int total_bytes; + int current_bytes; +} memory_file; + + +void * glue_alloc(struct mspack_system *this, size_t bytes) +{ + void * p; + p = (void *)malloc(bytes); + if (p == NULL) { + lit_error(ERR_R|ERR_LIBC,"Malloc(%d) failed!", bytes); + } + return p; +} + +void glue_free(void * p) +{ + free(p); +} + +void glue_copy(void *src, void *dest, size_t bytes) +{ + memcpy(dest, src, bytes); +} + +struct mspack_file * glue_open(struct mspack_system *this, char *filename, + int mode) +{ + lit_error(0,"MSPACK_OPEN unsupported!"); + return NULL; +} + +void glue_close(struct mspack_file * file) { + return; +} + + +int glue_read(struct mspack_file * file, void * buffer, int bytes) +{ + memory_file * mem; + int remaining; + + mem = (memory_file *)file; + if (mem->magic != 0xB5) return -1; + + remaining = mem->total_bytes - mem->current_bytes; + if (!remaining) return 0; + if (bytes > remaining) bytes = remaining; + memcpy(buffer, (unsigned char *)mem->buffer+mem->current_bytes, bytes); + mem->current_bytes += bytes; + return bytes; +} + +int glue_write(struct mspack_file * file, void * buffer, int bytes) +{ + memory_file * mem; + int remaining; + + mem = (memory_file *)file; + if (mem->magic != 0xB5) return -1; + + remaining = mem->total_bytes - mem->current_bytes; + if (!remaining) return 0; + if (bytes > remaining) { + lit_error(0,"MSPACK_READ tried to write %d bytes, only %d left.", + bytes, remaining); + bytes = remaining; + } + memcpy((unsigned char *)mem->buffer+mem->current_bytes, buffer, bytes); + mem->current_bytes += bytes; + return bytes; +} + +struct mspack_system lzxglue_system = +{ + glue_open, + glue_close, + glue_read, /* Read */ + glue_write, /* Write */ + NULL, /* Seek */ + NULL, /* Tell */ + NULL, /* Message */ + glue_alloc, + glue_free, + glue_copy, + NULL /* Termination */ +}; + +int LZXwindow; +struct lzxd_stream * lzx_stream = NULL; + + +/* Can't really init here,don't know enough */ +int LZXinit(int window) +{ + LZXwindow = window; + lzx_stream = NULL; + + return 0; +} + +/* Doesn't exist. 
Oh well, reinitialize state every time anyway */ +void LZXreset(void) +{ + return; +} + +int LZXdecompress(unsigned char *inbuf, unsigned char *outbuf, + unsigned int inlen, unsigned int outlen) +{ + int err; + memory_file source; + memory_file dest; + + source.magic = 0xB5; + source.buffer = inbuf; + source.current_bytes = 0; + source.total_bytes = inlen; + + dest.magic = 0xB5; + dest.buffer = outbuf; + dest.current_bytes = 0; + dest.total_bytes = outlen; + + lzx_stream = lzxd_init(&lzxglue_system, (struct mspack_file *)&source, + (struct mspack_file *)&dest, LZXwindow, + 0x7fff /* Never reset, I do it */, 4096, outlen); + err = -1; + if (lzx_stream) err = lzxd_decompress(lzx_stream, outlen); + + lzxd_free(lzx_stream); + lzx_stream = NULL; + return err; +} diff --git a/src/calibre/utils/lzx/lzxmodule.c b/src/calibre/utils/lzx/lzxmodule.c new file mode 100644 index 0000000000..44cc91c11d --- /dev/null +++ b/src/calibre/utils/lzx/lzxmodule.c @@ -0,0 +1,206 @@ +#include + +#include +#include + +static char lzx_doc[] = +"Provide basic LZX decompression using the code from libmspack."; + +static PyObject *LzxError = NULL; + +typedef struct memory_file { + unsigned int magic; /* 0xB5 */ + void * buffer; + int total_bytes; + int current_bytes; +} memory_file; + +void * +glue_alloc(struct mspack_system *this, size_t bytes) +{ + void *p = NULL; + p = (void *)malloc(bytes); + if (p == NULL) { + return (void *)PyErr_NoMemory(); + } + return p; +} + +void +glue_free(void *p) +{ + free(p); +} + +void +glue_copy(void *src, void *dest, size_t bytes) +{ + memcpy(dest, src, bytes); +} + +struct mspack_file * +glue_open(struct mspack_system *this, char *filename, int mode) +{ + PyErr_SetString(LzxError, "MSPACK_OPEN unsupported"); + return NULL; +} + +void +glue_close(struct mspack_file *file) +{ + return; +} + +int +glue_read(struct mspack_file *file, void * buffer, int bytes) +{ + memory_file *mem; + int remaining; + + mem = (memory_file *)file; + if (mem->magic != 0xB5) return -1; + + remaining = mem->total_bytes - mem->current_bytes; + if (!remaining) return 0; + if (bytes > remaining) bytes = remaining; + memcpy(buffer, (unsigned char *)mem->buffer + mem->current_bytes, bytes); + mem->current_bytes += bytes; + + return bytes; +} + +int +glue_write(struct mspack_file * file, void * buffer, int bytes) +{ + memory_file *mem; + int remaining; + + mem = (memory_file *)file; + if (mem->magic != 0xB5) return -1; + + remaining = mem->total_bytes - mem->current_bytes; + if (!remaining) return 0; + if (bytes > remaining) { + PyErr_SetString(LzxError, + "MSPACK_WRITE tried to write beyond end of buffer"); + bytes = remaining; + } + memcpy((unsigned char *)mem->buffer + mem->current_bytes, buffer, bytes); + mem->current_bytes += bytes; + return bytes; +} + +struct mspack_system lzxglue_system = { + glue_open, + glue_close, + glue_read, /* Read */ + glue_write, /* Write */ + NULL, /* Seek */ + NULL, /* Tell */ + NULL, /* Message */ + glue_alloc, + glue_free, + glue_copy, + NULL /* Termination */ +}; + + +int LZXwindow = 0; +struct lzxd_stream * lzx_stream = NULL; + +/* Can't really init here, don't know enough */ +static PyObject * +init(PyObject *self, PyObject *args) +{ + int window = 0; + + if (!PyArg_ParseTuple(args, "i", &window)) { + return NULL; + } + + LZXwindow = window; + lzx_stream = NULL; + + Py_RETURN_NONE; +} + +/* Doesn't exist. 
Oh well, reinitialize state every time anyway */ +static PyObject * +reset(PyObject *self, PyObject *args) +{ + if (!PyArg_ParseTuple(args, "")) { + return NULL; + } + + Py_RETURN_NONE; +} + +//int LZXdecompress(unsigned char *inbuf, unsigned char *outbuf, +// unsigned int inlen, unsigned int outlen) +static PyObject * +decompress(PyObject *self, PyObject *args) +{ + unsigned char *inbuf; + unsigned char *outbuf; + unsigned int inlen; + unsigned int outlen; + int err; + memory_file source; + memory_file dest; + PyObject *retval = NULL; + + if (!PyArg_ParseTuple(args, "s#I", &inbuf, &inlen, &outlen)) { + return NULL; + } + + retval = PyString_FromStringAndSize(NULL, outlen); + if (retval == NULL) { + return NULL; + } + outbuf = (unsigned char *)PyString_AS_STRING(retval); + + source.magic = 0xB5; + source.buffer = inbuf; + source.current_bytes = 0; + source.total_bytes = inlen; + + dest.magic = 0xB5; + dest.buffer = outbuf; + dest.current_bytes = 0; + dest.total_bytes = outlen; + + lzx_stream = lzxd_init(&lzxglue_system, (struct mspack_file *)&source, + (struct mspack_file *)&dest, LZXwindow, + 0x7fff /* Never reset, I do it */, 4096, outlen); + err = -1; + if (lzx_stream) err = lzxd_decompress(lzx_stream, outlen); + + lzxd_free(lzx_stream); + lzx_stream = NULL; + + if (err != MSPACK_ERR_OK) { + Py_DECREF(retval); + PyErr_SetString(LzxError, "LZX decompression failed"); + } + + return retval; +} + +static PyMethodDef lzx_methods[] = { + { "init", &init, METH_VARARGS, "Initialize the LZX decompressor" }, + { "reset", &reset, METH_VARARGS, "Reset the LZX decompressor" }, + { "decompress", &decompress, METH_VARARGS, "Run the LZX decompressor" }, + { NULL, NULL } +}; + +PyMODINIT_FUNC +initlzx(void) +{ + PyObject *m; + + m = Py_InitModule3("lzx", lzx_methods, lzx_doc); + if (m == NULL) return; + LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL); + Py_INCREF(LzxError); + PyModule_AddObject(m, "LzxError", LzxError); +} diff --git a/src/calibre/utils/lzx/mspack.h b/src/calibre/utils/lzx/mspack.h new file mode 100644 index 0000000000..b48623fed0 --- /dev/null +++ b/src/calibre/utils/lzx/mspack.h @@ -0,0 +1,1482 @@ +/* libmspack -- a library for working with Microsoft compression formats. + * (C) 2003-2004 Stuart Caie + * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/** \mainpage + * + * \section intro Introduction + * + * libmspack is a library which provides compressors and decompressors, + * archivers and dearchivers for Microsoft compression formats. 
+ * + * \section formats Formats supported + * + * The following file formats are supported: + * - SZDD files, which use LZSS compression + * - KWAJ files, which use LZSS, LZSS+Huffman or deflate compression + * - .HLP (MS Help) files, which use LZSS compression + * - .CAB (MS Cabinet) files, which use deflate, LZX or Quantum compression + * - .CHM (HTML Help) files, which use LZX compression + * - .LIT (MS EBook) files, which use LZX compression and DES encryption + * + * To determine the capabilities of the library, and the binary + * compatibility version of any particular compressor or decompressor, use + * the mspack_version() function. The UNIX library interface version is + * defined as the highest-versioned library component. + * + * \section starting Getting started + * + * The macro MSPACK_SYS_SELFTEST() should be used to ensure the library can + * be used. In particular, it checks if the caller is using 32-bit file I/O + * when the library is compiled for 64-bit file I/O and vice versa. + * + * If compiled normally, the library includes basic file I/O and memory + * management functionality using the standard C library. This can be + * customised and replaced entirely by creating a mspack_system structure. + * + * A compressor or decompressor for the required format must be + * instantiated before it can be used. Each construction function takes + * one parameter, which is either a pointer to a custom mspack_system + * structure, or NULL to use the default. The instantiation returned, if + * not NULL, contains function pointers (methods) to work with the given + * file format. + * + * For compression: + * - mspack_create_cab_compressor() creates a mscab_compressor + * - mspack_create_chm_compressor() creates a mschm_compressor + * - mspack_create_lit_compressor() creates a mslit_compressor + * - mspack_create_hlp_compressor() creates a mshlp_compressor + * - mspack_create_szdd_compressor() creates a msszdd_compressor + * - mspack_create_kwaj_compressor() creates a mskwaj_compressor + * + * For decompression: + * - mspack_create_cab_decompressor() creates a mscab_decompressor + * - mspack_create_chm_decompressor() creates a mschm_decompressor + * - mspack_create_lit_decompressor() creates a mslit_decompressor + * - mspack_create_hlp_decompressor() creates a mshlp_decompressor + * - mspack_create_szdd_decompressor() creates a msszdd_decompressor + * - mspack_create_kwaj_decompressor() creates a mskwaj_decompressor + * + * Once finished working with a format, each kind of + * compressor/decompressor has its own specific destructor: + * - mspack_destroy_cab_compressor() + * - mspack_destroy_cab_decompressor() + * - mspack_destroy_chm_compressor() + * - mspack_destroy_chm_decompressor() + * - mspack_destroy_lit_compressor() + * - mspack_destroy_lit_decompressor() + * - mspack_destroy_hlp_compressor() + * - mspack_destroy_hlp_decompressor() + * - mspack_destroy_szdd_compressor() + * - mspack_destroy_szdd_decompressor() + * - mspack_destroy_kwaj_compressor() + * - mspack_destroy_kwaj_decompressor() + * + * Destroying a compressor or decompressor does not destroy any objects, + * structures or handles that have been created using that compressor or + * decompressor. Ensure that everything created or opened is destroyed or + * closed before compressor/decompressor is itself destroyed. + * + * \section errors Error codes + * + * All compressors and decompressors use the same set of error codes. Most + * methods return an error code directly. 
For methods which do not + * return error codes directly, the error code can be obtained with the + * last_error() method. + * + * - #MSPACK_ERR_OK is used to indicate success. This error code is defined + * as zero, all other code are non-zero. + * - #MSPACK_ERR_ARGS indicates that a method was called with inappropriate + * arguments. + * - #MSPACK_ERR_OPEN indicates that mspack_system::open() failed. + * - #MSPACK_ERR_READ indicates that mspack_system::read() failed. + * - #MSPACK_ERR_WRITE indicates that mspack_system::write() failed. + * - #MSPACK_ERR_SEEK indicates that mspack_system::seek() failed. + * - #MSPACK_ERR_NOMEMORY indicates that mspack_system::alloc() failed. + * - #MSPACK_ERR_SIGNATURE indicates that the file being read does not + * have the correct "signature". It is probably not a valid file for + * whatever format is being read. + * - #MSPACK_ERR_DATAFORMAT indicates that the file being used or read + * is corrupt. + * - #MSPACK_ERR_CHECKSUM indicates that a data checksum has failed. + * - #MSPACK_ERR_CRUNCH indicates an error occured during compression. + * - #MSPACK_ERR_DECRUNCH indicates an error occured during decompression. + */ + +#ifndef LIB_MSPACK_H +#define LIB_MSPACK_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#ifdef _MSC_VER +#include +#else +#include +#endif +/** + * System self-test function, to ensure both library and calling program + * can use one another. + * + * A result of MSPACK_ERR_OK means the library and caller are + * compatible. Any other result indicates that the library and caller are + * not compatible and should not be used. In particular, a value of + * MSPACK_ERR_SEEK means the library and caller use different off_t + * datatypes. + * + * It should be used like so: + * + * @code + * int selftest_result; + * MSPACK_SYS_SELFTEST(selftest_result); + * if (selftest_result != MSPACK_ERR_OK) { + * fprintf(stderr, "incompatible with this build of libmspack\n"); + * exit(0); + * } + * @endcode + * + * @param result an int variable to store the result of the self-test + */ +#define MSPACK_SYS_SELFTEST(result) do { \ + (result) = mspack_sys_selftest_internal(sizeof(off_t)); \ +} while (0) + +/** Part of the MSPACK_SYS_SELFTEST() macro, must not be used directly. */ +extern int mspack_sys_selftest_internal(int); + +/** + * Enquire about the binary compatibility version of a specific interface in + * the library. 
Currently, the following interfaces are defined: + * + * - #MSPACK_VER_LIBRARY: the overall library + * - #MSPACK_VER_SYSTEM: the mspack_system interface + * - #MSPACK_VER_MSCABD: the mscab_decompressor interface + * - #MSPACK_VER_MSCABC: the mscab_compressor interface + * - #MSPACK_VER_MSCHMD: the mschm_decompressor interface + * - #MSPACK_VER_MSCHMC: the mschm_compressor interface + * - #MSPACK_VER_MSLITD: the mslit_decompressor interface + * - #MSPACK_VER_MSLITC: the mslit_compressor interface + * - #MSPACK_VER_MSHLPD: the mshlp_decompressor interface + * - #MSPACK_VER_MSHLPC: the mshlp_compressor interface + * - #MSPACK_VER_MSSZDDD: the msszdd_decompressor interface + * - #MSPACK_VER_MSSZDDC: the msszdd_compressor interface + * - #MSPACK_VER_MSKWAJD: the mskwaj_decompressor interface + * - #MSPACK_VER_MSKWAJC: the mskwaj_compressor interface + * + * The result of the function should be interpreted as follows: + * - -1: this interface is completely unknown to the library + * - 0: this interface is known, but non-functioning + * - 1: this interface has all basic functionality + * - 2, 3, ...: this interface has additional functionality, clearly marked + * in the documentation as "version 2", "version 3" and so on. + * + * @param interface the interface to request current version of + * @return the version of the requested interface + */ +extern int mspack_version(int interface); + +/** Pass to mspack_version() to get the overall library version */ +#define MSPACK_VER_LIBRARY (0) +/** Pass to mspack_version() to get the mspack_system version */ +#define MSPACK_VER_SYSTEM (1) +/** Pass to mspack_version() to get the mscab_decompressor version */ +#define MSPACK_VER_MSCABD (2) +/** Pass to mspack_version() to get the mscab_compressor version */ +#define MSPACK_VER_MSCABC (3) +/** Pass to mspack_version() to get the mschm_decompressor version */ +#define MSPACK_VER_MSCHMD (4) +/** Pass to mspack_version() to get the mschm_compressor version */ +#define MSPACK_VER_MSCHMC (5) +/** Pass to mspack_version() to get the mslit_decompressor version */ +#define MSPACK_VER_MSLITD (6) +/** Pass to mspack_version() to get the mslit_compressor version */ +#define MSPACK_VER_MSLITC (7) +/** Pass to mspack_version() to get the mshlp_decompressor version */ +#define MSPACK_VER_MSHLPD (8) +/** Pass to mspack_version() to get the mshlp_compressor version */ +#define MSPACK_VER_MSHLPC (9) +/** Pass to mspack_version() to get the msszdd_decompressor version */ +#define MSPACK_VER_MSSZDDD (10) +/** Pass to mspack_version() to get the msszdd_compressor version */ +#define MSPACK_VER_MSSZDDC (11) +/** Pass to mspack_version() to get the mskwaj_decompressor version */ +#define MSPACK_VER_MSKWAJD (12) +/** Pass to mspack_version() to get the mskwaj_compressor version */ +#define MSPACK_VER_MSKWAJC (13) + +/* --- file I/O abstraction ------------------------------------------------ */ + +/** + * A structure which abstracts file I/O and memory management. + * + * The library always uses the mspack_system structure for interaction + * with the file system and to allocate, free and copy all memory. It also + * uses it to send literal messages to the library user. + * + * When the library is compiled normally, passing NULL to a compressor or + * decompressor constructor will result in a default mspack_system being + * used, where all methods are implemented with the standard C library. + * However, all constructors support being given a custom created + * mspack_system structure, with the library user's own methods. 
This + * allows for more abstract interaction, such as reading and writing files + * directly to memory, or from a network socket or pipe. + * + * Implementors of an mspack_system structure should read all + * documentation entries for every structure member, and write methods + * which conform to those standards. + */ +struct mspack_system { + /** + * Opens a file for reading, writing, appending or updating. + * + * @param this a self-referential pointer to the mspack_system + * structure whose open() method is being called. If + * this pointer is required by close(), read(), write(), + * seek() or tell(), it should be stored in the result + * structure at this time. + * @param filename the file to be opened. It is passed directly from the + * library caller without being modified, so it is up to + * the caller what this parameter actually represents. + * @param mode one of #MSPACK_SYS_OPEN_READ (open an existing file + * for reading), #MSPACK_SYS_OPEN_WRITE (open a new file + * for writing), #MSPACK_SYS_OPEN_UPDATE (open an existing + * file for reading/writing from the start of the file) or + * #MSPACK_SYS_OPEN_APPEND (open an existing file for + * reading/writing from the end of the file) + * @return a pointer to a mspack_file structure. This structure officially + * contains no members, its true contents are up to the + * mspack_system implementor. It should contain whatever is needed + * for other mspack_system methods to operate. + * @see close(), read(), write(), seek(), tell(), message() + */ + struct mspack_file * (*open)(struct mspack_system *this, + char *filename, + int mode); + + /** + * Closes a previously opened file. If any memory was allocated for this + * particular file handle, it should be freed at this time. + * + * @param file the file to close + * @see open() + */ + void (*close)(struct mspack_file *file); + + /** + * Reads a given number of bytes from an open file. + * + * @param file the file to read from + * @param buffer the location where the read bytes should be stored + * @param bytes the number of bytes to read from the file. + * @return the number of bytes successfully read (this can be less than + * the number requested), zero to mark the end of file, or less + * than zero to indicate an error. + * @see open(), write() + */ + int (*read)(struct mspack_file *file, + void *buffer, + int bytes); + + /** + * Writes a given number of bytes to an open file. + * + * @param file the file to write to + * @param buffer the location where the written bytes should be read from + * @param bytes the number of bytes to write to the file. + * @return the number of bytes successfully written, this can be less + * than the number requested. Zero or less can indicate an error + * where no bytes at all could be written. All cases where less + * bytes were written than requested are considered by the library + * to be an error. + * @see open(), read() + */ + int (*write)(struct mspack_file *file, + void *buffer, + int bytes); + + /** + * Seeks to a specific file offset within an open file. + * + * Sometimes the library needs to know the length of a file. It does + * this by seeking to the end of the file with seek(file, 0, + * MSPACK_SYS_SEEK_END), then calling tell(). Implementations may want + * to make a special case for this. + * + * Due to the potentially varying 32/64 bit datatype off_t on some + * architectures, the #MSPACK_SYS_SELFTEST macro MUST be used before + * using the library. 
If not, the error caused by the library passing an + * inappropriate stackframe to seek() is subtle and hard to trace. + * + * @param file the file to be seeked + * @param offset an offset to seek, measured in bytes + * @param mode one of #MSPACK_SYS_SEEK_START (the offset should be + * measured from the start of the file), #MSPACK_SYS_SEEK_CUR + * (the offset should be measured from the current file offset) + * or #MSPACK_SYS_SEEK_END (the offset should be measured from + * the end of the file) + * @return zero for success, non-zero for an error + * @see open(), tell() + */ + int (*seek)(struct mspack_file *file, + off_t offset, + int mode); + + /** + * Returns the current file position (in bytes) of the given file. + * + * @param file the file whose file position is wanted + * @return the current file position of the file + * @see open(), seek() + */ + off_t (*tell)(struct mspack_file *file); + + /** + * Used to send messages from the library to the user. + * + * Occasionally, the library generates warnings or other messages in + * plain english to inform the human user. These are informational only + * and can be ignored if not wanted. + * + * @param file may be a file handle returned from open() if this message + * pertains to a specific open file, or NULL if not related to + * a specific file. + * @param format a printf() style format string. It does NOT include a + * trailing newline. + * @see open() + */ + void (*message)(struct mspack_file *file, + char *format, + ...); + + /** + * Allocates memory. + * + * @param this a self-referential pointer to the mspack_system + * structure whose alloc() method is being called. + * @param bytes the number of bytes to allocate + * @result a pointer to the requested number of bytes, or NULL if + * not enough memory is available + * @see free() + */ + void * (*alloc)(struct mspack_system *this, + size_t bytes); + + /** + * Frees memory. + * + * @param ptr the memory to be freed. + * @see alloc() + */ + void (*free)(void *ptr); + + /** + * Copies from one region of memory to another. + * + * The regions of memory are guaranteed not to overlap, are usually less + * than 256 bytes, and may not be aligned. Please note that the source + * parameter comes before the destination parameter, unlike the standard + * C function memcpy(). + * + * @param src the region of memory to copy from + * @param dest the region of memory to copy to + * @param bytes the size of the memory region, in bytes + */ + void (*copy)(void *src, + void *dest, + size_t bytes); + + /** + * A null pointer to mark the end of mspack_system. It must equal NULL. + * + * Should the mspack_system structure extend in the future, this NULL + * will be seen, rather than have an invalid method pointer called. + */ + void *null_ptr; +}; + +/** mspack_system::open() mode: open existing file for reading. */ +#define MSPACK_SYS_OPEN_READ (0) +/** mspack_system::open() mode: open new file for writing */ +#define MSPACK_SYS_OPEN_WRITE (1) +/** mspack_system::open() mode: open existing file for writing */ +#define MSPACK_SYS_OPEN_UPDATE (2) +/** mspack_system::open() mode: open existing file for writing */ +#define MSPACK_SYS_OPEN_APPEND (3) + +/** mspack_system::seek() mode: seek relative to start of file */ +#define MSPACK_SYS_SEEK_START (0) +/** mspack_system::seek() mode: seek relative to current offset */ +#define MSPACK_SYS_SEEK_CUR (1) +/** mspack_system::seek() mode: seek relative to end of file */ +#define MSPACK_SYS_SEEK_END (2) + +/** + * A structure which represents an open file handle. 
The contents of this + * structure are determined by the implementation of the + * mspack_system::open() method. + */ +struct mspack_file { + int dummy; +}; + +/* --- error codes --------------------------------------------------------- */ + +/** Error code: no error */ +#define MSPACK_ERR_OK (0) +/** Error code: bad arguments to method */ +#define MSPACK_ERR_ARGS (1) +/** Error code: error opening file */ +#define MSPACK_ERR_OPEN (2) +/** Error code: error reading file */ +#define MSPACK_ERR_READ (3) +/** Error code: error writing file */ +#define MSPACK_ERR_WRITE (4) +/** Error code: seek error */ +#define MSPACK_ERR_SEEK (5) +/** Error code: out of memory */ +#define MSPACK_ERR_NOMEMORY (6) +/** Error code: bad "magic id" in file */ +#define MSPACK_ERR_SIGNATURE (7) +/** Error code: bad or corrupt file format */ +#define MSPACK_ERR_DATAFORMAT (8) +/** Error code: bad checksum or CRC */ +#define MSPACK_ERR_CHECKSUM (9) +/** Error code: error during compression */ +#define MSPACK_ERR_CRUNCH (10) +/** Error code: error during decompression */ +#define MSPACK_ERR_DECRUNCH (11) + +/* --- functions available in library -------------------------------------- */ + +/** Creates a new CAB compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mscab_compressor or NULL + */ +extern struct mscab_compressor * + mspack_create_cab_compressor(struct mspack_system *sys); + +/** Creates a new CAB decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mscab_decompressor or NULL + */ +extern struct mscab_decompressor * + mspack_create_cab_decompressor(struct mspack_system *sys); + +/** Destroys an existing CAB compressor. + * @param this the #mscab_compressor to destroy + */ +extern void mspack_destroy_cab_compressor(struct mscab_compressor *this); + +/** Destroys an existing CAB decompressor. + * @param this the #mscab_decompressor to destroy + */ +extern void mspack_destroy_cab_decompressor(struct mscab_decompressor *this); + + +/** Creates a new CHM compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mschm_compressor or NULL + */ +extern struct mschm_compressor * + mspack_create_chm_compressor(struct mspack_system *sys); + +/** Creates a new CHM decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mschm_decompressor or NULL + */ +extern struct mschm_decompressor * + mspack_create_chm_decompressor(struct mspack_system *sys); + +/** Destroys an existing CHM compressor. + * @param this the #mschm_compressor to destroy + */ +extern void mspack_destroy_chm_compressor(struct mschm_compressor *this); + +/** Destroys an existing CHM decompressor. + * @param this the #mschm_decompressor to destroy + */ +extern void mspack_destroy_chm_decompressor(struct mschm_decompressor *this); + + +/** Creates a new LIT compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mslit_compressor or NULL + */ +extern struct mslit_compressor * + mspack_create_lit_compressor(struct mspack_system *sys); + +/** Creates a new LIT decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mslit_decompressor or NULL + */ +extern struct mslit_decompressor * + mspack_create_lit_decompressor(struct mspack_system *sys); + +/** Destroys an existing LIT compressor. 
+ * @param this the #mslit_compressor to destroy + */ +extern void mspack_destroy_lit_compressor(struct mslit_compressor *this); + +/** Destroys an existing LIT decompressor. + * @param this the #mslit_decompressor to destroy + */ +extern void mspack_destroy_lit_decompressor(struct mslit_decompressor *this); + + +/** Creates a new HLP compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mshlp_compressor or NULL + */ +extern struct mshlp_compressor * + mspack_create_hlp_compressor(struct mspack_system *sys); + +/** Creates a new HLP decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mshlp_decompressor or NULL + */ +extern struct mshlp_decompressor * + mspack_create_hlp_decompressor(struct mspack_system *sys); + +/** Destroys an existing hlp compressor. + * @param this the #mshlp_compressor to destroy + */ +extern void mspack_destroy_hlp_compressor(struct mshlp_compressor *this); + +/** Destroys an existing hlp decompressor. + * @param this the #mshlp_decompressor to destroy + */ +extern void mspack_destroy_hlp_decompressor(struct mshlp_decompressor *this); + + +/** Creates a new SZDD compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #msszdd_compressor or NULL + */ +extern struct msszdd_compressor * + mspack_create_szdd_compressor(struct mspack_system *sys); + +/** Creates a new SZDD decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #msszdd_decompressor or NULL + */ +extern struct msszdd_decompressor * + mspack_create_szdd_decompressor(struct mspack_system *sys); + +/** Destroys an existing SZDD compressor. + * @param this the #msszdd_compressor to destroy + */ +extern void mspack_destroy_szdd_compressor(struct msszdd_compressor *this); + +/** Destroys an existing SZDD decompressor. + * @param this the #msszdd_decompressor to destroy + */ +extern void mspack_destroy_szdd_decompressor(struct msszdd_decompressor *this); + + +/** Creates a new KWAJ compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mskwaj_compressor or NULL + */ +extern struct mskwaj_compressor * + mspack_create_kwaj_compressor(struct mspack_system *sys); + +/** Creates a new KWAJ decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mskwaj_decompressor or NULL + */ +extern struct mskwaj_decompressor * + mspack_create_kwaj_decompressor(struct mspack_system *sys); + +/** Destroys an existing KWAJ compressor. + * @param this the #mskwaj_compressor to destroy + */ +extern void mspack_destroy_kwaj_compressor(struct mskwaj_compressor *this); + +/** Destroys an existing KWAJ decompressor. + * @param this the #mskwaj_decompressor to destroy + */ +extern void mspack_destroy_kwaj_decompressor(struct mskwaj_decompressor *this); + + +/* --- support for .CAB (MS Cabinet) file format --------------------------- */ + +/** + * A structure which represents a single cabinet file. + * + * All fields are READ ONLY. + * + * If this cabinet is part of a merged cabinet set, the #files and #folders + * fields are common to all cabinets in the set, and will be identical. + * + * @see mscab_decompressor::open(), mscab_decompressor::close(), + * mscab_decompressor::search() + */ +struct mscabd_cabinet { + /** + * The next cabinet in a chained list, if this cabinet was opened with + * mscab_decompressor::search(). 
May be NULL to mark the end of the + * list. + */ + struct mscabd_cabinet *next; + + /** + * The filename of the cabinet. More correctly, the filename of the + * physical file that the cabinet resides in. This is given by the + * library user and may be in any format. + */ + char *filename; + + /** The file offset of cabinet within the physical file it resides in. */ + off_t base_offset; + + /** The length of the cabinet file in bytes. */ + unsigned int length; + + /** The previous cabinet in a cabinet set, or NULL. */ + struct mscabd_cabinet *prevcab; + + /** The next cabinet in a cabinet set, or NULL. */ + struct mscabd_cabinet *nextcab; + + /** The filename of the previous cabinet in a cabinet set, or NULL. */ + char *prevname; + + /** The filename of the next cabinet in a cabinet set, or NULL. */ + char *nextname; + + /** The name of the disk containing the previous cabinet in a cabinet + * set, or NULL. + */ + char *previnfo; + + /** The name of the disk containing the next cabinet in a cabinet set, + * or NULL. + */ + char *nextinfo; + + /** A list of all files in the cabinet or cabinet set. */ + struct mscabd_file *files; + + /** A list of all folders in the cabinet or cabinet set. */ + struct mscabd_folder *folders; + + /** + * The set ID of the cabinet. All cabinets in the same set should have + * the same set ID. + */ + unsigned short set_id; + + /** + * The index number of the cabinet within the set. Numbering should + * start from 0 for the first cabinet in the set, and increment by 1 for + * each following cabinet. + */ + unsigned short set_index; + + /** + * The number of bytes reserved in the header area of the cabinet. + * + * If this is non-zero and flags has MSCAB_HDR_RESV set, this data can + * be read by the calling application. It is of the given length, + * located at offset (base_offset + MSCAB_HDR_RESV_OFFSET) in the + * cabinet file. + * + * @see flags + */ + unsigned short header_resv; + + /** + * Header flags. + * + * - MSCAB_HDR_PREVCAB indicates the cabinet is part of a cabinet set, and + * has a predecessor cabinet. + * - MSCAB_HDR_NEXTCAB indicates the cabinet is part of a cabinet set, and + * has a successor cabinet. + * - MSCAB_HDR_RESV indicates the cabinet has reserved header space. + * + * @see prevname, previnfo, nextname, nextinfo, header_resv + */ + int flags; +}; + +/** Offset from start of cabinet to the reserved header data (if present). */ +#define MSCAB_HDR_RESV_OFFSET (0x28) + +/** Cabinet header flag: cabinet has a predecessor */ +#define MSCAB_HDR_PREVCAB (0x01) +/** Cabinet header flag: cabinet has a successor */ +#define MSCAB_HDR_NEXTCAB (0x02) +/** Cabinet header flag: cabinet has reserved header space */ +#define MSCAB_HDR_RESV (0x04) + +/** + * A structure which represents a single folder in a cabinet or cabinet set. + * + * All fields are READ ONLY. + * + * A folder is a single compressed stream of data. When uncompressed, it + * holds the data of one or more files. A folder may be split across more + * than one cabinet. + */ +struct mscabd_folder { + /** + * A pointer to the next folder in this cabinet or cabinet set, or NULL + * if this is the final folder. + */ + struct mscabd_folder *next; + + /** + * The compression format used by this folder. + * + * The macro MSCABD_COMP_METHOD() should be used on this field to get + * the algorithm used. The macro MSCABD_COMP_LEVEL() should be used to get + * the "compression level". 
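+   *
+   * For example (a short sketch; the variable name "folder" is only
+   * illustrative):
+   * @code
+   * if (MSCABD_COMP_METHOD(folder->comp_type) == MSCAB_COMP_LZX) {
+   *   int level = MSCABD_COMP_LEVEL(folder->comp_type);
+   * }
+   * @endcode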
+ * + * @see MSCABD_COMP_METHOD(), MSCABD_COMP_LEVEL() + */ + int comp_type; + + /** + * The total number of data blocks used by this folder. This includes + * data blocks present in other files, if this folder spans more than + * one cabinet. + */ + unsigned int num_blocks; +}; + +/** + * Returns the compression method used by a folder. + * + * @param comp_type a mscabd_folder::comp_type value + * @return one of #MSCAB_COMP_NONE, #MSCAB_COMP_MSZIP, #MSCAB_COMP_QUANTUM + * or #MSCAB_COMP_LZX + */ +#define MSCABD_COMP_METHOD(comp_type) ((comp_type) & 0x0F) +/** + * Returns the compression level used by a folder. + * + * @param comp_type a mscabd_folder::comp_type value + * @return the compression level. This is only defined by LZX and Quantum + * compression + */ +#define MSCABD_COMP_LEVEL(comp_type) (((comp_type) >> 8) & 0x1F) + +/** Compression mode: no compression. */ +#define MSCAB_COMP_NONE (0) +/** Compression mode: MSZIP (deflate) compression. */ +#define MSCAB_COMP_MSZIP (1) +/** Compression mode: Quantum compression */ +#define MSCAB_COMP_QUANTUM (2) +/** Compression mode: LZX compression */ +#define MSCAB_COMP_LZX (3) + +/** + * A structure which represents a single file in a cabinet or cabinet set. + * + * All fields are READ ONLY. + */ +struct mscabd_file { + /** + * The next file in the cabinet or cabinet set, or NULL if this is the + * final file. + */ + struct mscabd_file *next; + + /** + * The filename of the file. + * + * A null terminated string of up to 255 bytes in length, it may be in + * either ISO-8859-1 or UTF8 format, depending on the file attributes. + * + * @see attribs + */ + char *filename; + + /** The uncompressed length of the file, in bytes. */ + unsigned int length; + + /** + * File attributes. + * + * The following attributes are defined: + * - #MSCAB_ATTRIB_RDONLY indicates the file is write protected. + * - #MSCAB_ATTRIB_HIDDEN indicates the file is hidden. + * - #MSCAB_ATTRIB_SYSTEM indicates the file is a operating system file. + * - #MSCAB_ATTRIB_ARCH indicates the file is "archived". + * - #MSCAB_ATTRIB_EXEC indicates the file is an executable program. + * - #MSCAB_ATTRIB_UTF_NAME indicates the filename is in UTF8 format rather + * than ISO-8859-1. + */ + int attribs; + + /** File's last modified time, hour field. */ + char time_h; + /** File's last modified time, minute field. */ + char time_m; + /** File's last modified time, second field. */ + char time_s; + + /** File's last modified date, day field. */ + char date_d; + /** File's last modified date, month field. */ + char date_m; + /** File's last modified date, year field. */ + int date_y; + + /** A pointer to the folder that contains this file. */ + struct mscabd_folder *folder; + + /** The uncompressed offset of this file in its folder. */ + unsigned int offset; +}; + +/** mscabd_file::attribs attribute: file is read-only. */ +#define MSCAB_ATTRIB_RDONLY (0x01) +/** mscabd_file::attribs attribute: file is hidden. */ +#define MSCAB_ATTRIB_HIDDEN (0x02) +/** mscabd_file::attribs attribute: file is an operating system file. */ +#define MSCAB_ATTRIB_SYSTEM (0x04) +/** mscabd_file::attribs attribute: file is "archived". */ +#define MSCAB_ATTRIB_ARCH (0x20) +/** mscabd_file::attribs attribute: file is an executable program. */ +#define MSCAB_ATTRIB_EXEC (0x40) +/** mscabd_file::attribs attribute: filename is UTF8, not ISO-8859-1. */ +#define MSCAB_ATTRIB_UTF_NAME (0x80) + +/** mscab_decompressor::set_param() parameter: search buffer size. 
*/ +#define MSCABD_PARAM_SEARCHBUF (0) +/** mscab_decompressor::set_param() parameter: repair MS-ZIP streams? */ +#define MSCABD_PARAM_FIXMSZIP (1) +/** mscab_decompressor::set_param() parameter: size of decompression buffer */ +#define MSCABD_PARAM_DECOMPBUF (2) + +/** TODO */ +struct mscab_compressor { + int dummy; +}; + +/** + * A decompressor for .CAB (Microsoft Cabinet) files + * + * All fields are READ ONLY. + * + * @see mspack_create_cab_decompressor(), mspack_destroy_cab_decompressor() + */ +struct mscab_decompressor { + /** + * Opens a cabinet file and reads its contents. + * + * If the file opened is a valid cabinet file, all headers will be read + * and a mscabd_cabinet structure will be returned, with a full list of + * folders and files. + * + * In the case of an error occuring, NULL is returned and the error code + * is available from last_error(). + * + * The filename pointer should be considered "in use" until close() is + * called on the cabinet. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param filename the filename of the cabinet file. This is passed + * directly to mspack_system::open(). + * @return a pointer to a mscabd_cabinet structure, or NULL on failure + * @see close(), search(), last_error() + */ + struct mscabd_cabinet * (*open) (struct mscab_decompressor *this, + char *filename); + + /** + * Closes a previously opened cabinet or cabinet set. + * + * This closes a cabinet, all cabinets associated with it via the + * mscabd_cabinet::next, mscabd_cabinet::prevcab and + * mscabd_cabinet::nextcab pointers, and all folders and files. All + * memory used by these entities is freed. + * + * The cabinet pointer is now invalid and cannot be used again. All + * mscabd_folder and mscabd_file pointers from that cabinet or cabinet + * set are also now invalid, and cannot be used again. + * + * If the cabinet pointer given was created using search(), it MUST be + * the cabinet pointer returned by search() and not one of the later + * cabinet pointers further along the mscabd_cabinet::next chain. + + * If extra cabinets have been added using append() or prepend(), these + * will all be freed, even if the cabinet pointer given is not the first + * cabinet in the set. Do NOT close() more than one cabinet in the set. + * + * The mscabd_cabinet::filename is not freed by the library, as it is + * not allocated by the library. The caller should free this itself if + * necessary, before it is lost forever. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param cab the cabinet to close + * @see open(), search(), append(), prepend() + */ + void (*close)(struct mscab_decompressor *this, + struct mscabd_cabinet *cab); + + /** + * Searches a regular file for embedded cabinets. + * + * This opens a normal file with the given filename and will search the + * entire file for embedded cabinet files + * + * If any cabinets are found, the equivalent of open() is called on each + * potential cabinet file at the offset it was found. All successfully + * open()ed cabinets are kept in a list. + * + * The first cabinet found will be returned directly as the result of + * this method. Any further cabinets found will be chained in a list + * using the mscabd_cabinet::next field. + * + * In the case of an error occuring anywhere other than the simulated + * open(), NULL is returned and the error code is available from + * last_error(). 
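+   *
+   * A minimal sketch of walking the cabinets found (cabd is assumed to be
+   * a previously created mscab_decompressor, and the filename is only an
+   * example):
+   * @code
+   * struct mscabd_cabinet *cab = cabd->search(cabd, "installer.exe");
+   * while (cab) {
+   *   ... each cab in the chain is one embedded cabinet ...
+   *   cab = cab->next;
+   * }
+   * @endcode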
+ * + * If no error occurs, but no cabinets can be found in the file, NULL is + * returned and last_error() returns MSPACK_ERR_OK. + * + * The filename pointer should be considered in use until close() is + * called on the cabinet. + * + * close() should only be called on the result of search(), not on any + * subsequent cabinets in the mscabd_cabinet::next chain. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param filename the filename of the file to search for cabinets. This + * is passed directly to mspack_system::open(). + * @return a pointer to a mscabd_cabinet structure, or NULL + * @see close(), open(), last_error() + */ + struct mscabd_cabinet * (*search) (struct mscab_decompressor *this, + char *filename); + + /** + * Appends one mscabd_cabinet to another, forming or extending a cabinet + * set. + * + * This will attempt to append one cabinet to another such that + * (cab->nextcab == nextcab) && (nextcab->prevcab == cab) and + * any folders split between the two cabinets are merged. + * + * The cabinets MUST be part of a cabinet set -- a cabinet set is a + * cabinet that spans more than one physical cabinet file on disk -- and + * must be appropriately matched. + * + * It can be determined if a cabinet has further parts to load by + * examining the mscabd_cabinet::flags field: + * + * - if (flags & MSCAB_HDR_PREVCAB) is non-zero, there is a + * predecessor cabinet to open() and prepend(). Its MS-DOS + * case-insensitive filename is mscabd_cabinet::prevname + * - if (flags & MSCAB_HDR_NEXTCAB) is non-zero, there is a + * successor cabinet to open() and append(). Its MS-DOS case-insensitive + * filename is mscabd_cabinet::nextname + * + * If the cabinets do not match, an error code will be returned. Neither + * cabinet has been altered, and both should be closed seperately. + * + * Files and folders in a cabinet set are a single entity. All cabinets + * in a set use the same file list, which is updated as cabinets in the + * set are added. All pointers to mscabd_folder and mscabd_file + * structures in either cabinet must be discarded and re-obtained after + * merging. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param cab the cabinet which will be appended to, + * predecessor of nextcab + * @param nextcab the cabinet which will be appended, + * successor of cab + * @return an error code, or MSPACK_ERR_OK if successful + * @see prepend(), open(), close() + */ + int (*append) (struct mscab_decompressor *this, + struct mscabd_cabinet *cab, + struct mscabd_cabinet *nextcab); + + /** + * Prepends one mscabd_cabinet to another, forming or extending a + * cabinet set. + * + * This will attempt to prepend one cabinet to another, such that + * (cab->prevcab == prevcab) && (prevcab->nextcab == cab). In + * all other respects, it is identical to append(). See append() for the + * full documentation. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param cab the cabinet which will be prepended to, + * successor of prevcab + * @param prevcab the cabinet which will be prepended, + * predecessor of cab + * @return an error code, or MSPACK_ERR_OK if successful + * @see append(), open(), close() + */ + int (*prepend) (struct mscab_decompressor *this, + struct mscabd_cabinet *cab, + struct mscabd_cabinet *prevcab); + + /** + * Extracts a file from a cabinet or cabinet set. 
+ * + * This extracts a compressed file in a cabinet and writes it to the given + * filename. + * + * The MS-DOS filename of the file, mscabd_file::filename, is NOT USED + * by extract(). The caller must examine this MS-DOS filename, copy and + * change it as necessary, create directories as necessary, and provide + * the correct filename as a parameter, which will be passed unchanged + * to the decompressor's mspack_system::open() + * + * If the file belongs to a split folder in a multi-part cabinet set, + * and not enough parts of the cabinet set have been loaded and appended + * or prepended, an error will be returned immediately. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param file the file to be decompressed + * @param filename the filename of the file being written to + * @return an error code, or MSPACK_ERR_OK if successful + */ + int (*extract)(struct mscab_decompressor *this, + struct mscabd_file *file, + char *filename); + + /** + * Sets a CAB decompression engine parameter. + * + * The following parameters are defined: + * - #MSCABD_PARAM_SEARCHBUF: How many bytes should be allocated as a + * buffer when using search()? The minimum value is 4. The default + * value is 32768. + * - #MSCABD_PARAM_FIXMSZIP: If non-zero, extract() will ignore bad + * checksums and recover from decompression errors in MS-ZIP + * compressed folders. The default value is 0 (don't recover). + * - #MSCABD_PARAM_DECOMPBUF: How many bytes should be used as an input + * bit buffer by decompressors? The minimum value is 4. The default + * value is 4096. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param param the parameter to set + * @param value the value to set the parameter to + * @return MSPACK_ERR_OK if all is OK, or MSPACK_ERR_ARGS if there + * is a problem with either parameter or value. + * @see search(), extract() + */ + int (*set_param)(struct mscab_decompressor *this, + int param, + int value); + + /** + * Returns the error code set by the most recently called method. + * + * This is useful for open() and search(), which do not return an error + * code directly. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @return the most recent error code + * @see open(), search() + */ + int (*last_error)(struct mscab_decompressor *); +}; + +/* --- support for .CHM (HTMLHelp) file format ----------------------------- */ + +/** + * A structure which represents a section of a CHM helpfile. + * + * All fields are READ ONLY. + * + * Not used directly, but used as a generic base type for + * mschmd_sec_uncompressed and mschmd_sec_mscompressed. + */ +struct mschmd_section { + /** A pointer to the CHM helpfile that contains this section. */ + struct mschmd_header *chm; + + /** + * The section ID. Either 0 for the uncompressed section + * mschmd_sec_uncompressed, or 1 for the LZX compressed section + * mschmd_sec_mscompressed. No other section IDs are known. + */ + unsigned int id; +}; + +/** + * A structure which represents the uncompressed section of a CHM helpfile. + * + * All fields are READ ONLY. + */ +struct mschmd_sec_uncompressed { + /** Generic section data. */ + struct mschmd_section base; + + /** The file offset of where this section begins in the CHM helpfile. */ + off_t offset; +}; + +/** + * A structure which represents the compressed section of a CHM helpfile. + * + * All fields are READ ONLY. 
+ */ +struct mschmd_sec_mscompressed { + /** Generic section data. */ + struct mschmd_section base; + + /** A pointer to the meta-file which represents all LZX compressed data. */ + struct mschmd_file *content; + + /** A pointer to the file which contains the LZX control data. */ + struct mschmd_file *control; + + /** A pointer to the file which contains the LZX reset table. */ + struct mschmd_file *rtable; +}; + +/** + * A structure which represents a CHM helpfile. + * + * All fields are READ ONLY. + */ +struct mschmd_header { + /** The version of the CHM file format used in this file. */ + unsigned int version; + + /** + * The "timestamp" of the CHM helpfile. + * + * It is the lower 32 bits of a 64-bit value representing the number of + * centiseconds since 1601-01-01 00:00:00 UTC, plus 42. It is not useful + * as a timestamp, but it is useful as a semi-unique ID. + */ + unsigned int timestamp; + + + /** + * The default Language and Country ID (LCID) of the user who ran the + * HTMLHelp Compiler. This is not the language of the CHM file itself. + */ + unsigned int language; + + /** + * The filename of the CHM helpfile. This is given by the library user + * and may be in any format. + */ + char *filename; + + /** The length of the CHM helpfile, in bytes. */ + off_t length; + + /** A list of all non-system files in the CHM helpfile. */ + struct mschmd_file *files; + + /** + * A list of all system files in the CHM helpfile. + * + * System files are files which begin with "::". They are meta-files + * generated by the CHM creation process. + */ + struct mschmd_file *sysfiles; + + /** The section 0 (uncompressed) data in this CHM helpfile. */ + struct mschmd_sec_uncompressed sec0; + + /** The section 1 (MSCompressed) data in this CHM helpfile. */ + struct mschmd_sec_mscompressed sec1; + + /** The file offset of the first PMGL/PMGI directory chunk. */ + off_t dir_offset; + + /** The number of PMGL/PMGI directory chunks in this CHM helpfile. */ + unsigned int num_chunks; + + /** The size of each PMGL/PMGI chunk, in bytes. */ + unsigned int chunk_size; + + /** The "density" of the quick-reference section in PMGL/PMGI chunks. */ + unsigned int density; + + /** The depth of the index tree. + * + * - if 1, there are no PMGI chunks, only PMGL chunks. + * - if 2, there is 1 PMGI chunk. All chunk indices point to PMGL chunks. + * - if 3, the root PMGI chunk points to secondary PMGI chunks, which in + * turn point to PMGL chunks. + * - and so on... + */ + unsigned int depth; + + /** + * The number of the root PGMI chunk. + * + * If there is no index in the CHM helpfile, this will be 0xFFFFFFFF. + */ + unsigned int index_root; +}; + +/** + * A structure which represents a file stored in a CHM helpfile. + * + * All fields are READ ONLY. + */ +struct mschmd_file { + /** + * A pointer to the next file in the list, or NULL if this is the final + * file. + */ + struct mschmd_file *next; + + /** + * A pointer to the section that this file is located in. Indirectly, + * it also points to the CHM helpfile the file is located in. + */ + struct mschmd_section *section; + + /** The offset within the section data that this file is located at. */ + off_t offset; + + /** The length of this file, in bytes */ + off_t length; + + /** The filename of this file -- a null terminated string in UTF8. */ + char *filename; +}; + +/** TODO */ +struct mschm_compressor { + int dummy; +}; + +/** + * A decompressor for .CHM (Microsoft HTMLHelp) files + * + * All fields are READ ONLY. 
+ * + * @see mspack_create_chm_decompressor(), mspack_destroy_chm_decompressor() + */ +struct mschm_decompressor { + /** + * Opens a CHM helpfile and reads its contents. + * + * If the file opened is a valid CHM helpfile, all headers will be read + * and a mschmd_header structure will be returned, with a full list of + * files. + * + * In the case of an error occurring, NULL is returned and the error code + * is available from last_error(). + * + * The filename pointer should be considered "in use" until close() is + * called on the CHM helpfile. + * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @param filename the filename of the CHM helpfile. This is passed + * directly to mspack_system::open(). + * @return a pointer to a mschmd_header structure, or NULL on failure + * @see close() + */ + struct mschmd_header *(*open)(struct mschm_decompressor *this, + char *filename); + + /** + * Closes a previously opened CHM helpfile. + * + * This closes a CHM helpfile, frees the mschmd_header and all + * mschmd_file structures associated with it (if any). This works on + * both helpfiles opened with open() and helpfiles opened with + * fast_open(). + * + * The CHM header pointer is now invalid and cannot be used again. All + * mschmd_file pointers referencing that CHM are also now invalid, and + * cannot be used again. + * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @param chm the CHM helpfile to close + * @see open(), fast_open() + */ + void (*close)(struct mschm_decompressor *this, + struct mschmd_header *chm); + + /** + * Extracts a file from a CHM helpfile. + * + * This extracts a file from a CHM helpfile and writes it to the given + * filename. The filename of the file, mschmd_file::filename, is not + * used by extract(), but can be used by the caller as a guide for + * constructing an appropriate filename. + * + * This method works both with files found in the mschmd_header::files + * and mschmd_header::sysfiles list and mschmd_file structures generated + * on the fly by fast_find(). + * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @param file the file to be decompressed + * @param filename the filename of the file being written to + * @return an error code, or MSPACK_ERR_OK if successful + */ + int (*extract)(struct mschm_decompressor *this, + struct mschmd_file *file, + char *filename); + + /** + * Returns the error code set by the most recently called method. + * + * This is useful for open() and fast_open(), which do not return an + * error code directly. + * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @return the most recent error code + * @see open(), fast_open() + */ + int (*last_error)(struct mschm_decompressor *this); + + /** + * Opens a CHM helpfile quickly. + * + * If the file opened is a valid CHM helpfile, only essential headers + * will be read. A mschmd_header structure will still be returned, as + * with open(), but the mschmd_header::files field will be NULL. No + * file details will be automatically read. The fast_find() method + * must be used to obtain file details. + * + * In the case of an error occurring, NULL is returned and the error code + * is available from last_error(). + * + * The filename pointer should be considered "in use" until close() is + * called on the CHM helpfile.
+ * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @param filename the filename of the CHM helpfile. This is passed + * directly to mspack_system::open(). + * @return a pointer to a mschmd_header structure, or NULL on failure + * @see open(), close(), fast_find(), extract() + */ + struct mschmd_header *(*fast_open)(struct mschm_decompressor *this, + char *filename); + + /** + * Finds file details quickly. + * + * Instead of reading all CHM helpfile headers and building a list of + * files, fast_open() and fast_find() are intended for finding file + * details only when they are needed. The CHM file format includes an + * on-disk file index to allow this. + * + * Given a case-sensitive filename, fast_find() will search the on-disk + * index for that file. + * + * If the file was found, the caller-provided mschmd_file structure will + * be filled out like so: + * - section: the correct value for the found file + * - offset: the correct value for the found file + * - length: the correct value for the found file + * - all other structure elements: NULL or 0 + * + * If the file was not found, MSPACK_ERR_OK will still be returned as the + * result, but the caller-provided structure will be filled out like so: + * - section: NULL + * - offset: 0 + * - length: 0 + * - all other structure elements: NULL or 0 + * + * This method is intended to be used in conjunction with CHM helpfiles + * opened with fast_open(), but it also works with helpfiles opened + * using the regular open(). + * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @param chm the CHM helpfile to search for the file + * @param filename the filename of the file to search for + * @param f_ptr a pointer to a caller-provded mschmd_file structure + * @param f_size sizeof(struct mschmd_file) + * @return MSPACK_ERR_OK, or an error code + * @see open(), close(), fast_find(), extract() + */ + int (*fast_find)(struct mschm_decompressor *this, + struct mschmd_header *chm, + char *filename, + struct mschmd_file *f_ptr, + int f_size); +}; + +/* --- support for .LIT (EBook) file format -------------------------------- */ + +/** TODO */ +struct mslit_compressor { + int dummy; +}; + +/** TODO */ +struct mslit_decompressor { + int dummy; +}; + + +/* --- support for .HLP (MS Help) file format ------------------------------ */ + +/** TODO */ +struct mshlp_compressor { + int dummy; +}; + +/** TODO */ +struct mshlp_decompressor { + int dummy; +}; + + +/* --- support for SZDD file format ---------------------------------------- */ + +/** TODO */ +struct msszdd_compressor { + int dummy; +}; + +/** TODO */ +struct msszdd_decompressor { + int dummy; +}; + +/* --- support for KWAJ file format ---------------------------------------- */ + +/** TODO */ +struct mskwaj_compressor { + int dummy; +}; + +/** TODO */ +struct mskwaj_decompressor { + int dummy; +}; + +#ifdef __cplusplus +}; +#endif + +#endif diff --git a/src/calibre/utils/lzx/system.h b/src/calibre/utils/lzx/system.h new file mode 100644 index 0000000000..acc7d23f56 --- /dev/null +++ b/src/calibre/utils/lzx/system.h @@ -0,0 +1,66 @@ +/* This file is part of libmspack. + * (C) 2003-2004 Stuart Caie. 
+ * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +#ifndef MSPACK_SYSTEM_H +#define MSPACK_SYSTEM_H 1 + +#ifdef _MSC_VER +#define inline +#endif + +#ifdef DEBUG +# include <stdio.h> +# define D(x) do { printf("%s:%d (%s) ",__FILE__, __LINE__, __FUNCTION__); \ + printf x ; fputc('\n', stdout); fflush(stdout);} while (0); +#else +# define D(x) +#endif + +/* endian-neutral reading of little-endian data */ +#define __egi32(a,n) ( (((a)[n+3]) << 24) | (((a)[n+2]) << 16) | \ + (((a)[n+1]) << 8) | ((a)[n+0]) ) +#define EndGetI64(a) ((((unsigned long long int) __egi32(a,4)) << 32) | \ + ((unsigned int) __egi32(a,0))) +#define EndGetI32(a) __egi32(a,0) +#define EndGetI16(a) ((((a)[1])<<8)|((a)[0])) + +/* endian-neutral reading of big-endian data */ +#define EndGetM32(a) ((((a)[0])<<24)|(((a)[1])<<16)|(((a)[2])<<8)|((a)[3])) +#define EndGetM16(a) ((((a)[0])<<8)|((a)[1])) + +extern struct mspack_system *mspack_default_system; + +/* returns the length of a file opened for reading */ +extern int mspack_sys_filelen(struct mspack_system *system, + struct mspack_file *file, off_t *length); + +/* validates a system structure */ +extern int mspack_valid_system(struct mspack_system *sys); + +/* Can't redefine intrinsics in Microsoft Visual C */ +#ifndef _MSC_VER + +/* inline memcmp() */ +static inline int memcmp(const void *s1, const void *s2, size_t n) { + unsigned char *c1 = (unsigned char *) s1; + unsigned char *c2 = (unsigned char *) s2; + if (n == 0) return 0; + while (--n && (*c1 == *c2)) c1++, c2++; + return *c1 - *c2; +} + +/* inline strlen() */ +static inline size_t strlen(const char *s) { + const char *e = s; + while (*e) e++; + return e - s; +} +#endif + +#endif From 1367ba58f3dba20a1221888af2e3912320db6a0f Mon Sep 17 00:00:00 2001 From: "Marshall T.
Vandegrift" Date: Fri, 18 Jul 2008 18:03:28 -0400 Subject: [PATCH 09/19] Section decompression working --- src/calibre/ebooks/lit/reader.py | 102 ++++++++++++++---- src/calibre/utils/lzx/lzxglue.c | 172 ------------------------------ src/calibre/utils/lzx/lzxmodule.c | 7 +- 3 files changed, 90 insertions(+), 191 deletions(-) delete mode 100644 src/calibre/utils/lzx/lzxglue.c diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 2608d63399..9963e14bf2 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -15,13 +15,14 @@ from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.msdes as msdes +import calibre.utils.lzx as lzx OPF_DECL = """" """ -XHTML_DECL = """ +HTML_DECL = """ @@ -30,6 +31,14 @@ XHTML_DECL = """ DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" +LZXC_TAG = 0x43585a4c +CONTROL_TAG = 4 +CONTROL_WINDOW_SIZE = 12 +RESET_NENTRIES = 4 +RESET_HDRLEN = 12 +RESET_UCLENGTH = 16 +RESET_INTERVAL = 32 + def u32(bytes): return struct.unpack('= 16: - ndwords = int32(control[idx_control:]) + 1 - if (idx_control + (ndwords * 4)) > len(control) or ndwords <= 0: + while len(transform) >= 16: + csize = (int32(control) + 1) * 4 + if csize > len(control) or csize <= 0: raise LitError("ControlData is too short") - guid = msguid(transform[idx_transform:]) + guid = msguid(transform) if guid == DESENCRYPT_GUID: content = self._decrypt(content) - idx_control += ndwords * 4 + control = control[csize:] elif guid == LZXCOMPRESS_GUID: - raise LitError("LZX decompression not implemented") + content = self._decompress_section(name, control, content) + control = control[csize:] else: raise LitError("Unrecognized transform: %s." 
% repr(guid)) - idx_transform += 16 + transform = transform[16:] return content def _decrypt(self, content): @@ -685,6 +698,59 @@ class LitFile(object): raise LitError('Cannot extract content from a DRM protected ebook') return msdes.new(self.bookkey).decrypt(content) + def _decompress_section(self, name, control, content): + if len(control) < 32 or u32(control[CONTROL_TAG:]) != LZXC_TAG: + raise LitError("Invalid ControlData tag value") + result = [] + + window_size = 14 + u = u32(control[CONTROL_WINDOW_SIZE:]) + while u > 0: + u >>= 1 + window_size += 1 + if window_size < 15 or window_size > 21: + raise LitError("Invalid window in ControlData") + lzx.init(window_size) + + reset_table = self.get_file('/'.join( + ['::DataSpace/Storage', name, 'Transform', + LZXCOMPRESS_GUID, 'InstanceData/ResetTable'])) + if len(reset_table) < (RESET_INTERVAL + 8): + raise LitError("Reset table is too short") + if u32(reset_table[RESET_UCLENGTH + 4:]) != 0: + raise LitError("Reset table has 64bit value for UCLENGTH") + ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8 + uclength = int32(reset_table[RESET_UCLENGTH:]) + accum = int32(reset_table[RESET_INTERVAL:]) + bytes_remaining = uclength + window_bytes = (1 << window_size) + base = 0 + + while ofs_entry < len(reset_table): + if accum >= window_bytes: + accum = 0 + size = int32(reset_table[ofs_entry:]) + u = int32(reset_table[ofs_entry + 4:]) + if u != 0: + raise LitError("Reset table entry greater than 32 bits") + if size >= (len(content) + base): + raise("Reset table entry out of bounds") + if bytes_remaining >= window_bytes: + lzx.reset() + result.append(lzx.decompress(content, window_bytes)) + bytes_remaining -= window_bytes + content = content[size - base:] + base = size + accum += int32(reset_table[RESET_INTERVAL:]) + ofs_entry += 8 + if bytes_remaining < window_bytes and bytes_remaining > 0: + lzx.reset() + result.append(lzx.decompress(content, bytes_remaining)) + bytes_remaining = 0 + if bytes_remaining > 0: + raise LitError("Failed to completely decompress section") + return ''.join(result) + def get_metadata(stream): try: litfile = LitFile(stream) @@ -693,7 +759,7 @@ def get_metadata(stream): cover_url, cover_item = mi.cover, None if cover_url: cover_url = relpath(cover_url, os.getcwd()) - for item in litfile.manifest: + for item in litfile.manifest.values(): if item.path == cover_url: cover_item = item.internal if cover_item is not None: diff --git a/src/calibre/utils/lzx/lzxglue.c b/src/calibre/utils/lzx/lzxglue.c deleted file mode 100644 index 7820c68cbf..0000000000 --- a/src/calibre/utils/lzx/lzxglue.c +++ /dev/null @@ -1,172 +0,0 @@ -/*--[lzxglue.c]---------------------------------------------------------------- - | Copyright (C) 2004 DRS - | - | This file is part of the "openclit" library for processing .LIT files. - | - | "Openclit" is free software; you can redistribute it and/or modify - | it under the terms of the GNU General Public License as published by - | the Free Software Foundation; either version 2 of the License, or - | (at your option) any later version. - | - | This program is distributed in the hope that it will be useful, - | but WITHOUT ANY WARRANTY; without even the implied warranty of - | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - | GNU General Public License for more details. - | - | You should have received a copy of the GNU General Public License - | along with this program; if not, write to the Free Software - | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- | - | The GNU General Public License may also be available at the following - | URL: http://www.gnu.org/licenses/gpl.html -*/ - -/* This provides a "glue" between Stuart Caie's libmspack library and the - * Openclit calls to the earlier LZX library. - * - * This way, I should be able to use the files unmodified. - */ -#include -#include -#include "litlib.h" -#include "mspack.h" -#include "lzx.h" - -typedef struct memory_file -{ - unsigned int magic; /* 0xB5 */ - void * buffer; - int total_bytes; - int current_bytes; -} memory_file; - - -void * glue_alloc(struct mspack_system *this, size_t bytes) -{ - void * p; - p = (void *)malloc(bytes); - if (p == NULL) { - lit_error(ERR_R|ERR_LIBC,"Malloc(%d) failed!", bytes); - } - return p; -} - -void glue_free(void * p) -{ - free(p); -} - -void glue_copy(void *src, void *dest, size_t bytes) -{ - memcpy(dest, src, bytes); -} - -struct mspack_file * glue_open(struct mspack_system *this, char *filename, - int mode) -{ - lit_error(0,"MSPACK_OPEN unsupported!"); - return NULL; -} - -void glue_close(struct mspack_file * file) { - return; -} - - -int glue_read(struct mspack_file * file, void * buffer, int bytes) -{ - memory_file * mem; - int remaining; - - mem = (memory_file *)file; - if (mem->magic != 0xB5) return -1; - - remaining = mem->total_bytes - mem->current_bytes; - if (!remaining) return 0; - if (bytes > remaining) bytes = remaining; - memcpy(buffer, (unsigned char *)mem->buffer+mem->current_bytes, bytes); - mem->current_bytes += bytes; - return bytes; -} - -int glue_write(struct mspack_file * file, void * buffer, int bytes) -{ - memory_file * mem; - int remaining; - - mem = (memory_file *)file; - if (mem->magic != 0xB5) return -1; - - remaining = mem->total_bytes - mem->current_bytes; - if (!remaining) return 0; - if (bytes > remaining) { - lit_error(0,"MSPACK_READ tried to write %d bytes, only %d left.", - bytes, remaining); - bytes = remaining; - } - memcpy((unsigned char *)mem->buffer+mem->current_bytes, buffer, bytes); - mem->current_bytes += bytes; - return bytes; -} - -struct mspack_system lzxglue_system = -{ - glue_open, - glue_close, - glue_read, /* Read */ - glue_write, /* Write */ - NULL, /* Seek */ - NULL, /* Tell */ - NULL, /* Message */ - glue_alloc, - glue_free, - glue_copy, - NULL /* Termination */ -}; - -int LZXwindow; -struct lzxd_stream * lzx_stream = NULL; - - -/* Can't really init here,don't know enough */ -int LZXinit(int window) -{ - LZXwindow = window; - lzx_stream = NULL; - - return 0; -} - -/* Doesn't exist. 
Oh well, reinitialize state every time anyway */ -void LZXreset(void) -{ - return; -} - -int LZXdecompress(unsigned char *inbuf, unsigned char *outbuf, - unsigned int inlen, unsigned int outlen) -{ - int err; - memory_file source; - memory_file dest; - - source.magic = 0xB5; - source.buffer = inbuf; - source.current_bytes = 0; - source.total_bytes = inlen; - - dest.magic = 0xB5; - dest.buffer = outbuf; - dest.current_bytes = 0; - dest.total_bytes = outlen; - - lzx_stream = lzxd_init(&lzxglue_system, (struct mspack_file *)&source, - (struct mspack_file *)&dest, LZXwindow, - 0x7fff /* Never reset, I do it */, 4096, outlen); - err = -1; - if (lzx_stream) err = lzxd_decompress(lzx_stream, outlen); - - lzxd_free(lzx_stream); - lzx_stream = NULL; - return err; -} diff --git a/src/calibre/utils/lzx/lzxmodule.c b/src/calibre/utils/lzx/lzxmodule.c index 44cc91c11d..bf8a48a056 100644 --- a/src/calibre/utils/lzx/lzxmodule.c +++ b/src/calibre/utils/lzx/lzxmodule.c @@ -199,8 +199,13 @@ initlzx(void) PyObject *m; m = Py_InitModule3("lzx", lzx_methods, lzx_doc); - if (m == NULL) return; + if (m == NULL) { + return; + } + LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL); Py_INCREF(LzxError); PyModule_AddObject(m, "LzxError", LzxError); + + return; } From 3737fd3e13c380bcfda7b9d54d7ee012547d401e Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sat, 19 Jul 2008 14:51:31 -0400 Subject: [PATCH 10/19] Added path clean-up and basic extraction method. --- src/calibre/ebooks/lit/reader.py | 200 ++++++++++++++++++------------- 1 file changed, 114 insertions(+), 86 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 9963e14bf2..afe5d96297 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -1,8 +1,10 @@ +''' +Support for reading LIT files. +''' +from __future__ import with_statement + __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -''' -Support for reading the metadata from a lit file. 
-''' import sys, struct, cStringIO, os import functools @@ -39,6 +41,13 @@ RESET_HDRLEN = 12 RESET_UCLENGTH = 16 RESET_INTERVAL = 32 +FLAG_OPENING = 1 +FLAG_CLOSING = 2 +FLAG_BLOCK = 4 +FLAG_HEAD = 8 +FLAG_ATOM = 16 +XML_ENTITIES = ['&', ''', '<', '>', '"'] + def u32(bytes): return struct.unpack('= len(raw): - raise LitError('Truncated manifest.') + while raw: + slen, raw = ord(raw[0]), raw[1:] + if slen == 0: break + root, raw = raw[:slen].decode('utf8'), raw[slen:] + if not raw: + raise LitError('Truncated manifest') for state in ['spine', 'not spine', 'css', 'images']: - num_files = int32(raw[pos:pos+4]) - pos += 4 + num_files, raw = int32(raw), raw[4:] if num_files == 0: continue - - i = 0 - while i < num_files: - if pos+5 >= len(raw): - raise LitError('Truncated manifest.') - offset = u32(raw[pos:pos+4]) - pos += 4 - - slen = ord(raw[pos]) - pos += 1 - internal = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - original = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - mime_type = raw[pos:pos+slen].decode('utf8') - pos += slen + 1 - - self.manifest[internal] = \ - ManifestItem(original, internal, mime_type, - offset, root, state) - i += 1 + for i in xrange(num_files): + if len(raw) < 5: + raise LitError('Truncated manifest') + offset, raw = u32(raw), raw[4:] + slen, raw = ord(raw[0]), raw[1:] + internal, raw = raw[:slen].decode('utf8'), raw[slen:] + slen, raw = ord(raw[0]), raw[1:] + original, raw = raw[:slen].decode('utf8'), raw[slen:] + slen, raw = ord(raw[0]), raw[1:] + mime_type, raw = raw[:slen].decode('utf8'), raw[slen+1:] + self.manifest[internal] = ManifestItem( + original, internal, mime_type, offset, root, state) + mlist = self.manifest.values() + shared = mlist[0].path + for item in mlist[1:]: + path = item.path + while not path.startswith(shared): + shared = shared[:-1] + if shared == '': + break + else: + slen = len(shared) + for item in mlist: + item.path = item.path[slen:] def read_meta(self, entry): raw = self._read_content(entry.offset, entry.size) @@ -610,16 +606,12 @@ class LitFile(object): self.meta = xml def read_drm(self): - def exists_file(name): - try: self.get_file(name) - except KeyError: return False - return True self.drmlevel = 0 - if exists_file('/DRMStorage/Licenses/EUL'): + if '/DRMStorage/Licenses/EUL' in self.entries: self.drmlevel = 5 - elif exists_file('/DRMStorage/DRMBookplate'): + elif '/DRMStorage/DRMBookplate' in self.entries: self.drmlevel = 3 - elif exists_file('/DRMStorage/DRMSealed'): + elif '/DRMStorage/DRMSealed' in self.entries: self.drmlevel = 1 else: return @@ -686,7 +678,10 @@ class LitFile(object): content = self._decrypt(content) control = control[csize:] elif guid == LZXCOMPRESS_GUID: - content = self._decompress_section(name, control, content) + reset_table = self.get_file( + '/'.join(['::DataSpace/Storage', name, 'Transform', + LZXCOMPRESS_GUID, 'InstanceData/ResetTable'])) + content = self._decompress(content, control, reset_table) control = control[csize:] else: raise LitError("Unrecognized transform: %s." 
% repr(guid)) @@ -698,9 +693,14 @@ class LitFile(object): raise LitError('Cannot extract content from a DRM protected ebook') return msdes.new(self.bookkey).decrypt(content) - def _decompress_section(self, name, control, content): + def _decompress(self, content, control, reset_table): if len(control) < 32 or u32(control[CONTROL_TAG:]) != LZXC_TAG: raise LitError("Invalid ControlData tag value") + if len(reset_table) < (RESET_INTERVAL + 8): + raise LitError("Reset table is too short") + if u32(reset_table[RESET_UCLENGTH + 4:]) != 0: + raise LitError("Reset table has 64bit value for UCLENGTH") + result = [] window_size = 14 @@ -712,13 +712,6 @@ class LitFile(object): raise LitError("Invalid window in ControlData") lzx.init(window_size) - reset_table = self.get_file('/'.join( - ['::DataSpace/Storage', name, 'Transform', - LZXCOMPRESS_GUID, 'InstanceData/ResetTable'])) - if len(reset_table) < (RESET_INTERVAL + 8): - raise LitError("Reset table is too short") - if u32(reset_table[RESET_UCLENGTH + 4:]) != 0: - raise LitError("Reset table has 64bit value for UCLENGTH") ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8 uclength = int32(reset_table[RESET_UCLENGTH:]) accum = int32(reset_table[RESET_INTERVAL:]) @@ -749,11 +742,38 @@ class LitFile(object): bytes_remaining = 0 if bytes_remaining > 0: raise LitError("Failed to completely decompress section") - return ''.join(result) - + return ''.join(result) + + def extract_content(self, output_dir=os.getcwdu()): + output_dir = os.path.abspath(output_dir) + try: + opf_path = os.path.splitext( + os.path.basename(self._stream.name))[0] + '.opf' + except AttributeError: + opf_path = 'content.opf' + opf_path = os.path.join(output_dir, opf_path) + self._ensure_dir(opf_path) + with open(opf_path, 'w') as f: + f.write(self.get_markup_file('/meta').encode('utf-8')) + for entry in self.manifest.values(): + path = os.path.join(output_dir, entry.path) + self._ensure_dir(path) + with open(path, 'w') as f: + if 'spine' in entry.state: + name = '/'.join(['/data', entry.internal, 'content']) + f.write(self.get_markup_file(name).encode('utf-8')) + else: + name = '/'.join(['/data', entry.internal]) + f.write(self.get_file(name)) + + def _ensure_dir(self, path): + dir = os.path.dirname(path) + if not os.path.isdir(dir): + os.makedirs(dir) + def get_metadata(stream): try: - litfile = LitFile(stream) + litfile = LitReader(stream) src = litfile.meta.encode('utf-8') mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd()) cover_url, cover_item = mi.cover, None @@ -775,16 +795,24 @@ def get_metadata(stream): mi = MetaInformation(title, ['Unknown']) return mi +def option_parser(): + from calibre import OptionParser + parser = OptionParser(usage=_('%prog [options] EBOOK')) + parser.add_option('-o', '--output-dir', default='.', + help=_('Output directory. Defaults to current directory.')) + parser.add_option('--verbose', default=False, action='store_true', + help='Useful for debugging.') + return parser + def main(args=sys.argv): + parser = option_parser() + opts, args = parser.parse_args(args) if len(args) != 2: - print >>sys.stderr, _('Usage: %s file.lit')%(args[0],) + parser.print_help() return 1 - mi = get_metadata(open(args[1], 'rb')) - print unicode(mi) - if mi.cover_data[1]: - cover = os.path.abspath(os.path.splitext(os.path.basename(args[1]))[0] + '.' 
+ mi.cover_data[0]) - open(cover, 'wb').write(mi.cover_data[1]) - print _('Cover saved to'), cover + lr = LitReader(args[1]) + lr.extract_content(opts.output_dir) + print _('OEB ebook created in'), opts.output_dir return 0 if __name__ == '__main__': From 731631a7d7dcc4662100caea66f841b32fae6fa6 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sat, 19 Jul 2008 16:45:41 -0400 Subject: [PATCH 11/19] Added a few speed-ups to the DES code --- src/calibre/ebooks/lit/msdes.py | 38 ++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/lit/msdes.py b/src/calibre/ebooks/lit/msdes.py index 5bc67b09bb..de980f8c3d 100644 --- a/src/calibre/ebooks/lit/msdes.py +++ b/src/calibre/ebooks/lit/msdes.py @@ -1,6 +1,7 @@ # Re-modified for use in MS LIT decryption. Un-reversed the bytebit[] array. -# Substituted Microsoft's absurd modified S-boxes. Modified the encrypt/decrypt -# methods to handle more than one block at a time. +# Substituted Microsoft's absurd modified S-boxes. Modified the +# encrypt/decrypt methods to handle more than one block at a time. Added a few +# speed-ups supported by modern versions of Python. Added option 'psyco' use. # # And lo, all the previous notices follow: @@ -125,30 +126,30 @@ class DesCipher: pcr = [0]*56 #new int[56]; kn = [0]*32 #new int[32]; - for j in range(56): + for j in xrange(56): l = pc1[j] m = l & 07 pc1m[j] = ((keyBlock[l >> 3] & bytebit[m]) != 0) - for i in range(16): + for i in xrange(16): if encrypting: m = i << 1 else: m = (15-i) << 1 n = m + 1 kn[m] = kn[n] = 0 - for j in range(28): + for j in xrange(28): l = j + totrot[i] if l < 28: pcr[j] = pc1m[l] else: pcr[j] = pc1m[l - 28] - for j in range(28, 56): + for j in xrange(28, 56): l = j + totrot[i] if l < 56: pcr[j] = pc1m[l] else: pcr[j] = pc1m[l - 28] - for j in range(24): + for j in xrange(24): if pcr[pc2[j]] != 0: kn[m] |= bigbyte[j] if pcr[pc2[j+24]] != 0: @@ -163,7 +164,7 @@ class DesCipher: rawi = 0 KnLi = 0 - for i in range(16): + for i in xrange(16): raw0 = raw[rawi] rawi += 1 raw1 = raw[rawi] @@ -187,11 +188,10 @@ class DesCipher: if len(clearText) % 8 != 0: raise TypeError, "length must be multiple of block size" result = [] - while clearText: + for base in xrange(0, len(clearText), 8): result.append(struct.pack( - ">LL", *self.des(struct.unpack(">LL", clearText[:8]), + ">LL", *self.des(struct.unpack(">LL", clearText[base:base+8]), self.encryptKeys))) - clearText = clearText[8:] return ''.join(result) #/ Decrypt a block of eight bytes. @@ -199,11 +199,10 @@ class DesCipher: if len(cipherText) % 8 != 0: raise TypeError, "length must be multiple of block size" result = [] - while cipherText: + for base in xrange(0, len(cipherText), 8): result.append(struct.pack( - ">LL", *self.des(struct.unpack(">LL", cipherText[:8]), + ">LL", *self.des(struct.unpack(">LL", cipherText[base:base+8]), self.decryptKeys))) - cipherText = cipherText[8:] return ''.join(result) # The DES function. 
@@ -234,7 +233,7 @@ class DesCipher: right ^= work leftt = ((leftt << 1) | ((leftt >> 31) & 1)) & 0xffffffffL - for round in range(8): + for round in xrange(8): work = ((right << 28) | (right >> 4)) & 0xffffffffL work ^= keys[keysi] keysi += 1 @@ -322,6 +321,7 @@ pc2 = [ 45, 41, 49, 35, 28, 31, ] +# Microsoft's modified S-boxes for LIT file encryption SP1 = [ 0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, 0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L, @@ -473,6 +473,14 @@ def new(key): block_size = 8 key_size = 8 +try: + import psyco + psyco.bind(DesCipher.deskey) + psyco.bind(DesCipher.cookey) + psyco.bind(DesCipher.des) +except ImportError: + pass + #test only: if __name__ == '__main__': des = DesCipher("\x01\x23\x45\x67\x89\xab\xcd\xef") From fb4f2f3a81c3e27837cfb47697f00cbafeac07ee Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sat, 19 Jul 2008 16:47:14 -0400 Subject: [PATCH 12/19] Added comments for LIT-specific SHA-1 changes. --- src/calibre/ebooks/lit/mssha1.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py index d61bd39094..1708c8dd8b 100644 --- a/src/calibre/ebooks/lit/mssha1.py +++ b/src/calibre/ebooks/lit/mssha1.py @@ -1,21 +1,11 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 +""" +Modified version of SHA-1 used in Microsoft LIT files. -"""A sample implementation of SHA-1 in pure Python. - - Framework adapted from Dinu Gherman's MD5 implementation by - J. Hallén and L. Creighton. SHA-1 implementation based directly on - the text of the NIST standard FIPS PUB 180-1. +Adapted from the PyPy pure-Python SHA-1 implementation. """ - -__date__ = '2004-11-17' -__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy - - import struct, copy - # ====================================================================== # Bit-Manipulation helpers # @@ -100,10 +90,13 @@ def f40_59(B, C, D): def f60_79(B, C, D): return B ^ C ^ D +# Microsoft's lovely addition... def f6_42(B, C, D): return (B + C) ^ C f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20 + +# ...and delightful changes f[3] = f20_39 f[6] = f6_42 f[10] = f20_39 @@ -148,6 +141,7 @@ class mssha1(object): self.input = [] # Initial 160 bit message digest (5 times 32 bit). + # Also changed by Microsoft from standard. self.H0 = 0x32107654L self.H1 = 0x23016745L self.H2 = 0xC4E680A2L From 6b18c8b745cf6be6dbc463d7032942a375a2d61a Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sat, 19 Jul 2008 16:47:46 -0400 Subject: [PATCH 13/19] Added "lit2oeb" to set of command-line tools --- src/calibre/linux.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 5d3ead778e..4d7ff9c8aa 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -46,6 +46,7 @@ entry_points = { 'librarything = calibre.ebooks.metadata.library_thing:main', 'mobi2oeb = calibre.ebooks.mobi.reader:main', 'lrf2html = calibre.ebooks.lrf.html.convert_to:main', + 'lit2oeb = calibre.ebooks.lit.reader:main', 'calibre-debug = calibre.debug:main', 'calibredb = calibre.library.cli:main', 'calibre-fontconfig = calibre.utils.fontconfig:main', From 006182e5f46ea8f5da43607ab530dff752a12d94 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sat, 19 Jul 2008 16:50:14 -0400 Subject: [PATCH 14/19] Fixed bug in directory processing and re-named methods to reflect public/private status. 
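For orientation while reading the rewritten directory parser below: each AOLL directory entry in the IFCM piece is a variable-length integer giving the name length, the raw name bytes, and then variable-length section, offset and size values, all using the same 7-bits-per-byte encoding as reader.py's encint() helper. A rough, standalone Python sketch of that layout (illustrative only; these helper names are hypothetical and not code from this patch):

    def read_encint(data, pos=0):
        # 7 value bits per byte; the high bit is set on every byte of the
        # integer except the last one.
        value = 0
        while True:
            byte = ord(data[pos])
            pos += 1
            value = (value << 7) | (byte & 0x7f)
            if not (byte & 0x80):
                return value, pos

    def read_directory_entry(chunk, pos=0):
        # One entry: encint name length, raw name, then encint section,
        # offset and size (the same fields stored in a DirectoryEntry).
        namelen, pos = read_encint(chunk, pos)
        name, pos = chunk[pos:pos + namelen], pos + namelen
        section, pos = read_encint(chunk, pos)
        offset, pos = read_encint(chunk, pos)
        size, pos = read_encint(chunk, pos)
        return (name, section, offset, size), pos
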
--- src/calibre/ebooks/lit/reader.py | 113 ++++++++++++++----------------- 1 file changed, 51 insertions(+), 62 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index afe5d96297..0fed4aacbc 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -19,13 +19,13 @@ import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.msdes as msdes import calibre.utils.lzx as lzx -OPF_DECL = """" +OPF_DECL = """ """ HTML_DECL = """ - """ @@ -421,8 +421,13 @@ class LitReader(object): raise LitError('Not a valid LIT file') if self.version != 1: raise LitError('Unknown LIT version %d'%(self.version,)) - self.read_secondary_header() - self.read_header_pieces() + self.entries = {} + self._read_secondary_header() + self._read_header_pieces() + self._read_section_names() + self._read_manifest() + self._read_meta() + self._read_drm() @preserve def __len__(self): @@ -437,10 +442,9 @@ class LitReader(object): def _read_content(self, offset, size): return self._read_raw(self.content_offset + offset, size) - @preserve - def read_secondary_header(self): - self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) - bytes = self._stream.read(self.sec_hdr_len) + def _read_secondary_header(self): + offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE) + bytes = self._read_raw(offset, self.sec_hdr_len) offset = int32(bytes[4:]) while offset < len(bytes): blocktype = bytes[offset:offset+4] @@ -468,23 +472,21 @@ class LitReader(object): if not hasattr(self, 'content_offset'): raise LitError('Could not figure out the content offset') - @preserve - def read_header_pieces(self): + def _read_header_pieces(self): src = self.header[self.hdr_len:] for i in range(self.num_pieces): piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE] if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: raise LitError('Piece %s has 64bit value' % repr(piece)) offset, size = u32(piece), int32(piece[8:]) - self._stream.seek(offset) - piece = self._stream.read(size) + piece = self._read_raw(offset, size) if i == 0: continue # Dont need this piece elif i == 1: if u32(piece[8:]) != self.entry_chunklen or \ u32(piece[12:]) != self.entry_unknown: raise LitError('Secondary header does not match piece') - self.read_directory(piece) + self._read_directory(piece) elif i == 2: if u32(piece[8:]) != self.count_chunklen or \ u32(piece[12:]) != self.count_unknown: @@ -495,58 +497,44 @@ class LitReader(object): elif i == 4: self.piece4_guid = piece - def read_directory(self, piece): - self.entries = {} + def _read_directory(self, piece): if not piece.startswith('IFCM'): raise LitError('Header piece #1 is not main directory.') chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28]) - if (32 + chunk_size * num_chunks) != len(piece): + if (32 + (num_chunks * chunk_size)) != len(piece): raise LitError('IFCM HEADER has incorrect length') - for chunk in range(num_chunks): - p = 32 + chunk * chunk_size - if piece[p:p+4] != 'AOLL': - continue - remaining = chunk_size - int32(piece[p+4:p+8]) - 48 - if remaining < 0: + for i in xrange(num_chunks): + offset = 32 + (i * chunk_size) + chunk = piece[offset:offset + chunk_size] + tag, chunk = chunk[:4], chunk[4:] + if tag != 'AOLL': continue + remaining, chunk = int32(chunk[:4]), chunk[4:] + if remaining >= chunk_size: raise LitError('AOLL remaining count is negative') - entries = u16(piece[p+chunk_size-2:]) - if entries <= 0: - # Hopefully everything will work even without a correct entries - # count + remaining = 
chunk_size - (remaining + 48) + entries = u16(chunk[-2:]) + if entries == 0: + # Hopefully will work even without a correct entries count entries = (2 ** 16) - 1 - piece = piece[p+48:] - i = 0 - while i < entries: + chunk = chunk[40:] + for j in xrange(entries): if remaining <= 0: break - namelen, piece, remaining = encint(piece, remaining) + namelen, chunk, remaining = encint(chunk, remaining) if namelen != (namelen & 0x7fffffff): raise LitError('Directory entry had 64bit name length.') if namelen > remaining - 3: raise LitError('Read past end of directory chunk') - name = piece[:namelen] - piece = piece[namelen:] - section, piece, remaining = encint(piece, remaining) - offset, piece, remaining = encint(piece, remaining) - size, piece, remaining = encint(piece, remaining) - + name, chunk = chunk[:namelen], chunk[namelen:] + section, chunk, remaining = encint(chunk, remaining) + offset, chunk, remaining = encint(chunk, remaining) + size, chunk, remaining = encint(chunk, remaining) entry = DirectoryEntry(name, section, offset, size) - - if name == '::DataSpace/NameList': - self.read_section_names(entry) - elif name == '/manifest': - self.read_manifest(entry) - elif name == '/meta': - self.read_meta(entry) self.entries[name] = entry - i += 1 - if not hasattr(self, 'section_names'): - raise LitError('Lit file does not have a valid NameList') - if not hasattr(self, 'manifest'): - raise LitError('Lit file does not have a valid manifest') - self.read_drm() - def read_section_names(self, entry): - raw = self._read_content(entry.offset, entry.size) + def _read_section_names(self): + if '::DataSpace/NameList' not in self.entries: + raise LitError('Lit file does not have a valid NameList') + raw = self.get_file('::DataSpace/NameList') if len(raw) < 4: raise LitError('Invalid Namelist section') pos = 4 @@ -563,9 +551,11 @@ class LitReader(object): raw[pos:pos+size].decode('utf-16-le').rstrip('\000') pos += size - def read_manifest(self, entry): + def _read_manifest(self): + if '/manifest' not in self.entries: + raise LitError('Lit file does not have a valid manifest') + raw = self.get_file('/manifest') self.manifest = {} - raw = self._read_content(entry.offset, entry.size) while raw: slen, raw = ord(raw[0]), raw[1:] if slen == 0: break @@ -600,12 +590,12 @@ class LitReader(object): for item in mlist: item.path = item.path[slen:] - def read_meta(self, entry): - raw = self._read_content(entry.offset, entry.size) + def _read_meta(self): + raw = self.get_file('/meta') xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP)) self.meta = xml - def read_drm(self): + def _read_drm(self): self.drmlevel = 0 if '/DRMStorage/Licenses/EUL' in self.entries: self.drmlevel = 5 @@ -615,13 +605,13 @@ class LitReader(object): self.drmlevel = 1 else: return - des = msdes.new(self.calculate_deskey()) + des = msdes.new(self._calculate_deskey()) bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed')) if bookkey[0] != '\000': raise LitError('Unable to decrypt title key!') self.bookkey = bookkey[1:9] - def calculate_deskey(self): + def _calculate_deskey(self): hashfiles = ['/meta', '/DRMStorage/DRMSource'] if self.drmlevel == 3: hashfiles.append('/DRMStorage/DRMBookplate') @@ -726,19 +716,18 @@ class LitReader(object): u = int32(reset_table[ofs_entry + 4:]) if u != 0: raise LitError("Reset table entry greater than 32 bits") - if size >= (len(content) + base): + if size >= len(content): raise("Reset table entry out of bounds") if bytes_remaining >= window_bytes: lzx.reset() - result.append(lzx.decompress(content, 
window_bytes)) + result.append(lzx.decompress(content[base:size], window_bytes)) bytes_remaining -= window_bytes - content = content[size - base:] base = size accum += int32(reset_table[RESET_INTERVAL:]) ofs_entry += 8 if bytes_remaining < window_bytes and bytes_remaining > 0: lzx.reset() - result.append(lzx.decompress(content, bytes_remaining)) + result.append(lzx.decompress(content[base:], bytes_remaining)) bytes_remaining = 0 if bytes_remaining > 0: raise LitError("Failed to completely decompress section") From a349d763791c48d47cea1f32778b244aef794b5c Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sat, 19 Jul 2008 18:24:59 -0400 Subject: [PATCH 15/19] Various encoding fix-ups. Fix for broken file(s?) from Penguin. --- src/calibre/ebooks/lit/reader.py | 40 ++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 0fed4aacbc..66d6fe9385 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -8,6 +8,7 @@ __copyright__ = '2008, Kovid Goyal ' import sys, struct, cStringIO, os import functools +import codecs from itertools import repeat from calibre import relpath @@ -33,7 +34,6 @@ HTML_DECL = """ DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" -LZXC_TAG = 0x43585a4c CONTROL_TAG = 4 CONTROL_WINDOW_SIZE = 12 RESET_NENTRIES = 4 @@ -41,11 +41,11 @@ RESET_HDRLEN = 12 RESET_UCLENGTH = 16 RESET_INTERVAL = 32 -FLAG_OPENING = 1 -FLAG_CLOSING = 2 -FLAG_BLOCK = 4 -FLAG_HEAD = 8 -FLAG_ATOM = 16 +FLAG_OPENING = (1 << 0) +FLAG_CLOSING = (1 << 1) +FLAG_BLOCK = (1 << 2) +FLAG_HEAD = (1 << 3) +FLAG_ATOM = (1 << 4) XML_ENTITIES = ['&', ''', '<', '>', '"'] def u32(bytes): @@ -202,7 +202,7 @@ class UnBinary(object): is_goingdown = False if not tag_name: raise LitError('Tag ends before it begins.') - self.buf.write('') + self.buf.write(u''.join(('')).encode('utf-8')) dynamic_tag = 0 tag_name = None state = 'text' @@ -252,7 +252,7 @@ class UnBinary(object): state = 'get attr' elif count > 0: if not in_censorship: - self.buf.write(c) + self.buf.write(unicode(c).encode('utf-8')) count -= 1 if count == 0: if not in_censorship: @@ -272,7 +272,7 @@ class UnBinary(object): tag_name += c count -= 1 if count == 0: - self.buf.write(tag_name) + self.buf.write(unicode(tag_name).encode('utf-8')) state = 'get attr' elif state == 'get attr length': @@ -283,7 +283,7 @@ class UnBinary(object): state = 'get custom attr' elif state == 'get custom attr': - self.buf.write(c) + self.buf.write(unicode(c).encode('utf-8')) count -= 1 if count == 0: self.buf.write('=') @@ -592,7 +592,13 @@ class LitReader(object): def _read_meta(self): raw = self.get_file('/meta') - xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP)) + try: + xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP)) + except LitError: + if 'PENGUIN group' not in raw: raise + print "WARNING: attempting PENGUIN malformed OPF fix" + raw = raw.replace('PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1) + xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP)) self.meta = xml def _read_drm(self): @@ -669,8 +675,8 @@ class LitReader(object): control = control[csize:] elif guid == LZXCOMPRESS_GUID: reset_table = self.get_file( - '/'.join(['::DataSpace/Storage', name, 'Transform', - LZXCOMPRESS_GUID, 'InstanceData/ResetTable'])) + '/'.join(('::DataSpace/Storage', name, 'Transform', + LZXCOMPRESS_GUID, 
'InstanceData/ResetTable'))) content = self._decompress(content, control, reset_table) control = control[csize:] else: @@ -684,7 +690,7 @@ class LitReader(object): return msdes.new(self.bookkey).decrypt(content) def _decompress(self, content, control, reset_table): - if len(control) < 32 or u32(control[CONTROL_TAG:]) != LZXC_TAG: + if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC": raise LitError("Invalid ControlData tag value") if len(reset_table) < (RESET_INTERVAL + 8): raise LitError("Reset table is too short") @@ -743,16 +749,16 @@ class LitReader(object): opf_path = os.path.join(output_dir, opf_path) self._ensure_dir(opf_path) with open(opf_path, 'w') as f: - f.write(self.get_markup_file('/meta').encode('utf-8')) + f.write(self.meta.encode('utf-8')) for entry in self.manifest.values(): path = os.path.join(output_dir, entry.path) self._ensure_dir(path) with open(path, 'w') as f: if 'spine' in entry.state: - name = '/'.join(['/data', entry.internal, 'content']) + name = '/'.join(('/data', entry.internal, 'content')) f.write(self.get_markup_file(name).encode('utf-8')) else: - name = '/'.join(['/data', entry.internal]) + name = '/'.join(('/data', entry.internal)) f.write(self.get_file(name)) def _ensure_dir(self, path): From 015ca663506ee5fa930adbbcaa294e847a4ae2d8 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sun, 20 Jul 2008 00:20:18 -0400 Subject: [PATCH 16/19] Added various copyright headers and doc strings --- src/calibre/ebooks/lit/maps/__init__.py | 7 +++++++ src/calibre/ebooks/lit/maps/html.py | 7 +++++++ src/calibre/ebooks/lit/maps/opf.py | 7 +++++++ src/calibre/ebooks/lit/reader.py | 3 ++- src/calibre/utils/lzx/lzxmodule.c | 7 +++++++ 5 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/lit/maps/__init__.py b/src/calibre/ebooks/lit/maps/__init__.py index 2abab3efe9..2235c384ff 100644 --- a/src/calibre/ebooks/lit/maps/__init__.py +++ b/src/calibre/ebooks/lit/maps/__init__.py @@ -1,2 +1,9 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +""" +Microsoft LIT tag and attribute tables. +""" + from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP from calibre.ebooks.lit.maps.html import MAP as HTML_MAP diff --git a/src/calibre/ebooks/lit/maps/html.py b/src/calibre/ebooks/lit/maps/html.py index de0286c764..c0b9987f32 100644 --- a/src/calibre/ebooks/lit/maps/html.py +++ b/src/calibre/ebooks/lit/maps/html.py @@ -1,3 +1,10 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +""" +Microsoft LIT HTML tag and attribute tables, copied from ConvertLIT. +""" + TAGS = [ None, None, diff --git a/src/calibre/ebooks/lit/maps/opf.py b/src/calibre/ebooks/lit/maps/opf.py index cc1acc4dfa..f3bb7dcb89 100644 --- a/src/calibre/ebooks/lit/maps/opf.py +++ b/src/calibre/ebooks/lit/maps/opf.py @@ -1,3 +1,10 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +""" +Microsoft LIT OPF tag and attribute tables, copied from ConvertLIT. +""" + TAGS = [ None, "package", diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 66d6fe9385..c53f266e79 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -4,7 +4,8 @@ Support for reading LIT files. from __future__ import with_statement __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' +__copyright__ = '2008, Kovid Goyal ' \ + 'and Marshall T. 
Vandegrift ' import sys, struct, cStringIO, os import functools diff --git a/src/calibre/utils/lzx/lzxmodule.c b/src/calibre/utils/lzx/lzxmodule.c index bf8a48a056..a1917b5749 100644 --- a/src/calibre/utils/lzx/lzxmodule.c +++ b/src/calibre/utils/lzx/lzxmodule.c @@ -1,3 +1,10 @@ +/* __license__ = 'GPL v3' + * __copyright__ = '2008, Marshall T. Vandegrift ' + * + * Python module C glue code. + */ + + #include #include From 87ae95cc7a1caeb2f20236db2df4b124fb99cc18 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sun, 20 Jul 2008 00:40:41 -0400 Subject: [PATCH 17/19] Removed duplicate LIT-parsing code. --- src/calibre/ebooks/lit/reader.py | 26 +- src/calibre/ebooks/metadata/lit.py | 737 +---------------------------- 2 files changed, 16 insertions(+), 747 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index c53f266e79..65fce4f3e9 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -767,33 +767,9 @@ class LitReader(object): if not os.path.isdir(dir): os.makedirs(dir) -def get_metadata(stream): - try: - litfile = LitReader(stream) - src = litfile.meta.encode('utf-8') - mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd()) - cover_url, cover_item = mi.cover, None - if cover_url: - cover_url = relpath(cover_url, os.getcwd()) - for item in litfile.manifest.values(): - if item.path == cover_url: - cover_item = item.internal - if cover_item is not None: - ext = cover_url.rpartition('.')[-1] - if not ext: - ext = 'jpg' - else: - ext = ext.lower() - cd = litfile.get_file(cover_item) - mi.cover_data = (ext, cd) if cd else (None, None) - except: - title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown' - mi = MetaInformation(title, ['Unknown']) - return mi - def option_parser(): from calibre import OptionParser - parser = OptionParser(usage=_('%prog [options] EBOOK')) + parser = OptionParser(usage=_('%prog [options] LITFILE')) parser.add_option('-o', '--output-dir', default='.', help=_('Output directory. Defaults to current directory.')) parser.add_option('--verbose', default=False, action='store_true', diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index 2b8c3a4b9f..825fe45cf4 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -1,734 +1,25 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' -Support for reading the metadata from a lit file. +Support for reading the metadata from a LIT file. 
''' -import sys, struct, cStringIO, os -from itertools import repeat +import sys, cStringIO, os from calibre import relpath from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf import OPFReader - -OPF_ATTR_MAP = [ - None, - "href", - "%never-used", - "%guid", - "%minimum_level", - "%attr5", - "id", - "href", - "media-type", - "fallback", - "idref", - "xmlns:dc", - "xmlns:oebpackage", - "role", - "file-as", - "event", - "scheme", - "title", - "type", - "unique-identifier", - "name", - "content", - "xml:lang", - ] - -OPF_TAG_MAP = [ - None, - "package", - "dc:Title", - "dc:Creator", - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - "manifest", - "item", - "spine", - "itemref", - "metadata", - "dc-metadata", - "dc:Subject", - "dc:Description", - "dc:Publisher", - "dc:Contributor", - "dc:Date", - "dc:Type", - "dc:Format", - "dc:Identifier", - "dc:Source", - "dc:Language", - "dc:Relation", - "dc:Coverage", - "dc:Rights", - "x-metadata", - "meta", - "tours", - "tour", - "site", - "guide", - "reference", - None, - ] - -class DirectoryEntry(object): - def __init__(self, name, section, offset, size): - self.name = name - self.section = section - self.offset = offset - self.size = size - - def __repr__(self): - return '%s\n\tSection: %d\n\tOffset: %d\n\tSize: %d\n'%(self.name, - self.section, self.offset, self.size) - - def __str__(self): - return repr(self) - -class LitReadError(Exception): - pass - -def u32(bytes): - b = struct.unpack('BBBB', bytes[:4]) - return b[0] + (b[1] << 8) + (b[2] << 16) + (b[3] << 32) - -def u16(bytes): - b = struct.unpack('BB', bytes[:2]) - return b[0] + (b[1] << 8) - -def int32(bytes): - return u32(bytes)&0x7FFFFFFF - -def encint(bytes, remaining): - pos, val = 0, 0 - while remaining > 0: - b = ord(bytes[pos]) - pos += 1 - remaining -= 1 - val <<= 7 - val |= (b & 0x7f) - if b & 0x80 == 0: break - return val, bytes[pos:], remaining - -def read_utf8_char(bytes, pos): - c = ord(bytes[pos]) - mask = 0x80 - if (c & mask): - elsize = 0 - while c & mask: - mask >>= 1 - elsize += 1 - if (mask <= 1) or (mask == 0x40): - raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos]))) - else: - elsize = 1 - - - if elsize > 1: - if elsize + pos > len(bytes): - raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos]))) - c &= (mask - 1) - for i in range(1, elsize): - b = ord(bytes[pos+i]) - if (b & 0xC0) != 0x80: - raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos:pos+i]))) - c = (c << 6) | (b & 0x3F) - return unichr(c), pos+elsize - -FLAG_OPENING = 1 -FLAG_CLOSING = 2 -FLAG_BLOCK = 4 -FLAG_HEAD = 8 -FLAG_ATOM = 16 -XML_ENTITIES = ['&', ''', '<', '>', '"'] - -class UnBinary(object): - def __init__(self, bin, manifest, attr_map=OPF_ATTR_MAP, tag_map=OPF_TAG_MAP, - tag_to_attr_map=[[] for i in range(43)]): - self.manifest = manifest - self.pending_indent = 0 - self.lingering_space = 0 - self.was_in_text = 0 - self.attr_map = attr_map - self.tag_map = tag_map - self.tag_to_attr_map = tag_to_attr_map - self.opf = self.attr_map is OPF_ATTR_MAP - self.bin = bin - self.buf = cStringIO.StringIO() - self.ampersands = [] - self.binary_to_text() - self.raw = self.buf.getvalue().lstrip().decode('utf-8') - self.escape_ampersands() - - def escape_ampersands(self): - offset = 0 - for pos in self.ampersands: - test = self.raw[pos+offset:pos+offset+6] - if test.startswith('&#') and ';' in test: - continue - escape = True - for ent in XML_ENTITIES: - if test.startswith(ent): - escape = False - 
break - if not escape: - continue - self.raw = self.raw[:pos+offset] + '&' + self.raw[pos+offset+1:] - offset += 4 - - - def write_spaces(self, depth): - self.buf.write(u''.join(repeat(' ', depth))) - - def item_path(self, internal_id): - for i in self.manifest: - if i == internal_id: - return i.path - raise LitReadError('Could not find item %s'%(internal_id,)) - - def __unicode__(self): - return self.raw - - def binary_to_text(self, base=0, depth=0): - space_enabled, saved_space_enabled = 1, 0 - was_indented, is_goingdown = 0, 0 - tag_name = current_map = None - dynamic_tag = errors = in_censorship = 0 - - state = 'text' - index = base - flags = 0 - - while index < len(self.bin): - c, index = read_utf8_char(self.bin, index) - if state == 'text': - if ord(c) == 0: - state = 'get flags' - continue - if (not self.was_in_text) or space_enabled: - space_enabled = 0; - if c in (' ', '\t', '\n', '\r'): - space_enabled += 1 - else: - self.was_in_text = 1 - if c == '\v': - c = '\n' - pending_indent = 0 - if c == '&': - self.ampersands.append(self.buf.tell()-1) - self.buf.write(c.encode('utf-8') if isinstance(c, unicode) else c) - elif state == 'get flags': - if ord(c) == 0: - state = 'text' - continue - flags = ord(c) - state = 'get tag' - elif state == 'get tag': - state = 'text' if ord(c) == 0 else 'get attr' - if flags & FLAG_OPENING: - if space_enabled and ((not self.was_in_text) or (flags &(FLAG_BLOCK|FLAG_HEAD))): - self.pending_indent += 1 - if self.pending_indent or self.opf: - was_indented += 1 - self.buf.write(u'\n') - self.write_spaces(depth) - pending_indent = 0 - if (flags & FLAG_HEAD) or (flags & FLAG_BLOCK) or \ - self.opf or depth == 0: - pending_indent = 1 - tag = ord(c) - self.buf.write('<') - if not (flags & FLAG_CLOSING): - is_goingdown = 1 - if tag == 0x8000: - state = 'get custom length' - continue - if flags & FLAG_ATOM: - raise LitReadError('TODO: Atoms not yet implemented') - elif tag < len(self.tag_map): - tag_name = self.tag_map[tag] - current_map = self.tag_to_attr_map[tag] - else: - dynamic_tag += 1 - errors += 1 - tag_name = '?'+unichr(tag)+'?' 
- current_map = self.tag_to_attr_map[tag] - print 'WARNING: tag %s unknown'%(unichr(tag),) - - self.buf.write(unicode(tag_name).encode('utf-8')) - elif flags & FLAG_CLOSING: - #if depth == 0: - # raise LitReadError('Extra closing tag') - self.lingering_space = space_enabled - return index - elif state == 'get attr': - in_censorship = 0 - if ord(c) == 0: - if not is_goingdown: - tag_name = None - dynamic_tag = 0 - self.buf.write(' />') - else: - self.buf.write('>') - if not self.opf and (flags & (FLAG_BLOCK|FLAG_HEAD)): - pending_indent += 1 - index = self.binary_to_text(base=index, depth=depth+1) - is_goingdown = 0 - if not tag_name: - raise LitReadError('Tag ends before it begins.') - saved_space_enabled = space_enabled - space_enabled = self.lingering_space - if space_enabled and was_indented and not self.was_in_text: - self.buf.write('\n') - self.write_spaces(depth) - self.buf.write('') - if (space_enabled and self.opf) or (flags & (FLAG_BLOCK|FLAG_HEAD)): - self.pending_indent += 1 - dynamic_tag = 0 - tag_name = None - space_enabled = saved_space_enabled - - self.was_in_text = 0 - state = 'text' - else: - if ord(c) == 0x8000: - state = 'get attr length' - continue - attr = None - if ord(c) < len(current_map) and current_map[ord(c)]: - attr = current_map[ord(c)] - elif ord(c) < len(self.attr_map): - attr = self.attr_map[ord(c)] - - if not attr or not isinstance(attr, basestring): - raise LitReadError('Unknown attribute %d in tag %s'%(ord(c), tag_name)) - - if attr.startswith('%'): - in_censorship = 1 - state = 'get value length' - continue - - self.buf.write(' ' + unicode(attr).encode('utf-8') + '=') - if attr in ['href', 'src']: - state = 'get href' - else: - state = 'get value length' - elif state == 'get value length': - if not in_censorship: - self.buf.write('"') - char_count = ord(c) - 1 - if not char_count: - if not in_censorship: - self.buf.write('"') - in_censorship = 0 - state = 'get attr' - state = 'get value' - if ord(c) == 0xffff: - continue - if char_count < 0 or char_count > len(self.bin)-index: - raise LitReadError('Invalid character count %d'%(char_count,)) - elif state == 'get value': - if char_count == 0xfffe: - if not in_censorship: - self.buf.write(str(ord(c)-1)) - in_censorship = 0 - state = 'get attr' - elif char_count: - if not in_censorship: - self.buf.write(c) - char_count -= 1 - if not char_count: - if not in_censorship: - self.buf.write('"') - in_censorship = 0 - state = 'get attr' - elif state == 'get custom length': - char_count = ord(c) - 1 - if char_count <= 0 or char_count > len(self.bin)-index: - raise LitReadError('Invalid character count %d'%(char_count,)) - dynamic_tag += 1 - state = 'get custom' - tag_name = '' - elif state == 'get custom': - tag += c - char_count -= 1 - if not char_count: - self.buf.write(tag_name) - state = 'get attr' - elif state == 'get attr length': - char_count = ord(c) - 1 - if char_count <= 0 or char_count > len(self.bin)-index: - raise LitReadError('Invalid character count %d'%(char_count,)) - self.buf.write(' ') - state = 'get custom attr' - elif state == 'get custom attr': - self.buf.write(c) - char_count -= 1 - if not char_count: - self.buf.write('=') - state = 'get value length' - elif state == 'get href': - char_count = ord(c) - 1 - if char_count <= 0: - raise LitReadError('Invalid character count %d'%(char_count,)) - href = self.bin[index+1:index+char_count].decode('ascii') - index += char_count - doc, m, frag = href.partition('#') - path = self.item_path(doc) - if m and frag: - path += m+frag - 
self.buf.write((u'"%s"'%(path,)).encode('utf-8')) - state = 'get attr' - - self.lingering_space = space_enabled - return index - -class ManifestItem(object): - - def __init__(self, original, internal, mime_type, offset, root, state): - self.original = original - self.internal = internal - self.mime_type = mime_type - self.offset = offset - self.root = root - self.state = state - self.prefix = 'images' if state == 'images' else 'css' if state == 'css' else '' - self.prefix = self.prefix + os.sep if self.prefix else '' - self.path = self.prefix + self.original - - def __eq__(self, other): - if hasattr(other, 'internal'): - return self.internal == other.internal - return self.internal == other - - def __repr__(self): - return self.internal + u'->' + self.path - -class LitFile(object): - - PIECE_SIZE = 16 - - @apply - def magic(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(0) - val = self._stream.read(8) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def version(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(8) - val = u32(self._stream.read(4)) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def hdr_len(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(12) - val = int32(self._stream.read(4)) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def num_pieces(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(16) - val = int32(self._stream.read(4)) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def sec_hdr_len(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(20) - val = int32(self._stream.read(4)) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def guid(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(24) - val = self._stream.read(16) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def header(): - def fget(self): - opos = self._stream.tell() - size = self.hdr_len + self.num_pieces*self.PIECE_SIZE + self.sec_hdr_len - self._stream.seek(0) - val = self._stream.read(size) - self._stream.seek(opos) - return val - return property(fget=fget) - - def __init__(self, stream): - self._stream = stream - if self.magic != 'ITOLITLS': - raise LitReadError('Not a valid LIT file') - if self.version != 1: - raise LitReadError('Unknown LIT version %d'%(self.version,)) - self.read_secondary_header() - self.read_header_pieces() - - - def read_secondary_header(self): - opos = self._stream.tell() - try: - self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) - bytes = self._stream.read(self.sec_hdr_len) - offset = int32(bytes[4:]) - - while offset < len(bytes): - blocktype = bytes[offset:offset+4] - blockver = u32(bytes[offset+4:]) - - if blocktype == 'CAOL': - if blockver != 2: - raise LitReadError('Unknown CAOL block format %d'%(blockver,)) - self.creator_id = u32(bytes[offset+12:]) - self.entry_chunklen = u32(bytes[offset+20:]) - self.count_chunklen = u32(bytes[offset+24:]) - self.entry_unknown = u32(bytes[offset+28:]) - self.count_unknown = u32(bytes[offset+32:]) - offset += 48 - elif blocktype == 'ITSF': - if blockver != 4: - raise LitReadError('Unknown ITSF block format %d'%(blockver,)) - if u32(bytes[offset+4+16:]): - raise LitReadError('This file has a 64bit content offset') - self.content_offset = u32(bytes[offset+16:]) - self.timestamp = u32(bytes[offset+24:]) - 
self.language_id = u32(bytes[offset+28:]) - offset += 48 - - if not hasattr(self, 'content_offset'): - raise LitReadError('Could not figure out the content offset') - finally: - self._stream.seek(opos) - - def read_header_pieces(self): - opos = self._stream.tell() - try: - src = self.header[self.hdr_len:] - for i in range(self.num_pieces): - piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE] - if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: - raise LitReadError('Piece %s has 64bit value'%(repr(piece),)) - offset, size = u32(piece), int32(piece[8:]) - self._stream.seek(offset) - piece = self._stream.read(size) - if i == 0: - continue # Dont need this piece - elif i == 1: - if u32(piece[8:]) != self.entry_chunklen or \ - u32(piece[12:]) != self.entry_unknown: - raise LitReadError('Secondary header does not match piece') - self.read_directory(piece) - elif i == 2: - if u32(piece[8:]) != self.count_chunklen or \ - u32(piece[12:]) != self.count_unknown: - raise LitReadError('Secondary header does not match piece') - continue # No data needed from this piece - elif i == 3: - self.piece3_guid = piece - elif i == 4: - self.piece4_guid = piece - finally: - self._stream.seek(opos) - - def read_directory(self, piece): - self.entries = [] - if not piece.startswith('IFCM'): - raise LitReadError('Header piece #1 is not main directory.') - chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28]) - - if 32 + chunk_size*num_chunks != len(piece): - raise LitReadError('IFCM HEADER has incorrect length') - - for chunk in range(num_chunks): - p = 32 + chunk*chunk_size - if piece[p:p+4] != 'AOLL': - continue - remaining = chunk_size - int32(piece[p+4:p+8]) - 48 - if remaining < 0: - raise LitReadError('AOLL remaining count is negative') - - entries = u16(piece[p+chunk_size-2:]) - - if entries <= 0: # Hopefully everything will work even without a correct entries count - entries = (2**16)-1 - - piece = piece[p+48:] - i = 0 - while i < entries: - if remaining <= 0: break - namelen, piece, remaining = encint(piece, remaining) - if namelen != (namelen & 0x7fffffff): - raise LitReadError('Directory entry had 64bit name length.') - if namelen > remaining - 3: - raise LitReadError('Read past end of directory chunk') - name = piece[:namelen] - piece = piece[namelen:] - section, piece, remaining = encint(piece, remaining) - offset, piece, remaining = encint(piece, remaining) - size, piece, remaining = encint(piece, remaining) - - entry = DirectoryEntry(name, section, offset, size) - - if name == '::DataSpace/NameList': - self.read_section_names(entry) - elif name == '/manifest': - self.read_manifest(entry) - elif name == '/meta': - self.read_meta(entry) - self.entries.append(entry) - i += 1 - - if not hasattr(self, 'sections'): - raise LitReadError('Lit file does not have a valid NameList') - - if not hasattr(self, 'manifest'): - raise LitReadError('Lit file does not have a valid manifest') - - def read_section_names(self, entry): - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - if len(raw) < 4: - raise LitReadError('Invalid Namelist section') - pos = 4 - self.num_sections = u16(raw[2:pos]) - - self.sections = {} - for section in range(self.num_sections): - size = u16(raw[pos:pos+2]) - pos += 2 - size = size*2 + 2 - if pos + size > len(raw): - raise LitReadError('Invalid Namelist section') - self.sections[section] = raw[pos:pos+size].decode('utf-16-le') - pos += size - finally: - self._stream.seek(opos) - - def 
read_manifest(self, entry): - opos = self._stream.tell() - try: - self.manifest = [] - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - pos = 0 - while pos < len(raw): - size = ord(raw[pos]) - if size == 0: break - pos += 1 - root = raw[pos:pos+size].decode('utf8') - pos += size - if pos >= len(raw): - raise LitReadError('Truncated manifest.') - for state in ['spine', 'not spine', 'css', 'images']: - num_files = int32(raw[pos:pos+4]) - pos += 4 - if num_files == 0: continue - - i = 0 - while i < num_files: - if pos+5 >= len(raw): - raise LitReadError('Truncated manifest.') - offset = u32(raw[pos:pos+4]) - pos += 4 - - slen = ord(raw[pos]) - pos += 1 - internal = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - original = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - mime_type = raw[pos:pos+slen].decode('utf8') - pos += slen +1 - - self.manifest.append(ManifestItem(original, internal, mime_type, offset, root, state)) - i += 1 - finally: - self._stream.seek(opos) - - def read_meta(self, entry): - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - - xml = \ -'''\ - - -'''+\ - unicode(UnBinary(raw, self.manifest)) - self.meta = xml - finally: - self._stream.seek(opos) - - def read_image(self, internal_name): - cover_entry = None - for entry in self.entries: - if internal_name in entry.name: - cover_entry = entry - break - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + cover_entry.offset) - return self._stream.read(cover_entry.size) - finally: - self._stream.seek(opos) +from calibre.ebooks.lit.reader import LitReader def get_metadata(stream): try: - litfile = LitFile(stream) + litfile = LitReader(stream) src = litfile.meta.encode('utf-8') mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd()) cover_url, cover_item = mi.cover, None if cover_url: cover_url = relpath(cover_url, os.getcwd()) - for item in litfile.manifest: + for item in litfile.manifest.values(): if item.path == cover_url: cover_item = item.internal if cover_item is not None: @@ -737,26 +28,28 @@ def get_metadata(stream): ext = 'jpg' else: ext = ext.lower() - cd = litfile.read_image(cover_item) - mi.cover_data = (ext, cd) if cd else (None, None) + cd = litfile.get_file('/data/' + cover_item) + mi.cover_data = (ext, cd) if cd else (None, None) except: title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown' mi = MetaInformation(title, ['Unknown']) return mi - - def main(args=sys.argv): if len(args) != 2: - print >>sys.stderr, _('Usage: %s file.lit')%(args[0],) + print >>sys.stderr, _('Usage: %s file.lit') % args[0] return 1 - mi = get_metadata(open(args[1], 'rb')) + fname = args[1] + mi = get_metadata(open(fname, 'rb')) print unicode(mi) if mi.cover_data[1]: - cover = os.path.abspath(os.path.splitext(os.path.basename(args[1]))[0] + '.' + mi.cover_data[0]) + cover = os.path.abspath( + '.'.join((os.path.splitext(os.path.basename(fname))[0], + mi.cover_data[0]))) open(cover, 'wb').write(mi.cover_data[1]) print _('Cover saved to'), cover return 0 if __name__ == '__main__': - sys.exit(main()) \ No newline at end of file + sys.exit(main()) + From 63b6550e21ca0e3b2d3ff5afeba479c0c3bec147 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sun, 20 Jul 2008 01:02:14 -0400 Subject: [PATCH 18/19] Fix for LIT files with '..' 
 in filename paths
---
 src/calibre/ebooks/lit/reader.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py
index 65fce4f3e9..2a862141d1 100644
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -331,7 +331,15 @@ class ManifestItem(object):
         self.offset = offset
         self.root = root
         self.state = state
-        self.path = self.original
+        # Some paths in Fictionwise "multiformat" LIT files contain '..' (!?)
+        nodes = original.split('/')
+        path = []
+        for node in nodes:
+            if node == '..':
+                if path: path.pop()
+                continue
+            path.append(node)
+        self.path = os.path.join(*path)
 
     def __eq__(self, other):
         if hasattr(other, 'internal'):

From 56b5b0e26c1505e16bccabeb513dc3d7f9c69241 Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift" 
Date: Sun, 20 Jul 2008 01:08:36 -0400
Subject: [PATCH 19/19] Fix a few lines which flow beyond 80 columns

---
 src/calibre/ebooks/lit/reader.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py
index 2a862141d1..8cef0fdd18 100644
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -122,7 +122,8 @@ class UnBinary(object):
                     break
             if not escape:
                 continue
-            self.raw = self.raw[:pos+offset] + '&amp;' + self.raw[pos+offset+1:]
+            self.raw = '&amp;'.join(
+                (self.raw[:pos+offset], self.raw[pos+offset+1:]))
             offset += 4
 
     def item_path(self, internal_id):
@@ -203,7 +204,8 @@
                         is_goingdown = False
                         if not tag_name:
                             raise LitError('Tag ends before it begins.')
-                        self.buf.write(u''.join(('</', tag_name, '>')).encode('utf-8'))
+                        self.buf.write(u''.join(
+                            ('</', tag_name, '>')).encode('utf-8'))
                         dynamic_tag = 0
                         tag_name = None
                     state = 'text'
@@ -606,7 +608,8 @@ class LitReader(object):
         except LitError:
             if 'PENGUIN group' not in raw: raise
             print "WARNING: attempting PENGUIN malformed OPF fix"
-            raw = raw.replace('PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
+            raw = raw.replace(
+                'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
             xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP))
         self.meta = xml
 
@@ -735,7 +738,8 @@
                 raise("Reset table entry out of bounds")
             if bytes_remaining >= window_bytes:
                 lzx.reset()
-                result.append(lzx.decompress(content[base:size], window_bytes))
+                result.append(
+                    lzx.decompress(content[base:size], window_bytes))
                 bytes_remaining -= window_bytes
                 base = size
             accum += int32(reset_table[RESET_INTERVAL:])
@@ -778,10 +782,12 @@
 def option_parser():
     from calibre import OptionParser
     parser = OptionParser(usage=_('%prog [options] LITFILE'))
-    parser.add_option('-o', '--output-dir', default='.',
-        help=_('Output directory. Defaults to current directory.'))
-    parser.add_option('--verbose', default=False, action='store_true',
-        help='Useful for debugging.')
+    parser.add_option(
+        '-o', '--output-dir', default='.',
+        help=_('Output directory. Defaults to current directory.'))
+    parser.add_option(
+        '--verbose', default=False, action='store_true',
+        help='Useful for debugging.')
     return parser
 
 def main(args=sys.argv):