From da29a58363f770f38f01e02e3cb4221331666c0a Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift" <llasram@gmail.com>
Date: Fri, 11 Jul 2008 14:37:27 -0400
Subject: [PATCH 1/7] Integrate own MOBI cleanup patch (height/width to CSS on the parsed soup)

---
 src/calibre/ebooks/mobi/reader.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index dea87dbd8c..05093f3c1a 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -13,7 +13,7 @@ except ImportError:
     import Image as PILImage
 
 from calibre import __appname__
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
+from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.huffcdic import HuffReader
 from calibre.ebooks.mobi.palmdoc import decompress_doc
@@ -165,13 +165,14 @@ class MobiReader(object):
         self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore')
         self.extract_images(processed_records, output_dir)
         self.replace_page_breaks()
-        self.cleanup()
+        self.cleanup_html()
         
         self.processed_html = re.compile('<head>', re.IGNORECASE).sub(
             '<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n',
                                      self.processed_html)
         
         soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<'))
+        self.cleanup_soup(soup)
         guide = soup.find('guide')
         for elem in soup.findAll(['metadata', 'guide']):
             elem.extract()
@@ -192,10 +193,29 @@ class MobiReader(object):
             if ncx:
                 open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx)
         
-    def cleanup(self):
+    def cleanup_html(self):  # regex-level cleanup of the raw markup string
         self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
-        self.processed_html = re.sub(r'<([^>]*) height="([^"]*)"', r'<\1 style="margin-top: \2"', self.processed_html)
-        self.processed_html = re.sub(r'<([^>]*) width="([^"]*)"', r'<\1 style="text-indent: \2"', self.processed_html)
+    
+    def cleanup_soup(self, soup):
+        # Fold each tag's height/width attributes into its inline style.
+        rules = (('height', 'margin-top: %s'), ('width', 'text-indent: %s'))
+        for node in soup.recursiveChildGenerator():
+            if not isinstance(node, Tag):
+                continue
+            declarations = []
+            try:
+                declarations.append(node['style'])
+            except KeyError:
+                pass
+            # A KeyError on lookup just skips that rule for this tag.
+            for attr, template in rules:
+                try:
+                    declarations.append(template % node[attr])
+                    del node[attr]
+                except KeyError:
+                    pass
+            if declarations:
+                node['style'] = '; '.join(declarations)
     
     def create_opf(self, htmlfile, guide=None):
         mi = self.book_header.exth.mi

From 615d5ea2795563f8af9dc34c2c2c03c84c9c9714 Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift" <llasram@gmail.com>
Date: Wed, 16 Jul 2008 10:00:49 -0400
Subject: [PATCH 2/7] Checkpoint LIT reader work in progress (to move to office)

---
 src/calibre/ebooks/lit/maps/__init__.py |    7 +-
 src/calibre/ebooks/lit/maps/html.py     | 1568 +++++++++++------------
 src/calibre/ebooks/lit/maps/opf.py      |   54 +-
 src/calibre/ebooks/lit/mssha1.py        |  343 +++++
 src/calibre/ebooks/lit/reader.py        |  418 +++---
 5 files changed, 1352 insertions(+), 1038 deletions(-)
 create mode 100644 src/calibre/ebooks/lit/mssha1.py

diff --git a/src/calibre/ebooks/lit/maps/__init__.py b/src/calibre/ebooks/lit/maps/__init__.py
index eb99464d9b..2abab3efe9 100644
--- a/src/calibre/ebooks/lit/maps/__init__.py
+++ b/src/calibre/ebooks/lit/maps/__init__.py
@@ -1,5 +1,2 @@
-import calibre.ebooks.maps.opf as opf
-import calibre.ebooks.maps.html as html
-
-OPF_MAP = opf.MAP
-HTML_MAP = html.MAP
+from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
+from calibre.ebooks.lit.maps.html import MAP as HTML_MAP
diff --git a/src/calibre/ebooks/lit/maps/html.py b/src/calibre/ebooks/lit/maps/html.py
index 095b0bcc3e..de0286c764 100644
--- a/src/calibre/ebooks/lit/maps/html.py
+++ b/src/calibre/ebooks/lit/maps/html.py
@@ -1,786 +1,3 @@
-ATTRS0 = { 
-    0x8010 => "tabindex",
-    0x8046 => "title",
-    0x804b => "style",
-    0x804d => "disabled",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x83fe => "datafld",
-    0x83ff => "datasrc",
-    0x8400 => "dataformatas",
-    0x87d6 => "accesskey",
-    0x9392 => "lang",
-    0x93ed => "language",
-    0x93fe => "dir",
-    0x9771 => "onmouseover",
-    0x9772 => "onmouseout",
-    0x9773 => "onmousedown",
-    0x9774 => "onmouseup",
-    0x9775 => "onmousemove",
-    0x9776 => "onkeydown",
-    0x9777 => "onkeyup",
-    0x9778 => "onkeypress",
-    0x9779 => "onclick",
-    0x977a => "ondblclick",
-    0x977e => "onhelp",
-    0x977f => "onfocus",
-    0x9780 => "onblur",
-    0x9783 => "onrowexit",
-    0x9784 => "onrowenter",
-    0x9786 => "onbeforeupdate",
-    0x9787 => "onafterupdate",
-    0x978a => "onreadystatechange",
-    0x9790 => "onscroll",
-    0x9794 => "ondragstart",
-    0x9795 => "onresize",
-    0x9796 => "onselectstart",
-    0x9797 => "onerrorupdate",
-    0x9799 => "ondatasetchanged",
-    0x979a => "ondataavailable",
-    0x979b => "ondatasetcomplete",
-    0x979c => "onfilterchange",
-    0x979f => "onlosecapture",
-    0x97a0 => "onpropertychange",
-    0x97a2 => "ondrag",
-    0x97a3 => "ondragend",
-    0x97a4 => "ondragenter",
-    0x97a5 => "ondragover",
-    0x97a6 => "ondragleave",
-    0x97a7 => "ondrop",
-    0x97a8 => "oncut",
-    0x97a9 => "oncopy",
-    0x97aa => "onpaste",
-    0x97ab => "onbeforecut",
-    0x97ac => "onbeforecopy",
-    0x97ad => "onbeforepaste",
-    0x97af => "onrowsdelete",
-    0x97b0 => "onrowsinserted",
-    0x97b1 => "oncellchange",
-    0x97b2 => "oncontextmenu",
-    0x97b6 => "onbeforeeditfocus",
-    }
-ATTRS3 = {
-    0x0001 => "href",
-    0x03ec => "target",
-    0x03ee => "rel",
-    0x03ef => "rev",
-    0x03f0 => "urn",
-    0x03f1 => "methods",
-    0x8001 => "name",
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS5 = {
-    0x9399 => "clear",
-    }
-ATTRS6 = {
-    0x8001 => "name",
-    0x8006 => "width",
-    0x8007 => "height",
-    0x804a => "align",
-    0x8bbb => "classid",
-    0x8bbc => "data",
-    0x8bbf => "codebase",
-    0x8bc0 => "codetype",
-    0x8bc1 => "code",
-    0x8bc2 => "type",
-    0x8bc5 => "vspace",
-    0x8bc6 => "hspace",
-    0x978e => "onerror",
-    }
-ATTRS7 = {
-    0x0001 => "href",
-    0x03ea => "shape",
-    0x03eb => "coords",
-    0x03ed => "target",
-    0x03ee => "alt",
-    0x03ef => "nohref",
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS8 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS9 = {
-    0x03ec => "href",
-    0x03ed => "target",
-    }
-ATTRS10 = {
-    0x938b => "color",
-    0x939b => "face",
-    0x93a3 => "size",
-    }
-ATTRS12 = {
-    0x03ea => "src",
-    0x03eb => "loop",
-    0x03ec => "volume",
-    0x03ed => "balance",
-    }
-ATTRS13 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS15 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS16 = {
-    0x07db => "link",
-    0x07dc => "alink",
-    0x07dd => "vlink",
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x938a => "background",
-    0x938b => "text",
-    0x938e => "nowrap",
-    0x93ae => "topmargin",
-    0x93af => "rightmargin",
-    0x93b0 => "bottommargin",
-    0x93b1 => "leftmargin",
-    0x93b6 => "bgproperties",
-    0x93d8 => "scroll",
-    0x977b => "onselect",
-    0x9791 => "onload",
-    0x9792 => "onunload",
-    0x9798 => "onbeforeunload",
-    0x97b3 => "onbeforeprint",
-    0x97b4 => "onafterprint",
-    0xfe0c => "bgcolor",
-    }
-ATTRS17 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS18 = {
-    0x07d1 => "type",
-    0x8001 => "name",
-    }
-ATTRS19 = {
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x93a8 => "valign",
-    }
-ATTRS20 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS21 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS22 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS23 = {
-    0x03ea => "span",
-    0x8006 => "width",
-    0x8049 => "align",
-    0x93a8 => "valign",
-    0xfe0c => "bgcolor",
-    }
-ATTRS24 = {
-    0x03ea => "span",
-    0x8006 => "width",
-    0x8049 => "align",
-    0x93a8 => "valign",
-    0xfe0c => "bgcolor",
-    }
-ATTRS27 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x938e => "nowrap",
-    }
-ATTRS29 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS31 = {
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x938e => "nowrap",
-    }
-ATTRS32 = {
-    0x03ea => "compact",
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS33 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x938e => "nowrap",
-    }
-ATTRS34 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS35 = {
-    0x8001 => "name",
-    0x8006 => "width",
-    0x8007 => "height",
-    0x804a => "align",
-    0x8bbd => "palette",
-    0x8bbe => "pluginspage",
-    0x8bbf => "codebase",
-    0x8bbf => "src",
-    0x8bc1 => "units",
-    0x8bc2 => "type",
-    0x8bc3 => "hidden",
-    }
-ATTRS36 = {
-    0x804a => "align",
-    }
-ATTRS37 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x938b => "color",
-    0x939b => "face",
-    0x939c => "size",
-    }
-ATTRS38 = {
-    0x03ea => "action",
-    0x03ec => "enctype",
-    0x03ed => "method",
-    0x03ef => "target",
-    0x03f4 => "accept-charset",
-    0x8001 => "name",
-    0x977c => "onsubmit",
-    0x977d => "onreset",
-    }
-ATTRS39 = {
-    0x8000 => "align",
-    0x8001 => "name",
-    0x8bb9 => "src",
-    0x8bbb => "border",
-    0x8bbc => "frameborder",
-    0x8bbd => "framespacing",
-    0x8bbe => "marginwidth",
-    0x8bbf => "marginheight",
-    0x8bc0 => "noresize",
-    0x8bc1 => "scrolling",
-    0x8fa2 => "bordercolor",
-    }
-ATTRS40 = {
-    0x03e9 => "rows",
-    0x03ea => "cols",
-    0x03eb => "border",
-    0x03ec => "bordercolor",
-    0x03ed => "frameborder",
-    0x03ee => "framespacing",
-    0x8001 => "name",
-    0x9791 => "onload",
-    0x9792 => "onunload",
-    0x9798 => "onbeforeunload",
-    0x97b3 => "onbeforeprint",
-    0x97b4 => "onafterprint",
-    }
-ATTRS42 = {
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS43 = {
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS44 = {
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS45 = {
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS46 = {
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS47 = {
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS49 = {
-    0x03ea => "noshade",
-    0x8006 => "width",
-    0x8007 => "size",
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x938b => "color",
-    }
-ATTRS51 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS52 = {
-    0x8001 => "name",
-    0x8006 => "width",
-    0x8007 => "height",
-    0x804a => "align",
-    0x8bb9 => "src",
-    0x8bbb => "border",
-    0x8bbc => "frameborder",
-    0x8bbd => "framespacing",
-    0x8bbe => "marginwidth",
-    0x8bbf => "marginheight",
-    0x8bc0 => "noresize",
-    0x8bc1 => "scrolling",
-    0x8fa2 => "vspace",
-    0x8fa3 => "hspace",
-    }
-ATTRS53 = {
-    0x03eb => "alt",
-    0x03ec => "src",
-    0x03ed => "border",
-    0x03ee => "vspace",
-    0x03ef => "hspace",
-    0x03f0 => "lowsrc",
-    0x03f1 => "vrml",
-    0x03f2 => "dynsrc",
-    0x03f4 => "loop",
-    0x03f6 => "start",
-    0x07d3 => "ismap",
-    0x07d9 => "usemap",
-    0x8001 => "name",
-    0x8006 => "width",
-    0x8007 => "height",
-    0x8046 => "title",
-    0x804a => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x978d => "onabort",
-    0x978e => "onerror",
-    0x9791 => "onload",
-    }
-ATTRS54 = {
-    0x07d1 => "type",
-    0x07d3 => "size",
-    0x07d4 => "maxlength",
-    0x07d6 => "readonly",
-    0x07d8 => "indeterminate",
-    0x07da => "checked",
-    0x07db => "alt",
-    0x07dc => "src",
-    0x07dd => "border",
-    0x07de => "vspace",
-    0x07df => "hspace",
-    0x07e0 => "lowsrc",
-    0x07e1 => "vrml",
-    0x07e2 => "dynsrc",
-    0x07e4 => "loop",
-    0x07e5 => "start",
-    0x8001 => "name",
-    0x8006 => "width",
-    0x8007 => "height",
-    0x804a => "align",
-    0x93ee => "value",
-    0x977b => "onselect",
-    0x978d => "onabort",
-    0x978e => "onerror",
-    0x978f => "onchange",
-    0x9791 => "onload",
-    }
-ATTRS56 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS57 = {
-    0x03e9 => "for",
-    }
-ATTRS58 = {
-    0x804a => "align",
-    }
-ATTRS59 = {
-    0x03ea => "value",
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x939a => "type",
-    }
-ATTRS60 = {
-    0x03ee => "href",
-    0x03ef => "rel",
-    0x03f0 => "rev",
-    0x03f1 => "type",
-    0x03f9 => "media",
-    0x03fa => "target",
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x978e => "onerror",
-    0x9791 => "onload",
-    }
-ATTRS61 = {
-    0x9399 => "clear",
-    }
-ATTRS62 = {
-    0x8001 => "name",
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS63 = {
-    0x1771 => "scrolldelay",
-    0x1772 => "direction",
-    0x1773 => "behavior",
-    0x1774 => "scrollamount",
-    0x1775 => "loop",
-    0x1776 => "vspace",
-    0x1777 => "hspace",
-    0x1778 => "truespeed",
-    0x8006 => "width",
-    0x8007 => "height",
-    0x9785 => "onbounce",
-    0x978b => "onfinish",
-    0x978c => "onstart",
-    0xfe0c => "bgcolor",
-    }
-ATTRS65 = {
-    0x03ea => "http-equiv",
-    0x03eb => "content",
-    0x03ec => "url",
-    0x03f6 => "charset",
-    0x8001 => "name",
-    }
-ATTRS66 = {
-    0x03f5 => "n",
-    }
-ATTRS71 = {
-    0x8000 => "border",
-    0x8000 => "usemap",
-    0x8001 => "name",
-    0x8006 => "width",
-    0x8007 => "height",
-    0x8046 => "title",
-    0x804a => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x8bbb => "classid",
-    0x8bbc => "data",
-    0x8bbf => "codebase",
-    0x8bc0 => "codetype",
-    0x8bc1 => "code",
-    0x8bc2 => "type",
-    0x8bc5 => "vspace",
-    0x8bc6 => "hspace",
-    0x978e => "onerror",
-    }
-ATTRS72 = {
-    0x03eb => "compact",
-    0x03ec => "start",
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x939a => "type",
-    }
-ATTRS73 = {
-    0x03ea => "selected",
-    0x03eb => "value",
-    }
-ATTRS74 = {
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS75 = {
-    0x8000 => "name",
-    0x8000 => "value",
-    0x8000 => "type",
-    }
-ATTRS76 = {
-    0x9399 => "clear",
-    }
-ATTRS77 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x9399 => "clear",
-    }
-ATTRS78 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS82 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS83 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS84 = {
-    0x03ea => "src",
-    0x03ed => "for",
-    0x03ee => "event",
-    0x03f0 => "defer",
-    0x03f2 => "type",
-    0x978e => "onerror",
-    }
-ATTRS85 = {
-    0x03eb => "size",
-    0x03ec => "multiple",
-    0x8000 => "align",
-    0x8001 => "name",
-    0x978f => "onchange",
-    }
-ATTRS86 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS87 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS88 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS89 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS90 = {
-    0x03eb => "type",
-    0x03ef => "media",
-    0x8046 => "title",
-    0x978e => "onerror",
-    0x9791 => "onload",
-    }
-ATTRS91 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS92 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS93 = {
-    0x03ea => "cols",
-    0x03eb => "border",
-    0x03ec => "rules",
-    0x03ed => "frame",
-    0x03ee => "cellspacing",
-    0x03ef => "cellpadding",
-    0x03fa => "datapagesize",
-    0x8006 => "width",
-    0x8007 => "height",
-    0x8046 => "title",
-    0x804a => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x938a => "background",
-    0x93a5 => "bordercolor",
-    0x93a6 => "bordercolorlight",
-    0x93a7 => "bordercolordark",
-    0xfe0c => "bgcolor",
-    }
-ATTRS94 = {
-    0x8049 => "align",
-    0x93a8 => "valign",
-    0xfe0c => "bgcolor",
-    }
-ATTRS95 = {
-    0x8049 => "align",
-    0x93a8 => "valign",
-    }
-ATTRS96 = {
-    0x07d2 => "rowspan",
-    0x07d3 => "colspan",
-    0x8006 => "width",
-    0x8007 => "height",
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x938a => "background",
-    0x938e => "nowrap",
-    0x93a5 => "bordercolor",
-    0x93a6 => "bordercolorlight",
-    0x93a7 => "bordercolordark",
-    0x93a8 => "valign",
-    0xfe0c => "bgcolor",
-    }
-ATTRS97 = {
-    0x1b5a => "rows",
-    0x1b5b => "cols",
-    0x1b5c => "wrap",
-    0x1b5d => "readonly",
-    0x8001 => "name",
-    0x977b => "onselect",
-    0x978f => "onchange",
-    }
-ATTRS98 = {
-    0x8049 => "align",
-    0x93a8 => "valign",
-    0xfe0c => "bgcolor",
-    }
-ATTRS99 = {
-    0x07d2 => "rowspan",
-    0x07d3 => "colspan",
-    0x8006 => "width",
-    0x8007 => "height",
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x938a => "background",
-    0x938e => "nowrap",
-    0x93a5 => "bordercolor",
-    0x93a6 => "bordercolorlight",
-    0x93a7 => "bordercolordark",
-    0x93a8 => "valign",
-    0xfe0c => "bgcolor",
-    }
-ATTRS100 = {
-    0x8049 => "align",
-    0x93a8 => "valign",
-    0xfe0c => "bgcolor",
-    }
-ATTRS102 = {
-    0x8007 => "height",
-    0x8046 => "title",
-    0x8049 => "align",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x93a5 => "bordercolor",
-    0x93a6 => "bordercolorlight",
-    0x93a7 => "bordercolordark",
-    0x93a8 => "valign",
-    0xfe0c => "bgcolor",
-    }
-ATTRS103 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS104 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS105 = {
-    0x03eb => "compact",
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    0x939a => "type",
-    }
-ATTRS106 = {
-    0x8046 => "title",
-    0x804b => "style",
-    0x83ea => "class",
-    0x83eb => "id",
-    }
-ATTRS108 = {
-    0x9399 => "clear",
-    }
-
 TAGS = [
     None, 
     None,
@@ -893,6 +110,789 @@ TAGS = [
     None,
     ]
 
+ATTRS0 = { 
+    0x8010: "tabindex",
+    0x8046: "title",
+    0x804b: "style",
+    0x804d: "disabled",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x83fe: "datafld",
+    0x83ff: "datasrc",
+    0x8400: "dataformatas",
+    0x87d6: "accesskey",
+    0x9392: "lang",
+    0x93ed: "language",
+    0x93fe: "dir",
+    0x9771: "onmouseover",
+    0x9772: "onmouseout",
+    0x9773: "onmousedown",
+    0x9774: "onmouseup",
+    0x9775: "onmousemove",
+    0x9776: "onkeydown",
+    0x9777: "onkeyup",
+    0x9778: "onkeypress",
+    0x9779: "onclick",
+    0x977a: "ondblclick",
+    0x977e: "onhelp",
+    0x977f: "onfocus",
+    0x9780: "onblur",
+    0x9783: "onrowexit",
+    0x9784: "onrowenter",
+    0x9786: "onbeforeupdate",
+    0x9787: "onafterupdate",
+    0x978a: "onreadystatechange",
+    0x9790: "onscroll",
+    0x9794: "ondragstart",
+    0x9795: "onresize",
+    0x9796: "onselectstart",
+    0x9797: "onerrorupdate",
+    0x9799: "ondatasetchanged",
+    0x979a: "ondataavailable",
+    0x979b: "ondatasetcomplete",
+    0x979c: "onfilterchange",
+    0x979f: "onlosecapture",
+    0x97a0: "onpropertychange",
+    0x97a2: "ondrag",
+    0x97a3: "ondragend",
+    0x97a4: "ondragenter",
+    0x97a5: "ondragover",
+    0x97a6: "ondragleave",
+    0x97a7: "ondrop",
+    0x97a8: "oncut",
+    0x97a9: "oncopy",
+    0x97aa: "onpaste",
+    0x97ab: "onbeforecut",
+    0x97ac: "onbeforecopy",
+    0x97ad: "onbeforepaste",
+    0x97af: "onrowsdelete",
+    0x97b0: "onrowsinserted",
+    0x97b1: "oncellchange",
+    0x97b2: "oncontextmenu",
+    0x97b6: "onbeforeeditfocus",
+    }
+ATTRS3 = {
+    0x0001: "href",
+    0x03ec: "target",
+    0x03ee: "rel",
+    0x03ef: "rev",
+    0x03f0: "urn",
+    0x03f1: "methods",
+    0x8001: "name",
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS5 = {
+    0x9399: "clear",
+    }
+ATTRS6 = {
+    0x8001: "name",
+    0x8006: "width",
+    0x8007: "height",
+    0x804a: "align",
+    0x8bbb: "classid",
+    0x8bbc: "data",
+    0x8bbf: "codebase",
+    0x8bc0: "codetype",
+    0x8bc1: "code",
+    0x8bc2: "type",
+    0x8bc5: "vspace",
+    0x8bc6: "hspace",
+    0x978e: "onerror",
+    }
+ATTRS7 = {
+    0x0001: "href",
+    0x03ea: "shape",
+    0x03eb: "coords",
+    0x03ed: "target",
+    0x03ee: "alt",
+    0x03ef: "nohref",
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS8 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS9 = {
+    0x03ec: "href",
+    0x03ed: "target",
+    }
+ATTRS10 = {
+    0x938b: "color",
+    0x939b: "face",
+    0x93a3: "size",
+    }
+ATTRS12 = {
+    0x03ea: "src",
+    0x03eb: "loop",
+    0x03ec: "volume",
+    0x03ed: "balance",
+    }
+ATTRS13 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS15 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS16 = {
+    0x07db: "link",
+    0x07dc: "alink",
+    0x07dd: "vlink",
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x938a: "background",
+    0x938b: "text",
+    0x938e: "nowrap",
+    0x93ae: "topmargin",
+    0x93af: "rightmargin",
+    0x93b0: "bottommargin",
+    0x93b1: "leftmargin",
+    0x93b6: "bgproperties",
+    0x93d8: "scroll",
+    0x977b: "onselect",
+    0x9791: "onload",
+    0x9792: "onunload",
+    0x9798: "onbeforeunload",
+    0x97b3: "onbeforeprint",
+    0x97b4: "onafterprint",
+    0xfe0c: "bgcolor",
+    }
+ATTRS17 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS18 = {
+    0x07d1: "type",
+    0x8001: "name",
+    }
+ATTRS19 = {
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x93a8: "valign",
+    }
+ATTRS20 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS21 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS22 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS23 = {
+    0x03ea: "span",
+    0x8006: "width",
+    0x8049: "align",
+    0x93a8: "valign",
+    0xfe0c: "bgcolor",
+    }
+ATTRS24 = {
+    0x03ea: "span",
+    0x8006: "width",
+    0x8049: "align",
+    0x93a8: "valign",
+    0xfe0c: "bgcolor",
+    }
+ATTRS27 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x938e: "nowrap",
+    }
+ATTRS29 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS31 = {
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x938e: "nowrap",
+    }
+ATTRS32 = {
+    0x03ea: "compact",
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS33 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x938e: "nowrap",
+    }
+ATTRS34 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS35 = {
+    0x8001: "name",
+    0x8006: "width",
+    0x8007: "height",
+    0x804a: "align",
+    0x8bbd: "palette",
+    0x8bbe: "pluginspage",
+    0x8bbf: "codebase",  # NOTE(review): key 0x8bbf repeats on the next line, so this entry is overwritten
+    0x8bbf: "src",  # NOTE(review): duplicate key -- one of the two codes is presumably wrong; confirm
+    0x8bc1: "units",
+    0x8bc2: "type",
+    0x8bc3: "hidden",
+    }
+ATTRS36 = {
+    0x804a: "align",
+    }
+ATTRS37 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x938b: "color",
+    0x939b: "face",
+    0x939c: "size",
+    }
+ATTRS38 = {
+    0x03ea: "action",
+    0x03ec: "enctype",
+    0x03ed: "method",
+    0x03ef: "target",
+    0x03f4: "accept-charset",
+    0x8001: "name",
+    0x977c: "onsubmit",
+    0x977d: "onreset",
+    }
+ATTRS39 = {
+    0x8000: "align",
+    0x8001: "name",
+    0x8bb9: "src",
+    0x8bbb: "border",
+    0x8bbc: "frameborder",
+    0x8bbd: "framespacing",
+    0x8bbe: "marginwidth",
+    0x8bbf: "marginheight",
+    0x8bc0: "noresize",
+    0x8bc1: "scrolling",
+    0x8fa2: "bordercolor",
+    }
+ATTRS40 = {
+    0x03e9: "rows",
+    0x03ea: "cols",
+    0x03eb: "border",
+    0x03ec: "bordercolor",
+    0x03ed: "frameborder",
+    0x03ee: "framespacing",
+    0x8001: "name",
+    0x9791: "onload",
+    0x9792: "onunload",
+    0x9798: "onbeforeunload",
+    0x97b3: "onbeforeprint",
+    0x97b4: "onafterprint",
+    }
+ATTRS42 = {
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS43 = {
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS44 = {
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS45 = {
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS46 = {
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS47 = {
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS49 = {
+    0x03ea: "noshade",
+    0x8006: "width",
+    0x8007: "size",
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x938b: "color",
+    }
+ATTRS51 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS52 = {
+    0x8001: "name",
+    0x8006: "width",
+    0x8007: "height",
+    0x804a: "align",
+    0x8bb9: "src",
+    0x8bbb: "border",
+    0x8bbc: "frameborder",
+    0x8bbd: "framespacing",
+    0x8bbe: "marginwidth",
+    0x8bbf: "marginheight",
+    0x8bc0: "noresize",
+    0x8bc1: "scrolling",
+    0x8fa2: "vspace",
+    0x8fa3: "hspace",
+    }
+ATTRS53 = {
+    0x03eb: "alt",
+    0x03ec: "src",
+    0x03ed: "border",
+    0x03ee: "vspace",
+    0x03ef: "hspace",
+    0x03f0: "lowsrc",
+    0x03f1: "vrml",
+    0x03f2: "dynsrc",
+    0x03f4: "loop",
+    0x03f6: "start",
+    0x07d3: "ismap",
+    0x07d9: "usemap",
+    0x8001: "name",
+    0x8006: "width",
+    0x8007: "height",
+    0x8046: "title",
+    0x804a: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x978d: "onabort",
+    0x978e: "onerror",
+    0x9791: "onload",
+    }
+ATTRS54 = {
+    0x07d1: "type",
+    0x07d3: "size",
+    0x07d4: "maxlength",
+    0x07d6: "readonly",
+    0x07d8: "indeterminate",
+    0x07da: "checked",
+    0x07db: "alt",
+    0x07dc: "src",
+    0x07dd: "border",
+    0x07de: "vspace",
+    0x07df: "hspace",
+    0x07e0: "lowsrc",
+    0x07e1: "vrml",
+    0x07e2: "dynsrc",
+    0x07e4: "loop",
+    0x07e5: "start",
+    0x8001: "name",
+    0x8006: "width",
+    0x8007: "height",
+    0x804a: "align",
+    0x93ee: "value",
+    0x977b: "onselect",
+    0x978d: "onabort",
+    0x978e: "onerror",
+    0x978f: "onchange",
+    0x9791: "onload",
+    }
+ATTRS56 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS57 = {
+    0x03e9: "for",
+    }
+ATTRS58 = {
+    0x804a: "align",
+    }
+ATTRS59 = {
+    0x03ea: "value",
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x939a: "type",
+    }
+ATTRS60 = {
+    0x03ee: "href",
+    0x03ef: "rel",
+    0x03f0: "rev",
+    0x03f1: "type",
+    0x03f9: "media",
+    0x03fa: "target",
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x978e: "onerror",
+    0x9791: "onload",
+    }
+ATTRS61 = {
+    0x9399: "clear",
+    }
+ATTRS62 = {
+    0x8001: "name",
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS63 = {
+    0x1771: "scrolldelay",
+    0x1772: "direction",
+    0x1773: "behavior",
+    0x1774: "scrollamount",
+    0x1775: "loop",
+    0x1776: "vspace",
+    0x1777: "hspace",
+    0x1778: "truespeed",
+    0x8006: "width",
+    0x8007: "height",
+    0x9785: "onbounce",
+    0x978b: "onfinish",
+    0x978c: "onstart",
+    0xfe0c: "bgcolor",
+    }
+ATTRS65 = {
+    0x03ea: "http-equiv",
+    0x03eb: "content",
+    0x03ec: "url",
+    0x03f6: "charset",
+    0x8001: "name",
+    }
+ATTRS66 = {
+    0x03f5: "n",
+    }
+ATTRS71 = {
+    0x8000: "border",  # NOTE(review): key 0x8000 is repeated on the next line, so this entry is discarded
+    0x8000: "usemap",  # last duplicate wins: 0x8000 resolves to "usemap"; correct keys need confirming
+    0x8001: "name",
+    0x8006: "width",
+    0x8007: "height",
+    0x8046: "title",
+    0x804a: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x8bbb: "classid",
+    0x8bbc: "data",
+    0x8bbf: "codebase",
+    0x8bc0: "codetype",
+    0x8bc1: "code",
+    0x8bc2: "type",
+    0x8bc5: "vspace",
+    0x8bc6: "hspace",
+    0x978e: "onerror",
+    }
+ATTRS72 = {
+    0x03eb: "compact",
+    0x03ec: "start",
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x939a: "type",
+    }
+ATTRS73 = {
+    0x03ea: "selected",
+    0x03eb: "value",
+    }
+ATTRS74 = {
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS75 = {
+    0x8000: "name",  # NOTE(review): 0x8000 is used three times here; only the last entry survives
+    0x8000: "value",  # discarded (duplicate key)
+    0x8000: "type",  # effective value for 0x8000; correct keys need confirming
+    }
+ATTRS76 = {
+    0x9399: "clear",
+    }
+ATTRS77 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x9399: "clear",
+    }
+ATTRS78 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS82 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS83 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS84 = {
+    0x03ea: "src",
+    0x03ed: "for",
+    0x03ee: "event",
+    0x03f0: "defer",
+    0x03f2: "type",
+    0x978e: "onerror",
+    }
+ATTRS85 = {
+    0x03eb: "size",
+    0x03ec: "multiple",
+    0x8000: "align",
+    0x8001: "name",
+    0x978f: "onchange",
+    }
+ATTRS86 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS87 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS88 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS89 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS90 = {
+    0x03eb: "type",
+    0x03ef: "media",
+    0x8046: "title",
+    0x978e: "onerror",
+    0x9791: "onload",
+    }
+ATTRS91 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS92 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS93 = {
+    0x03ea: "cols",
+    0x03eb: "border",
+    0x03ec: "rules",
+    0x03ed: "frame",
+    0x03ee: "cellspacing",
+    0x03ef: "cellpadding",
+    0x03fa: "datapagesize",
+    0x8006: "width",
+    0x8007: "height",
+    0x8046: "title",
+    0x804a: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x938a: "background",
+    0x93a5: "bordercolor",
+    0x93a6: "bordercolorlight",
+    0x93a7: "bordercolordark",
+    0xfe0c: "bgcolor",
+    }
+ATTRS94 = {
+    0x8049: "align",
+    0x93a8: "valign",
+    0xfe0c: "bgcolor",
+    }
+ATTRS95 = {
+    0x8049: "align",
+    0x93a8: "valign",
+    }
+ATTRS96 = {
+    0x07d2: "rowspan",
+    0x07d3: "colspan",
+    0x8006: "width",
+    0x8007: "height",
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x938a: "background",
+    0x938e: "nowrap",
+    0x93a5: "bordercolor",
+    0x93a6: "bordercolorlight",
+    0x93a7: "bordercolordark",
+    0x93a8: "valign",
+    0xfe0c: "bgcolor",
+    }
+ATTRS97 = {
+    0x1b5a: "rows",
+    0x1b5b: "cols",
+    0x1b5c: "wrap",
+    0x1b5d: "readonly",
+    0x8001: "name",
+    0x977b: "onselect",
+    0x978f: "onchange",
+    }
+ATTRS98 = {
+    0x8049: "align",
+    0x93a8: "valign",
+    0xfe0c: "bgcolor",
+    }
+ATTRS99 = {
+    0x07d2: "rowspan",
+    0x07d3: "colspan",
+    0x8006: "width",
+    0x8007: "height",
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x938a: "background",
+    0x938e: "nowrap",
+    0x93a5: "bordercolor",
+    0x93a6: "bordercolorlight",
+    0x93a7: "bordercolordark",
+    0x93a8: "valign",
+    0xfe0c: "bgcolor",
+    }
+ATTRS100 = {
+    0x8049: "align",
+    0x93a8: "valign",
+    0xfe0c: "bgcolor",
+    }
+ATTRS102 = {
+    0x8007: "height",
+    0x8046: "title",
+    0x8049: "align",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x93a5: "bordercolor",
+    0x93a6: "bordercolorlight",
+    0x93a7: "bordercolordark",
+    0x93a8: "valign",
+    0xfe0c: "bgcolor",
+    }
+ATTRS103 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS104 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS105 = {
+    0x03eb: "compact",
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    0x939a: "type",
+    }
+ATTRS106 = {
+    0x8046: "title",
+    0x804b: "style",
+    0x83ea: "class",
+    0x83eb: "id",
+    }
+ATTRS108 = {
+    0x9399: "clear",
+    }
+
 TAGS_ATTRS = [
     None, 
     None,
@@ -1005,4 +1005,4 @@ TAGS_ATTRS = [
     None,
     ]
 
-MAP = (TAGS, TAGS_ATTRS, ATTRS0)
+MAP = (TAGS, ATTRS0, TAGS_ATTRS)
diff --git a/src/calibre/ebooks/lit/maps/opf.py b/src/calibre/ebooks/lit/maps/opf.py
index a39e6bf8e8..cc1acc4dfa 100644
--- a/src/calibre/ebooks/lit/maps/opf.py
+++ b/src/calibre/ebooks/lit/maps/opf.py
@@ -1,28 +1,3 @@
-ATTRS = {
-    0x0001 => "href",   
-    0x0002 => "%never-used",
-    0x0003 => "%guid",
-    0x0004 => "%minimum_level",
-    0x0005 => "%attr5",
-    0x0006 => "id",
-    0x0007 => "href",
-    0x0008 => "media-type",
-    0x0009 => "fallback",
-    0x000A => "idref",
-    0x000B => "xmlns:dc",
-    0x000C => "xmlns:oebpackage",
-    0x000D => "role",
-    0x000E => "file-as",
-    0x000F => "event",
-    0x0010 => "scheme",
-    0x0011 => "title",
-    0x0012 => "type",
-    0x0013 => "unique-identifier",
-    0x0014 => "name",
-    0x0015 => "content",
-    0x0016 => "xml:lang",
-    }
-
 TAGS = [
     None,
     "package",
@@ -69,6 +44,31 @@ TAGS = [
     None,
    ]
 
-TAGS_ATTR = [{} for i in xrange(43)]
+ATTRS = {
+    0x0001: "href",   
+    0x0002: "%never-used",
+    0x0003: "%guid",
+    0x0004: "%minimum_level",
+    0x0005: "%attr5",
+    0x0006: "id",
+    0x0007: "href",
+    0x0008: "media-type",
+    0x0009: "fallback",
+    0x000A: "idref",
+    0x000B: "xmlns:dc",
+    0x000C: "xmlns:oebpackage",
+    0x000D: "role",
+    0x000E: "file-as",
+    0x000F: "event",
+    0x0010: "scheme",
+    0x0011: "title",
+    0x0012: "type",
+    0x0013: "unique-identifier",
+    0x0014: "name",
+    0x0015: "content",
+    0x0016: "xml:lang",
+    }
 
-MAP = (TAGS, TAGS_ATTRS, ATTRS0)
+TAGS_ATTRS = [{} for i in xrange(43)]
+
+MAP = (TAGS, ATTRS, TAGS_ATTRS)
diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py
new file mode 100644
index 0000000000..f6f7c33444
--- /dev/null
+++ b/src/calibre/ebooks/lit/mssha1.py
@@ -0,0 +1,343 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1
+
+"""A sample implementation of SHA-1 in pure Python.
+
+   Framework adapted from Dinu Gherman's MD5 implementation by
+   J. Hallén and L. Creighton. SHA-1 implementation based directly on
+   the text of the NIST standard FIPS PUB 180-1.
+"""
+
+
+__date__    = '2004-11-17'
+__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy
+
+
+import struct, copy
+
+
+# ======================================================================
+# Bit-Manipulation helpers
+#
+#   _long2bytes() was contributed by Barry Warsaw
+#   and is reused here with tiny modifications.
+# ======================================================================
+
+def _long2bytesBigEndian(n, blocksize=0):
+    """Convert a long integer to a big-endian byte string.
+
+    If optional blocksize is given and greater than zero, pad the front
+    of the byte string with binary zeros so that the length is a multiple
+    of blocksize.
+    """
+
+    # After much testing, this algorithm was deemed to be the fastest.
+    s = ''
+    pack = struct.pack
+    while n > 0:  # n <= 0 skips the loop and leaves s empty
+        s = pack('>I', n & 0xffffffffL) + s  # prepend the low 32 bits as a big-endian word
+        n = n >> 32
+
+    # Strip off leading zeros.
+    for i in range(len(s)):
+        if s[i] != '\000':
+            break
+    else:
+        # Only reached when s is empty, i.e. n <= 0: return a single zero byte.
+        s = '\000'
+        i = 0
+
+    s = s[i:]
+
+    # Add back some pad bytes. This could be done more efficiently
+    # w.r.t. the de-padding being done above, but sigh...
+    if blocksize > 0 and len(s) % blocksize:
+        s = (blocksize - len(s) % blocksize) * '\000' + s
+
+    return s
+
+
+def _bytelist2longBigEndian(list):
+    "Pack each group of 4 characters in list into one big-endian long."
+
+    imax = len(list)/4  # number of complete 4-character groups (Python 2 integer division)
+    hl = [0L] * imax
+
+    j = 0  # index into list, advances 4 characters per output word
+    i = 0  # index into hl
+    while i < imax:
+        b0 = long(ord(list[j])) << 24  # first character is the most significant byte
+        b1 = long(ord(list[j+1])) << 16
+        b2 = long(ord(list[j+2])) << 8
+        b3 = long(ord(list[j+3]))
+        hl[i] = b0 | b1 | b2 | b3
+        i = i+1
+        j = j+4
+
+    return hl
+
+
+def _rotateLeft(x, n):
+    "Rotate x (32 bit) left n bits circularly; the result is NOT masked to 32 bits."
+
+    return (x << n) | (x >> (32-n))  # high bits may spill past bit 31; callers apply & 0xffffffffL
+
+
+# ======================================================================
+# The SHA transformation functions
+#
+# ======================================================================
+
+def f0_19(B, C, D):
+    return (B & (C ^ D)) ^ D  # bitwise select: C where B's bit is set, otherwise D
+
+def f20_39(B, C, D):
+    return B ^ C ^ D  # bitwise parity of the three words
+
+def f40_59(B, C, D):
+    return ((B | C) & D) | (B & C)  # bitwise majority of the three words
+
+def f60_79(B, C, D):
+    return B ^ C ^ D  # same expression as f20_39
+
+def f6_42(B, C, D):
+    return (B + C) ^ C  # uses integer addition and ignores D; installed at rounds 6 and 42 below
+
+f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20  # one function per round t, looked up as f[t]
+f[3] = f20_39  # from here on, individual rounds of the uniform schedule above are replaced
+f[6] = f6_42  # NOTE(review): these overrides presumably implement the LIT-specific variant - confirm
+f[10] = f20_39
+f[15] = f20_39
+f[26] = f0_19
+f[31] = f40_59
+f[42] = f6_42
+f[51] = f20_39
+f[68] = f0_19
+
+
+# Constants to be used
+K = [
+    0x5A827999L, # ( 0 <= t <= 19)
+    0x6ED9EBA1L, # (20 <= t <= 39)
+    0x8F1BBCDCL, # (40 <= t <= 59)
+    0xCA62C1D6L  # (60 <= t <= 79)
+    ]
+
+class sha:
+    "A SHA-1 style hash in pure Python (update/digest/hexdigest/copy API)."
+
+    def __init__(self):
+        "Initialisation."
+        
+        # Initial message length in bits(!).
+        self.length = 0L
+        self.count = [0, 0]  # running bit count as [high, low]; update() and digest() read count[1]
+
+        # Initial empty message as a sequence of bytes (8 bit characters).
+        self.input = []
+
+        # Call a separate init function, that can be used repeatedly
+        # to start from scratch on the same object.
+        self.init()
+
+
+    def init(self):
+        "Reset length, buffered input and the five chaining words (self.count is not reset)."
+
+        self.length = 0L
+        self.input = []
+
+        # Initial 160 bit message digest (5 times 32 bit).
+        self.H0 = 0x32107654L  # NOTE(review): not the FIPS 180-1 initial values; presumably intentional - confirm
+        self.H1 = 0x23016745L
+        self.H2 = 0xC4E680A2L
+        self.H3 = 0xDC679823L
+        self.H4 = 0xD0857A34L
+
+    def _transform(self, W):  # fold one block (16 words in W) into H0..H4; W is extended in place
+        for t in range(16, 80):  # expand the 16 input words to 80
+            W.append(_rotateLeft(
+                W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL)
+
+        A = self.H0
+        B = self.H1
+        C = self.H2
+        D = self.H3
+        E = self.H4
+
+        for t in xrange(0, 80):
+            TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20]  # K[t/20]: one constant per 20 rounds
+            E = D
+            D = C
+            C = _rotateLeft(B, 30) & 0xffffffffL
+            B = A
+            A = TEMP & 0xffffffffL  # the unmasked rotate and sum are truncated to 32 bits here
+            
+        self.H0 = (self.H0 + A) & 0xffffffffL
+        self.H1 = (self.H1 + B) & 0xffffffffL
+        self.H2 = (self.H2 + C) & 0xffffffffL
+        self.H3 = (self.H3 + D) & 0xffffffffL
+        self.H4 = (self.H4 + E) & 0xffffffffL
+    
+
+    # Down from here all methods follow the Python Standard Library
+    # API of the sha module.
+
+    def update(self, inBuf):
+        """Add to the current message.
+
+        Update the sha object with the string arg. Repeated calls
+        are equivalent to a single call with the concatenation of all
+        the arguments, i.e. s.update(a); s.update(b) is equivalent
+        to s.update(a+b).
+
+        The hash is immediately calculated for all full blocks. The final
+        calculation is made in digest(). It will calculate 1-2 blocks,
+        depending on how much padding we have to add. This allows us to
+        keep an intermediate value for the hash, so that we only need to
+        make minimal recalculation if we call update() to add more data
+        to the hashed string.
+        """
+
+        leninBuf = long(len(inBuf))
+
+        # Compute number of bytes mod 64.
+        index = (self.count[1] >> 3) & 0x3FL
+
+        # Update number of bits.
+        self.count[1] = self.count[1] + (leninBuf << 3)
+        if self.count[1] < (leninBuf << 3):
+            self.count[0] = self.count[0] + 1
+        self.count[0] = self.count[0] + (leninBuf >> 29)
+
+        partLen = 64 - index  # bytes needed to complete the currently buffered block
+
+        if leninBuf >= partLen:
+            self.input[index:] = list(inBuf[:partLen])
+            self._transform(_bytelist2longBigEndian(self.input))
+            i = partLen
+            while i + 63 < leninBuf:  # process every further complete 64-byte block directly
+                self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64])))
+                i = i + 64
+            else:  # while/else with no break: always runs, buffering the leftover bytes
+                self.input = list(inBuf[i:leninBuf])
+        else:
+            i = 0  # not read after this point
+            self.input = self.input + list(inBuf)
+
+
+    def digest(self):
+        """Terminate the message-digest computation and return digest.
+
+        Return the digest of the strings passed to the update()
+        method so far. This is a 20-byte string which may contain
+        non-ASCII characters, including null bytes.
+        """
+
+        H0 = self.H0  # snapshot the state; it is restored below so update() can continue afterwards
+        H1 = self.H1
+        H2 = self.H2
+        H3 = self.H3
+        H4 = self.H4
+        input = [] + self.input
+        count = [] + self.count
+
+        index = (self.count[1] >> 3) & 0x3fL
+
+        if index < 56:
+            padLen = 56 - index
+        else:
+            padLen = 120 - index
+
+        padding = ['\200'] + ['\000'] * 63  # 0x80 marker byte followed by zero bytes
+        self.update(padding[:padLen])
+
+        # Append length (before padding).
+        bits = _bytelist2longBigEndian(self.input[:56]) + count
+
+        self._transform(bits)
+
+        # Store state in digest.
+        digest = _long2bytesBigEndian(self.H0, 4) + \
+                 _long2bytesBigEndian(self.H1, 4) + \
+                 _long2bytesBigEndian(self.H2, 4) + \
+                 _long2bytesBigEndian(self.H3, 4) + \
+                 _long2bytesBigEndian(self.H4, 4)
+
+        self.H0 = H0 
+        self.H1 = H1 
+        self.H2 = H2
+        self.H3 = H3
+        self.H4 = H4
+        self.input = input 
+        self.count = count 
+
+        return digest
+
+
+    def hexdigest(self):
+        """Terminate and return digest in HEX form.
+
+        Like digest() except the digest is returned as a string of
+        length 40, containing only hexadecimal digits. This may be
+        used to exchange the value safely in email or other non-
+        binary environments.
+        """
+        return ''.join(['%02x' % ord(c) for c in self.digest()])
+
+    def copy(self):
+        """Return a clone object.
+
+        Return a copy ('clone') of the sha object. This can be used
+        to efficiently compute the digests of strings that share
+        a common initial substring.
+        """
+
+        return copy.deepcopy(self)
+
+
+# ======================================================================
+# Mimic Python top-level functions from standard library API
+# for consistency with the md5 module of the standard library.
+# ======================================================================
+
+# These are mandatory variables in the module. They have constant values
+# in the SHA standard.
+
+digest_size = digestsize = 20
+blocksize = 1
+
+def new(arg=None):
+    """Return a new sha crypto object.
+
+    If arg is truthy, the method call update(arg) is made.
+    """
+
+    crypto = sha()
+    if arg:  # None or an empty string is not passed to update()
+        crypto.update(arg)
+
+    return crypto
+
+if __name__ == '__main__':
+    def main():  # command-line entry: print the hex digest of FILE, or of stdin when no FILE is given
+        import sys
+        file = None
+        if len(sys.argv) > 2:
+            print "usage: %s [FILE]" % sys.argv[0]
+            return
+        elif len(sys.argv) < 2:
+            file = sys.stdin
+        else:
+            file = open(sys.argv[1], 'rb')
+        context = new()
+        data = file.read(16384)  # hash the input in 16 KiB chunks
+        while data:
+            context.update(data)
+            data = file.read(16384)
+        file.close()  # also closes sys.stdin when no FILE was given
+        digest = context.hexdigest().upper()
+        for i in xrange(0, 40, 8):  # 40 hex digits printed as five groups of 8
+            print digest[i:i+8],
+        print
+    main()
diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py
index 1a0f42f8db..711aef6586 100644
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -5,6 +5,7 @@ Support for reading the metadata from a lit file.
 '''
 
 import sys, struct, cStringIO, os
+import functools
 from itertools import repeat
 
 from calibre import relpath
@@ -13,6 +14,31 @@ from calibre.ebooks.metadata.opf import OPFReader
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
 
+OPF_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE package 
+  PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
+  "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
+"""  # XML declaration + OEB 1.0.1 package DOCTYPE; must begin with '<?xml' (no stray leading quote)
+XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE html PUBLIC
+ "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
+ "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
+"""  # XML declaration + OEB 1.0.1 document DOCTYPE
+
+class DirectoryEntry(object):  # plain record holding an entry's name, section, offset and size
+    def __init__(self, name, section, offset, size):
+        self.name = name
+        self.section = section
+        self.offset = offset
+        self.size = size
+        
+    def __repr__(self):  # %d formatting below requires section, offset and size to be numeric
+        return "<DirectoryEntry name='%s' section='%d' offset='%d' size='%d'>" \
+            % (self.name, self.section, self.offset, self.size)
+        
+    def __str__(self):  # same text as __repr__
+        return repr(self)
+
 def u32(bytes):
     return struct.unpack('<L', bytes[:4])[0]
 
@@ -67,7 +93,7 @@ XML_ENTITIES   = ['&amp;', '&apos;', '&lt;', '&gt;', '&quot;']
 class UnBinary(object):
     def __init__(self, bin, manifest, map=OPF_MAP):
         self.manifest = manifest
-        self.attr_map, self.tag_map, self.tag_to_attr_map = map
+        self.tag_map, self.attr_map, self.tag_to_attr_map = map
         self.opf = map is OPF_MAP
         self.bin = bin
         self.buf = cStringIO.StringIO()
@@ -104,7 +130,7 @@ class UnBinary(object):
     def binary_to_text(self, base=0, depth=0):
         tag_name = current_map = None
         dynamic_tag = errors = 0
-        in_censorship = False
+        in_censorship = is_goingdown = False
         state = 'text'
         index =  base
         flags = 0
@@ -136,7 +162,7 @@ class UnBinary(object):
                     tag = oc
                     self.buf.write('<')
                     if not (flags & FLAG_CLOSING):
-                        is_goingdown = 1
+                        is_goingdown = True
                     if tag == 0x8000:
                         state = 'get custom length'
                         continue
@@ -167,7 +193,7 @@ class UnBinary(object):
                     else:
                         self.buf.write('>')
                         index = self.binary_to_text(base=index, depth=depth+1)
-                        is_goingdown = 0
+                        is_goingdown = False
                         if not tag_name:
                             raise LitError('Tag ends before it begins.')
                         self.buf.write('</'+tag_name+'>')
@@ -222,7 +248,7 @@ class UnBinary(object):
                     if not in_censorship:
                         self.buf.write(c)
                     count -= 1
-                elif count == 0:
+                if count == 0:
                     if not in_censorship:
                         self.buf.write('"')
                     in_censorship = False
@@ -268,7 +294,7 @@ class UnBinary(object):
                 href += c
                 count -= 1
                 if count == 0:
-                    doc, m, frag = href.partition('#')
+                    doc, m, frag = href[1:].partition('#')
                     path = self.item_path(doc)
                     if m and frag:
                         path += m + frag
@@ -297,100 +323,75 @@ class ManifestItem(object):
     def __repr__(self):
         return self.internal + u'->' + self.path 
 
+def preserve(function):  # decorator: call function, then always seek self._stream back to its prior position
+    def wrapper(self, *args, **kwargs):
+        opos = self._stream.tell()
+        try:
+            return function(self, *args, **kwargs)
+        finally:
+            self._stream.seek(opos)
+    functools.update_wrapper(wrapper, function)
+    return wrapper
+    
 class LitFile(object):
     PIECE_SIZE = 16
 
     def magic():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(0)
-                val = self._stream.read(8)
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(0)
+            return self._stream.read(8)
         return property(fget=fget)
     magic = magic()
     
     def version():
+        @preserve  # keep restoring the stream position, as the removed try/finally did and as sibling properties do
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(8)
-                val = u32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(8)
+            return u32(self._stream.read(4))
         return property(fget=fget)
     version = version()
     
     def hdr_len():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(12)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(12)
+            return int32(self._stream.read(4))
         return property(fget=fget)
     hdr_len = hdr_len()
     
     def num_pieces():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(16)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(16)
+            return int32(self._stream.read(4))
         return property(fget=fget)
     num_pieces = num_pieces()
     
     def sec_hdr_len():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(20)
-                val = int32(self._stream.read(4))
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(20)
+            return int32(self._stream.read(4))
         return property(fget=fget)
     sec_hdr_len = sec_hdr_len()
     
     def guid():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                self._stream.seek(24)
-                val = self._stream.read(16)
-            finally:
-                self._stream.seek(opos)
-            return val
+            self._stream.seek(24)
+            return self._stream.read(16)
         return property(fget=fget)
     guid = guid()
     
     def header():
+        @preserve
         def fget(self):
-            val = None
-            opos = self._stream.tell()
-            try:
-                size = self.hdr_len \
-                    + (self.num_pieces * self.PIECE_SIZE) \
-                    + self.sec_hdr_len
-                self._stream.seek(0)
-                val = self._stream.read(size)
-            finally:
-                self._stream.seek(opos)
-            return val
+            size = self.hdr_len \
+                + (self.num_pieces * self.PIECE_SIZE) \
+                + self.sec_hdr_len
+            self._stream.seek(0)
+            return self._stream.read(size)
         return property(fget=fget)
     header = header()        
     
@@ -402,70 +402,64 @@ class LitFile(object):
             raise LitError('Unknown LIT version %d'%(self.version,))
         self.read_secondary_header()
         self.read_header_pieces()
-    
-    def read_secondary_header(self):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
-            bytes = self._stream.read(self.sec_hdr_len)
-            offset = int32(bytes[4:])
-            while offset < len(bytes):
-                blocktype = bytes[offset:offset+4]
-                blockver  = u32(bytes[offset+4:])
-                if blocktype == 'CAOL':
-                    if blockver != 2:
-                        raise LitError(
-                            'Unknown CAOL block format %d' % blockver)
-                    self.creator_id     = u32(bytes[offset+12:])
-                    self.entry_chunklen = u32(bytes[offset+20:])
-                    self.count_chunklen = u32(bytes[offset+24:])
-                    self.entry_unknown  = u32(bytes[offset+28:])
-                    self.count_unknown  = u32(bytes[offset+32:])
-                    offset += 48
-                elif blocktype == 'ITSF':
-                    if blockver != 4:
-                        raise LitError(
-                            'Unknown ITSF block format %d' % blockver)
-                    if u32(bytes[offset+4+16:]):
-                        raise LitError('This file has a 64bit content offset')
-                    self.content_offset = u32(bytes[offset+16:])
-                    self.timestamp      = u32(bytes[offset+24:]) 
-                    self.language_id    = u32(bytes[offset+28:])
-                    offset += 48
-            if not hasattr(self, 'content_offset'):
-                raise LitError('Could not figure out the content offset')
-        finally:
-            self._stream.seek(opos)
 
+    @preserve
+    def read_secondary_header(self):
+        self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
+        bytes = self._stream.read(self.sec_hdr_len)
+        offset = int32(bytes[4:])
+        while offset < len(bytes):
+            blocktype = bytes[offset:offset+4]
+            blockver  = u32(bytes[offset+4:])
+            if blocktype == 'CAOL':
+                if blockver != 2:
+                    raise LitError(
+                        'Unknown CAOL block format %d' % blockver)
+                self.creator_id     = u32(bytes[offset+12:])
+                self.entry_chunklen = u32(bytes[offset+20:])
+                self.count_chunklen = u32(bytes[offset+24:])
+                self.entry_unknown  = u32(bytes[offset+28:])
+                self.count_unknown  = u32(bytes[offset+32:])
+                offset += 48
+            elif blocktype == 'ITSF':
+                if blockver != 4:
+                    raise LitError(
+                        'Unknown ITSF block format %d' % blockver)
+                if u32(bytes[offset+4+16:]):
+                    raise LitError('This file has a 64bit content offset')
+                self.content_offset = u32(bytes[offset+16:])
+                self.timestamp      = u32(bytes[offset+24:]) 
+                self.language_id    = u32(bytes[offset+28:])
+                offset += 48
+        if not hasattr(self, 'content_offset'):
+            raise LitError('Could not figure out the content offset')
+    
+    @preserve
     def read_header_pieces(self):
-        opos = self._stream.tell()
-        try:
-            src = self.header[self.hdr_len:]
-            for i in range(self.num_pieces):
-                piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
-                if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
-                    raise LitError('Piece %s has 64bit value' % repr(piece))
-                offset, size = u32(piece), int32(piece[8:])
-                self._stream.seek(offset)
-                piece = self._stream.read(size)
-                if i == 0:
-                    continue # Dont need this piece
-                elif i == 1:
-                    if u32(piece[8:])  != self.entry_chunklen or \
-                       u32(piece[12:]) != self.entry_unknown:
-                        raise LitError('Secondary header does not match piece')
-                    self.read_directory(piece)
-                elif i == 2:
-                    if u32(piece[8:])  != self.count_chunklen or \
-                       u32(piece[12:]) != self.count_unknown:
-                        raise LitError('Secondary header does not match piece')
-                    continue # No data needed from this piece
-                elif i == 3:
-                    self.piece3_guid = piece
-                elif i == 4:
-                    self.piece4_guid = piece
-        finally:
-            self._stream.seek(opos)
+        src = self.header[self.hdr_len:]
+        for i in range(self.num_pieces):
+            piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
+            if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
+                raise LitError('Piece %s has 64bit value' % repr(piece))
+            offset, size = u32(piece), int32(piece[8:])
+            self._stream.seek(offset)
+            piece = self._stream.read(size)
+            if i == 0:
+                continue # Dont need this piece
+            elif i == 1:
+                if u32(piece[8:])  != self.entry_chunklen or \
+                   u32(piece[12:]) != self.entry_unknown:
+                    raise LitError('Secondary header does not match piece')
+                self.read_directory(piece)
+            elif i == 2:
+                if u32(piece[8:])  != self.count_chunklen or \
+                   u32(piece[12:]) != self.count_unknown:
+                    raise LitError('Secondary header does not match piece')
+                continue # No data needed from this piece
+            elif i == 3:
+                self.piece3_guid = piece
+            elif i == 4:
+                self.piece4_guid = piece
                 
     def read_directory(self, piece):
         self.entries = []
@@ -521,108 +515,88 @@ class LitFile(object):
             
             if not hasattr(self, 'manifest'):
                 raise LitError('Lit file does not have a valid manifest')
-                
-    def read_section_names(self, entry):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-            if len(raw) < 4:
-                raise LitError('Invalid Namelist section')
-            pos = 4
-            self.num_sections = u16(raw[2:pos])
-            
-            self.sections = {}
-            for section in range(self.num_sections):
-                size = u16(raw[pos:pos+2])
-                pos += 2
-                size = size*2 + 2
-                if pos + size > len(raw):
-                    raise LitError('Invalid Namelist section')
-                self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
-                pos += size                
-        finally:
-            self._stream.seek(opos)
-                
-    def read_manifest(self, entry):
-        opos = self._stream.tell()
-        try:
-            self.manifest = []
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
-            pos = 0
-            while pos < len(raw):
-                size = ord(raw[pos])
-                if size == 0: break
-                pos += 1
-                root = raw[pos:pos+size].decode('utf8')
-                pos += size
-                if pos >= len(raw):
-                    raise LitError('Truncated manifest.')
-                for state in ['spine', 'not spine', 'css', 'images']:
-                    num_files = int32(raw[pos:pos+4])
-                    pos += 4
-                    if num_files == 0: continue
-                    
-                    i = 0
-                    while i < num_files:
-                        if pos+5 >= len(raw):
-                            raise LitError('Truncated manifest.')
-                        offset = u32(raw[pos:pos+4])
-                        pos += 4
-                        
-                        slen = ord(raw[pos])
-                        pos += 1
-                        internal = raw[pos:pos+slen].decode('utf8')
-                        pos += slen
-                        
-                        slen = ord(raw[pos])
-                        pos += 1
-                        original = raw[pos:pos+slen].decode('utf8')
-                        pos += slen
-                        
-                        slen = ord(raw[pos])
-                        pos += 1
-                        mime_type = raw[pos:pos+slen].decode('utf8')
-                        pos += slen + 1
-                        
-                        self.manifest.append(
-                            ManifestItem(original, internal, mime_type,
-                                         offset, root, state))
-                        i += 1
-        finally:
-            self._stream.seek(opos)        
-            
-    def read_meta(self, entry):
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + entry.offset)
-            raw = self._stream.read(entry.size)
 
-            xml = \
-'''\
-<?xml version="1.0" encoding="UTF-8" ?>
-<!DOCTYPE package
-  PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
-  "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
-'''+\
-                unicode(UnBinary(raw, self.manifest))
-            self.meta = xml
-        finally:
-            self._stream.seek(opos)
-            
+    @preserve
+    def read_section_names(self, entry):
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        if len(raw) < 4:
+            raise LitError('Invalid Namelist section')
+        pos = 4
+        self.num_sections = u16(raw[2:pos])
+        
+        self.sections = {}
+        for section in range(self.num_sections):
+            size = u16(raw[pos:pos+2])
+            pos += 2
+            size = size*2 + 2
+            if pos + size > len(raw):
+                raise LitError('Invalid Namelist section')
+            self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
+            pos += size                
+
+    @preserve
+    def read_manifest(self, entry):
+        self.manifest = []
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        pos = 0
+        while pos < len(raw):
+            size = ord(raw[pos])
+            if size == 0: break
+            pos += 1
+            root = raw[pos:pos+size].decode('utf8')
+            pos += size
+            if pos >= len(raw):
+                raise LitError('Truncated manifest.')
+            for state in ['spine', 'not spine', 'css', 'images']:
+                num_files = int32(raw[pos:pos+4])
+                pos += 4
+                if num_files == 0: continue
+                
+                i = 0
+                while i < num_files:
+                    if pos+5 >= len(raw):
+                        raise LitError('Truncated manifest.')
+                    offset = u32(raw[pos:pos+4])
+                    pos += 4
+                    
+                    slen = ord(raw[pos])
+                    pos += 1
+                    internal = raw[pos:pos+slen].decode('utf8')
+                    pos += slen
+                    
+                    slen = ord(raw[pos])
+                    pos += 1
+                    original = raw[pos:pos+slen].decode('utf8')
+                    pos += slen
+                    
+                    slen = ord(raw[pos])
+                    pos += 1
+                    mime_type = raw[pos:pos+slen].decode('utf8')
+                    pos += slen + 1
+                    
+                    self.manifest.append(
+                        ManifestItem(original, internal, mime_type,
+                                     offset, root, state))
+                    i += 1
+
+    @preserve
+    def read_meta(self, entry):
+        self._stream.seek(self.content_offset + entry.offset)
+        raw = self._stream.read(entry.size)
+        xml = OPF_DECL + unicode(UnBinary(raw, self.manifest))
+        self.meta = xml
+
+    @preserve
     def read_image(self, internal_name):
         cover_entry = None
         for entry in self.entries:
             if internal_name in entry.name:
                 cover_entry = entry
                 break
-        opos = self._stream.tell()
-        try:
-            self._stream.seek(self.content_offset + cover_entry.offset)
-            return self._stream.read(cover_entry.size)
-        finally:
-            self._stream.seek(opos)
+        self._stream.seek(self.content_offset + cover_entry.offset)
+        return self._stream.read(cover_entry.size)
 
 def get_metadata(stream):
     try:

From a48282500fb831dc8019a69068700a98c6d8a90d Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift" <llasram@gmail.com>
Date: Wed, 16 Jul 2008 15:00:47 -0400
Subject: [PATCH 3/7] Checkpoint for changing computers

---
 src/calibre/ebooks/lit/reader.py | 65 ++++++++++++++++++++------------
 1 file changed, 41 insertions(+), 24 deletions(-)

diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py
index 711aef6586..4d149042cc 100644
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -25,20 +25,6 @@ XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
  "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
 """
 
-class DirectoryEntry(object):
-    def __init__(self, name, section, offset, size):
-        self.name = name
-        self.section = section
-        self.offset = offset
-        self.size = size
-        
-    def __repr__(self):
-        return "<DirectoryEntry name='%s' section='%d' offset='%d' size='%d'>" \
-            % (self.name, self.section, self.offset, self.size)
-        
-    def __str__(self):
-        return repr(self)
-
 def u32(bytes):
     return struct.unpack('<L', bytes[:4])[0]
 
@@ -302,6 +288,20 @@ class UnBinary(object):
                     state = 'get attr'
         return index
     
+class DirectoryEntry(object):
+    def __init__(self, name, section, offset, size):
+        self.name = name
+        self.section = section
+        self.offset = offset
+        self.size = size
+        
+    def __repr__(self):
+        return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \
+            % (repr(self.name), self.section, self.offset, self.size)
+        
+    def __str__(self):
+        return repr(self)
+
 class ManifestItem(object):
     def __init__(self, original, internal, mime_type, offset, root, state):
         self.original = original
@@ -310,8 +310,7 @@ class ManifestItem(object):
         self.offset = offset
         self.root = root
         self.state = state
-        self.prefix = 'images' \
-            if state == 'images' else 'css' if state == 'css' else ''
+        self.prefix = state if state in ('images', 'css') else ''
         self.prefix = self.prefix + os.sep if self.prefix else ''
         self.path = self.prefix + self.original
         
@@ -321,7 +320,8 @@ class ManifestItem(object):
         return self.internal == other
     
     def __repr__(self):
-        return self.internal + u'->' + self.path 
+        return "ManifestItem(internal=%s, path=%s)" \
+            % (repr(self.internal), repr(self.path))
 
 def preserve(function):
     def wrapper(self, *args, **kwargs):
@@ -382,6 +382,7 @@ class LitFile(object):
             return self._stream.read(16)
         return property(fget=fget)
     guid = guid()
+
     
     def header():
         @preserve
@@ -403,6 +404,19 @@ class LitFile(object):
         self.read_secondary_header()
         self.read_header_pieces()
 
+    @preserve
+    def __len__(self):
+        self._stream.seek(0, 2)
+        return self._stream.tell()
+
+    @preserve
+    def _read_raw(self, offset, size):
+        self._stream.seek(offset)
+        return self._stream.read(size)
+
+    def _read_content(self, offset, size):
+        return self._read_raw(self.content_offset + offset, size)
+    
     @preserve
     def read_secondary_header(self):
         self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
@@ -462,7 +476,7 @@ class LitFile(object):
                 self.piece4_guid = piece
                 
     def read_directory(self, piece):
-        self.entries = []
+        self.entries = {}
         if not piece.startswith('IFCM'):
             raise LitError('Header piece #1 is not main directory.')
         chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28])
@@ -507,7 +521,7 @@ class LitFile(object):
                     self.read_manifest(entry)
                 elif name == '/meta':
                     self.read_meta(entry)
-                self.entries.append(entry)
+                self.entries[name] = entry
                 i += 1
             
             if not hasattr(self, 'sections'):
@@ -590,14 +604,17 @@ class LitFile(object):
 
     @preserve
     def read_image(self, internal_name):
-        cover_entry = None
-        for entry in self.entries:
-            if internal_name in entry.name:
-                cover_entry = entry
-                break
+        cover_entry = self.entries[internal_name]
         self._stream.seek(self.content_offset + cover_entry.offset)
         return self._stream.read(cover_entry.size)
 
+    def get_file(self, name):
+        entry = self.entries[name]
+        if entry.section == 0:
+            return self._read_content(entry.offset, entry.size)
+        section = self.get_section(entry.section)
+        return section[entry.offset:entry.offset+entry.size]
+
 def get_metadata(stream):
     try:
         litfile = LitFile(stream)

From 9cf4508547a499d7174dfb90cabd5945ba3b356d Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift" <llasram@gmail.com>
Date: Thu, 17 Jul 2008 19:33:30 -0400
Subject: [PATCH 4/7] Checkpoint for switching computers

---
 src/calibre/ebooks/lit/msdes.py  | 481 +++++++++++++++++++++++++++++++
 src/calibre/ebooks/lit/mssha1.py |   8 +-
 src/calibre/ebooks/lit/reader.py | 118 ++++++--
 3 files changed, 579 insertions(+), 28 deletions(-)
 create mode 100644 src/calibre/ebooks/lit/msdes.py

diff --git a/src/calibre/ebooks/lit/msdes.py b/src/calibre/ebooks/lit/msdes.py
new file mode 100644
index 0000000000..5bc67b09bb
--- /dev/null
+++ b/src/calibre/ebooks/lit/msdes.py
@@ -0,0 +1,481 @@
+# Re-modified for use in MS LIT decryption.  Un-reversed the bytebit[] array.
+# Substituted Microsoft's absurd modified S-boxes.  Modified the encrypt/decrypt
+# methods to handle more than one block at a time.
+#
+# And lo, all the previous notices follow:
+
+# Modified DES encryption for VNC password authentication.
+# Ported from realvnc's java viewer by <cliechti@gmx.net>
+# I chose this package name because it is not compatible with the
+# original DES algorithm, e.g. found pycrypto.
+#
+# (C) 2003 chris <cliechti@gmx.net>
+# Released as free software under the Python License.
+#
+# You're free to use it for commercial and noncommercial
+# application, modify and redistribute it as long as the
+# copyright notices are intact. There are no warranties, not
+# even that it does what it says to do ;-)
+#
+# Original notice following:
+
+# This DES class has been extracted from package Acme.Crypto for use in VNC.
+# The bytebit[] array has been reversed so that the most significant bit
+# in each byte of the key is ignored, not the least significant.  Also the
+# unnecessary odd parity code has been removed.
+#
+# These changes are:
+#  Copyright (C) 1999 AT&T Laboratories Cambridge.  All Rights Reserved.
+#
+# This software is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+
+# DesCipher - the DES encryption method
+#
+# The meat of this code is by Dave Zimmerman <dzimm@widget.com>, and is:
+#
+# Copyright (c) 1996 Widget Workshop, Inc. All Rights Reserved.
+#
+# Permission to use, copy, modify, and distribute this software
+# and its documentation for NON-COMMERCIAL or COMMERCIAL purposes and
+# without fee is hereby granted, provided that this copyright notice is kept 
+# intact. 
+# 
+# WIDGET WORKSHOP MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY
+# OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+# TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+# PARTICULAR PURPOSE, OR NON-INFRINGEMENT. WIDGET WORKSHOP SHALL NOT BE LIABLE
+# FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
+# DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
+# 
+# THIS SOFTWARE IS NOT DESIGNED OR INTENDED FOR USE OR RESALE AS ON-LINE
+# CONTROL EQUIPMENT IN HAZARDOUS ENVIRONMENTS REQUIRING FAIL-SAFE
+# PERFORMANCE, SUCH AS IN THE OPERATION OF NUCLEAR FACILITIES, AIRCRAFT
+# NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL, DIRECT LIFE
+# SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH THE FAILURE OF THE
+# SOFTWARE COULD LEAD DIRECTLY TO DEATH, PERSONAL INJURY, OR SEVERE
+# PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH RISK ACTIVITIES").  WIDGET WORKSHOP
+# SPECIFICALLY DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR
+# HIGH RISK ACTIVITIES.
+#
+#
+# The rest is:
+#
+# Copyright (C) 1996 by Jef Poskanzer <jef@acme.com>.  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# Visit the ACME Labs Java page for up-to-date versions of this and other
+# fine Java utilities: http://www.acme.com/java/
+
+
+#/ The DES encryption method.
+# <P>
+# This is surprisingly fast, for pure Java.  On a SPARC 20, wrapped
+# in Acme.Crypto.EncryptedOutputStream or Acme.Crypto.EncryptedInputStream,
+# it does around 7000 bytes/second.
+# <P>
+# Most of this code is by Dave Zimmerman <dzimm@widget.com>, and is
+# Copyright (c) 1996 Widget Workshop, Inc.  See the source file for details.
+# <P>
+# <A HREF="/resources/classes/Acme/Crypto/DesCipher.java">Fetch the software.</A><BR>
+# <A HREF="/resources/classes/Acme.tar.Z">Fetch the entire Acme package.</A>
+# <P>
+# @see Des3Cipher
+# @see EncryptedOutputStream
+# @see EncryptedInputStream
+
+import struct
+
+class DesCipher:
+    # Constructor, byte-array key.
+    def __init__(self, key):
+        self.setKey(key)
+
+    #/ Set the key.
+    def setKey(self, key):
+        self.encryptKeys = self.deskey([ord(x) for x in key], 1)
+        self.decryptKeys = self.deskey([ord(x) for x in key], 0)
+
+    # Turn an 8-byte key into internal keys.
+    def deskey(self, keyBlock, encrypting):
+        #~ int i, j, l, m, n;
+        pc1m = [0]*56   #new int[56];
+        pcr = [0]*56    #new int[56];
+        kn = [0]*32     #new int[32];
+
+        for j in range(56):
+            l = pc1[j]
+            m = l & 07
+            pc1m[j] = ((keyBlock[l >> 3] & bytebit[m]) != 0)
+        for i in range(16):
+            if encrypting:
+                m = i << 1
+            else:
+                m = (15-i) << 1
+            n = m + 1
+            kn[m] = kn[n] = 0
+            for j in range(28):
+                l = j + totrot[i]
+                if l < 28:
+                    pcr[j] = pc1m[l]
+                else:
+                    pcr[j] = pc1m[l - 28]
+            for j in range(28, 56):
+                l = j + totrot[i]
+                if l < 56:
+                    pcr[j] = pc1m[l]
+                else:
+                    pcr[j] = pc1m[l - 28]
+            for j in range(24):
+                if pcr[pc2[j]] != 0:
+                    kn[m] |= bigbyte[j]
+                if pcr[pc2[j+24]] != 0:
+                    kn[n] |= bigbyte[j]
+        return self.cookey(kn)
+
+    def cookey(self, raw):
+        #~ int raw0, raw1;
+        #~ int rawi, KnLi;
+        #~ int i;
+        KnL = [0]*32
+
+        rawi = 0
+        KnLi = 0
+        for i in range(16):
+            raw0 = raw[rawi]
+            rawi += 1
+            raw1 = raw[rawi]
+            rawi += 1
+            KnL[KnLi]  = (raw0 & 0x00fc0000L) <<  6
+            KnL[KnLi] |= (raw0 & 0x00000fc0L) << 10
+            KnL[KnLi] |= (raw1 & 0x00fc0000L) >> 10
+            KnL[KnLi] |= (raw1 & 0x00000fc0L) >>  6
+            KnLi += 1
+            KnL[KnLi]  = (raw0 & 0x0003f000L) << 12
+            KnL[KnLi] |= (raw0 & 0x0000003fL) << 16
+            KnL[KnLi] |= (raw1 & 0x0003f000L) >>  4
+            KnL[KnLi] |= (raw1 & 0x0000003fL)
+            KnLi += 1
+        return KnL
+
+    # Block encryption routines.
+    
+    #/ Encrypt data whose length is a multiple of eight bytes, block by block.
+    def encrypt(self, clearText):
+        if len(clearText) % 8 != 0:
+            raise TypeError, "length must be multiple of block size"
+        result = []
+        while clearText:
+            result.append(struct.pack(
+                ">LL", *self.des(struct.unpack(">LL", clearText[:8]),
+                                 self.encryptKeys)))
+            clearText = clearText[8:]
+        return ''.join(result)
+
+    #/ Decrypt data whose length is a multiple of eight bytes, block by block.
+    def decrypt(self, cipherText):
+        if len(cipherText) % 8 != 0:
+            raise TypeError, "length must be multiple of block size"
+        result = []
+        while cipherText:
+            result.append(struct.pack(
+                ">LL", *self.des(struct.unpack(">LL", cipherText[:8]),
+                                 self.decryptKeys)))
+            cipherText = cipherText[8:]
+        return ''.join(result)
+
+    # The DES function.
+    def des(self, (leftt, right), keys):
+        #~ int fval, work, right, leftt;
+        #~ int round
+        keysi = 0
+
+        work   = ((leftt >>  4) ^ right) & 0x0f0f0f0fL
+        right ^= work
+        leftt ^= (work << 4) & 0xffffffffL
+
+        work   = ((leftt >> 16) ^ right) & 0x0000ffffL
+        right ^= work
+        leftt ^= (work << 16) & 0xffffffffL
+
+        work   = ((right >>  2) ^ leftt) & 0x33333333L
+        leftt ^= work
+        right ^= (work << 2) & 0xffffffffL
+
+        work   = ((right >>  8) ^ leftt) & 0x00ff00ffL
+        leftt ^= work
+        right ^= (work << 8) & 0xffffffffL
+        right  = ((right << 1) | ((right >> 31) & 1)) & 0xffffffffL
+
+        work   = (leftt ^ right) & 0xaaaaaaaaL
+        leftt ^= work
+        right ^= work
+        leftt  = ((leftt << 1) | ((leftt >> 31) & 1)) & 0xffffffffL
+
+        for round in range(8):
+            work   = ((right << 28) | (right >> 4)) & 0xffffffffL
+            work  ^= keys[keysi]
+            keysi += 1
+            fval   = SP7[ work        & 0x0000003fL ]
+            fval  |= SP5[(work >>  8) & 0x0000003fL ]
+            fval  |= SP3[(work >> 16) & 0x0000003fL ]
+            fval  |= SP1[(work >> 24) & 0x0000003fL ]
+            work   = right ^ keys[keysi]
+            keysi += 1
+            fval  |= SP8[ work        & 0x0000003fL ]
+            fval  |= SP6[(work >>  8) & 0x0000003fL ]
+            fval  |= SP4[(work >> 16) & 0x0000003fL ]
+            fval  |= SP2[(work >> 24) & 0x0000003fL ]
+            leftt ^= fval
+            work   = ((leftt << 28) | (leftt >> 4)) & 0xffffffffL
+            work  ^= keys[keysi]
+            keysi += 1
+            fval   = SP7[ work        & 0x0000003fL ]
+            fval  |= SP5[(work >>  8) & 0x0000003fL ]
+            fval  |= SP3[(work >> 16) & 0x0000003fL ]
+            fval  |= SP1[(work >> 24) & 0x0000003fL ]
+            work   = leftt ^ keys[keysi]
+            keysi += 1
+            fval  |= SP8[ work        & 0x0000003fL ]
+            fval  |= SP6[(work >>  8) & 0x0000003fL ]
+            fval  |= SP4[(work >> 16) & 0x0000003fL ]
+            fval  |= SP2[(work >> 24) & 0x0000003fL ]
+            right ^= fval
+
+        right  = ((right << 31) | (right >> 1)) & 0xffffffffL
+        work   = (leftt ^ right) & 0xaaaaaaaaL
+        leftt ^= work
+        right ^= work
+        leftt  = ((leftt << 31) | (leftt >> 1)) & 0xffffffffL
+        work   = ((leftt >>  8) ^ right) & 0x00ff00ffL
+        right ^= work
+        leftt ^= (work << 8) & 0xffffffffL
+        work   = ((leftt >>  2) ^ right) & 0x33333333L
+        right ^= work
+        leftt ^= (work << 2) & 0xffffffffL
+        work   = ((right >> 16) ^ leftt) & 0x0000ffffL
+        leftt ^= work
+        right ^= (work << 16) & 0xffffffffL
+        work   = ((right >>  4) ^ leftt) & 0x0f0f0f0fL
+        leftt ^= work
+        right ^= (work << 4) & 0xffffffffL
+        return right, leftt
+
+# Tables, permutations, S-boxes, etc.
+
+bytebit = [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01]
+
+bigbyte = [
+    0x800000, 0x400000, 0x200000, 0x100000,
+    0x080000, 0x040000, 0x020000, 0x010000,
+    0x008000, 0x004000, 0x002000, 0x001000,
+    0x000800, 0x000400, 0x000200, 0x000100,
+    0x000080, 0x000040, 0x000020, 0x000010,
+    0x000008, 0x000004, 0x000002, 0x000001
+]
+
+pc1 = [
+    56, 48, 40, 32, 24, 16,  8,
+     0, 57, 49, 41, 33, 25, 17,
+     9,  1, 58, 50, 42, 34, 26,
+    18, 10,  2, 59, 51, 43, 35,
+    62, 54, 46, 38, 30, 22, 14,
+     6, 61, 53, 45, 37, 29, 21,
+    13,  5, 60, 52, 44, 36, 28,
+    20, 12,  4, 27, 19, 11, 3
+]
+
+totrot = [
+    1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28
+]
+
+pc2 = [
+    13, 16, 10, 23,  0,  4,
+    2, 27, 14,  5, 20,  9,
+    22, 18, 11, 3 , 25,  7,
+    15,  6, 26, 19, 12,  1,
+    40, 51, 30, 36, 46, 54,
+    29, 39, 50, 44, 32, 47,
+    43, 48, 38, 55, 33, 52,
+    45, 41, 49, 35, 28, 31,
+]
+
+SP1 = [
+0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L,
+0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L,
+0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L,
+0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L,
+0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L,
+0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L,
+0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L,
+0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L,
+0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L,
+0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L,
+0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L,
+0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L,
+0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L,
+0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L,
+0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L,
+0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L
+]
+SP2 = [
+0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L,
+0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L,
+0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L,
+0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L,
+0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L,
+0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L,
+0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L,
+0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L,
+0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L,
+0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L,
+0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L,
+0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L,
+0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L,
+0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L,
+0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L,
+0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L
+]
+SP3 = [
+0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L,
+0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L,
+0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L,
+0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L,
+0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L,
+0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L,
+0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L,
+0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L,
+0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L,
+0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L,
+0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L,
+0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L,
+0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L,
+0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L,
+0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L,
+0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L
+]
+SP4 = [
+0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L,
+0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L,
+0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L,
+0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L,
+0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L,
+0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L,
+0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L,
+0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L,
+0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L,
+0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L,
+0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L,
+0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L,
+0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L,
+0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L,
+0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L,
+0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L
+]
+SP5 = [
+0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L,
+0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L,
+0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L,
+0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L,
+0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L,
+0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L,
+0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L,
+0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L,
+0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L,
+0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L,
+0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L,
+0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L,
+0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L,
+0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L,
+0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L,
+0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L
+]
+SP6 = [
+0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L,
+0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L,
+0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L,
+0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L,
+0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L,
+0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L,
+0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L,
+0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L,
+0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L,
+0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L,
+0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L,
+0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L,
+0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L,
+0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L,
+0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L,
+0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L,
+]
+SP7 = [
+0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L,
+0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L,
+0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L,
+0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L,
+0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L,
+0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L,
+0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L,
+0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L,
+0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L,
+0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L,
+0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L,
+0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L,
+0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L,
+0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L,
+0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L,
+0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L,
+]
+SP8 = [
+0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L,
+0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L,
+0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L,
+0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L,
+0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L,
+0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L,
+0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L,
+0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L,
+0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L,
+0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L,
+0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L,
+0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L,
+0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L,
+0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L,
+0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L,
+0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L,
+]
+
+def new(key):
+    return DesCipher(key)
+
+block_size = 8
+key_size = 8
+
+#test only:
+if __name__ == '__main__':
+    des = DesCipher("\x01\x23\x45\x67\x89\xab\xcd\xef")
+    print ''.join(
+        "%02x" % ord(x) for x in des.encrypt("Now is t"))
+    
diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py
index f6f7c33444..d61bd39094 100644
--- a/src/calibre/ebooks/lit/mssha1.py
+++ b/src/calibre/ebooks/lit/mssha1.py
@@ -123,7 +123,7 @@ K = [
     0xCA62C1D6L  # (60 <= t <= 79)
     ]
 
-class sha:
+class mssha1(object):
     "An implementation of the MD5 hash function in pure Python."
 
     def __init__(self):
@@ -186,7 +186,7 @@ class sha:
     def update(self, inBuf):
         """Add to the current message.
 
-        Update the sha object with the string arg. Repeated calls
+        Update the mssha1 object with the string arg. Repeated calls
         are equivalent to a single call with the concatenation of all
         the arguments, i.e. s.update(a); s.update(b) is equivalent
         to s.update(a+b).
@@ -308,12 +308,12 @@ digest_size = digestsize = 20
 blocksize = 1
 
 def new(arg=None):
-    """Return a new sha crypto object.
+    """Return a new mssha1 crypto object.
 
     If arg is present, the method call update(arg) is made.
     """
 
-    crypto = sha()
+    crypto = mssha1()
     if arg:
         crypto.update(arg)
 
diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py
index 4d149042cc..2608d63399 100644
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -13,6 +13,8 @@ from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf import OPFReader
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
+import calibre.ebooks.lit.mssha1 as mssha1
+import calibre.ebooks.lit.msdes as msdes
 
 OPF_DECL = """"<?xml version="1.0" encoding="UTF-8" ?>
 <!DOCTYPE package 
@@ -25,6 +27,9 @@ XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
  "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
 """
 
+DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}"  # section transform: DES-encrypted content
+LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}"  # section transform: LZX-compressed content
+
 def u32(bytes):
     return struct.unpack('<L', bytes[:4])[0]
 
@@ -45,6 +50,10 @@ def encint(bytes, remaining):
         if b & 0x80 == 0: break
     return val, bytes[pos:], remaining 
 
+def msguid(bytes):  # render the first 16 bytes as "{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}"
+    values = struct.unpack("<LHHBBBBBBBB", bytes[:16])  # little-endian 32-bit, two 16-bit, then eight single bytes
+    return "{%08lX-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X}" % values
+
 def read_utf8_char(bytes, pos):
     c = ord(bytes[pos])
     mask = 0x80
@@ -491,13 +500,11 @@ class LitFile(object):
             remaining = chunk_size - int32(piece[p+4:p+8]) - 48
             if remaining < 0:
                 raise LitError('AOLL remaining count is negative')
-            
             entries = u16(piece[p+chunk_size-2:])
             if entries <= 0:            
                 # Hopefully everything will work even without a correct entries
                 # count
                 entries = (2 ** 16) - 1
-            
             piece = piece[p+48:]
             i = 0
             while i < entries:
@@ -523,37 +530,33 @@ class LitFile(object):
                     self.read_meta(entry)
                 self.entries[name] = entry
                 i += 1
-            
-            if not hasattr(self, 'sections'):
+            if not hasattr(self, 'section_names'):
                 raise LitError('Lit file does not have a valid NameList')
-            
             if not hasattr(self, 'manifest'):
                 raise LitError('Lit file does not have a valid manifest')
+            self.read_drm()
 
-    @preserve
     def read_section_names(self, entry):
-        self._stream.seek(self.content_offset + entry.offset)
-        raw = self._stream.read(entry.size)
+        raw = self._read_content(entry.offset, entry.size)
         if len(raw) < 4:
             raise LitError('Invalid Namelist section')
         pos = 4
         self.num_sections = u16(raw[2:pos])
-        
-        self.sections = {}
+        self.section_names = [""]*self.num_sections
+        self.section_data = [None]*self.num_sections
         for section in range(self.num_sections):
             size = u16(raw[pos:pos+2])
             pos += 2
             size = size*2 + 2
             if pos + size > len(raw):
                 raise LitError('Invalid Namelist section')
-            self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
-            pos += size                
+            self.section_names[section] = \
+                raw[pos:pos+size].decode('utf-16-le').rstrip('\000')
+            pos += size
 
-    @preserve
     def read_manifest(self, entry):
         self.manifest = []
-        self._stream.seek(self.content_offset + entry.offset)
-        raw = self._stream.read(entry.size)
+        raw = self._read_content(entry.offset, entry.size)
         pos = 0
         while pos < len(raw):
             size = ord(raw[pos])
@@ -595,19 +598,52 @@ class LitFile(object):
                                      offset, root, state))
                     i += 1
 
-    @preserve
     def read_meta(self, entry):
-        self._stream.seek(self.content_offset + entry.offset)
-        raw = self._stream.read(entry.size)
+        raw = self._read_content(entry.offset, entry.size)
         xml = OPF_DECL + unicode(UnBinary(raw, self.manifest))
         self.meta = xml
 
-    @preserve
-    def read_image(self, internal_name):
-        cover_entry = self.entries[internal_name]
-        self._stream.seek(self.content_offset + cover_entry.offset)
-        return self._stream.read(cover_entry.size)
+    def read_drm(self):
+        """Set self.drmlevel and, for levels 1 and 3, self.bookkey."""
+        # Probe the directory only: get_file() would read (and maybe try
+        # to decrypt) the payload before self.bookkey has been set.
+        if '/DRMStorage/Licenses/EUL' in self.entries:
+            self.drmlevel = 5
+        elif '/DRMStorage/DRMBookplate' in self.entries:
+            self.drmlevel = 3
+        elif '/DRMStorage/DRMSealed' in self.entries:
+            self.drmlevel = 1
+        else:
+            self.drmlevel = 0
+        if self.drmlevel in (0, 5):
+            return  # no DRM, or level 5 whose content _decrypt() refuses anyway
+        des = msdes.new(self.calculate_deskey())
+        bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed'))
+        if bookkey[0] != '\000':
+            raise LitError('Unable to decrypt title key!')
+        self.bookkey = bookkey[1:9]
 
+    def calculate_deskey(self):
+        """Derive the 8-byte DES key by XOR-folding an mssha1 digest of the
+        key-source files, each zero-padded to a multiple of 64 bytes."""
+        names = ['/meta', '/DRMStorage/DRMSource']
+        if self.drmlevel == 3:
+            names.append('/DRMStorage/DRMBookplate')
+        hasher = mssha1.new()
+        for index, name in enumerate(names):
+            chunk = self.get_file(name)
+            if index == 0:
+                # only the first file gets the two leading NUL bytes
+                chunk = ("\000" * 2) + chunk
+            tail = len(chunk) % 64
+            if tail:
+                chunk = chunk + ("\000" * (64 - tail))
+            hasher.update(chunk)
+        folded = [0] * 8
+        for index, char in enumerate(hasher.digest()):
+            folded[index % 8] ^= ord(char)
+        return ''.join(map(chr, folded))
+        
     def get_file(self, name):
         entry = self.entries[name]
         if entry.section == 0:
@@ -615,6 +651,40 @@ class LitFile(object):
         section = self.get_section(entry.section)
         return section[entry.offset:entry.offset+entry.size]
 
+    def get_section(self, section):
+        """Return the transformed content of `section`, caching the result."""
+        if self.section_data[section] is None:
+            # `is None`, not falsiness: an empty section is a valid cached value
+            self.section_data[section] = self._get_section(section)
+        return self.section_data[section]
+
+    def _get_section(self, section):
+        """Load a section's raw content and undo each transform listed
+        for it (only DES decryption is handled so far)."""
+        base = '::DataSpace/Storage/' + self.section_names[section]
+        transform = self.get_file(base + '/Transform/List')
+        content = self.get_file(base + '/Content')
+        control = self.get_file(base + '/ControlData')
+        ctl_pos = 0
+        # the transform list is consumed as consecutive 16-byte GUIDs
+        for guid_pos in xrange(0, len(transform) - 15, 16):
+            ndwords = int32(control[ctl_pos:]) + 1
+            if ndwords <= 0 or ctl_pos + ndwords * 4 > len(control):
+                raise LitError("ControlData is too short")
+            guid = msguid(transform[guid_pos:])
+            if guid == LZXCOMPRESS_GUID:
+                raise LitError("LZX decompression not implemented")
+            if guid != DESENCRYPT_GUID:
+                raise LitError("Unrecognized transform: %s." % repr(guid))
+            content = self._decrypt(content)
+            ctl_pos += ndwords * 4
+        return content
+
+    def _decrypt(self, content):
+        if self.drmlevel != 5:  # level 5 (EUL licence present) is refused below
+            return msdes.new(self.bookkey).decrypt(content)
+        raise LitError('Cannot extract content from a DRM protected ebook')
+
 def get_metadata(stream):
     try:
         litfile = LitFile(stream)
@@ -632,7 +702,7 @@ def get_metadata(stream):
                 ext = 'jpg'
             else:
                 ext = ext.lower()
-            cd = litfile.read_image(cover_item)
+            cd = litfile.get_file(cover_item)
             mi.cover_data = (ext, cd) if cd else (None, None)            
     except:
         title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown'

From 4eeae13b3508d743fcb2f007fe3b352b87c9acc5 Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift" <llasram@gmail.com>
Date: Thu, 17 Jul 2008 23:14:59 -0400
Subject: [PATCH 5/7] Checkpoint before sleep

---
 src/calibre/ebooks/lit/lzxd.py | 138 +++++++++++++++++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100644 src/calibre/ebooks/lit/lzxd.py

diff --git a/src/calibre/ebooks/lit/lzxd.py b/src/calibre/ebooks/lit/lzxd.py
new file mode 100644
index 0000000000..a09daf012b
--- /dev/null
+++ b/src/calibre/ebooks/lit/lzxd.py
@@ -0,0 +1,138 @@
+import copy
+
+# some constants defined by the LZX specification
+MIN_MATCH = 2
+MAX_MATCH = 257
+NUM_CHARS = 256
+BLOCKTYPE_INVALID = 0  # also blocktypes 4-7 invalid
+BLOCKTYPE_VERBATIM = 1
+BLOCKTYPE_ALIGNED = 2
+BLOCKTYPE_UNCOMPRESSED = 3
+PRETREE_NUM_ELEMENTS = 20
+ALIGNED_NUM_ELEMENTS = 8  # aligned offset tree #elements
+NUM_PRIMARY_LENGTHS = 7  # this one missing from spec!
+NUM_SECONDARY_LENGTHS = 249  # length tree #elements
+
+# LZX huffman defines: tweak tablebits as desired (names carry no LZX_ prefix here)
+PRETREE_MAXSYMBOLS = PRETREE_NUM_ELEMENTS
+PRETREE_TABLEBITS = 6
+MAINTREE_MAXSYMBOLS = NUM_CHARS + 50*8  # 50 = largest position-slot count
+MAINTREE_TABLEBITS = 12
+LENGTH_MAXSYMBOLS = NUM_SECONDARY_LENGTHS+1
+LENGTH_TABLEBITS = 12
+ALIGNED_MAXSYMBOLS = ALIGNED_NUM_ELEMENTS
+ALIGNED_TABLEBITS = 7
+LENTABLE_SAFETY = 64  # table decoding overruns are allowed
+
+FRAME_SIZE = 32768  # the size of a frame in LZX
+
+
+class BitReader(object):
+    def __init__(self, data):
+        self.data = data + "\x00\x00\x00\x00"  # slack so peek() may run past the end
+        self.pos, self.nbits = 0, len(data) * 8
+
+    def peek(self, n):
+        acc = got = 0  # bits are taken most-significant first
+        while got < n:
+            acc = (acc << 8) | ord(self.data[(self.pos + got) >> 3])
+            got += 8 - ((self.pos + got) & 7)
+        return (acc >> (got - n)) & ((1 << n) - 1)
+
+    def remove(self, n):
+        self.pos += n
+        return self.nbits >= self.pos
+
+    def left(self):
+        return self.nbits - self.pos
+
+    def read(self, n):
+        bits = self.peek(n)
+        self.pos += n
+        return bits
+
+class LzxError(Exception):
+    """Raised when the LZX decompressor is given invalid arguments."""
+
+POSITION_BASE = [0]*51
+EXTRA_BITS = [0]*51
+
+def _static_init():
+    """Fill EXTRA_BITS (0,0,0,0,1,1,2,2,...,17) and POSITION_BASE."""
+    j = 0
+    for i in range(51):
+        EXTRA_BITS[i] = j
+        if i & 1 and i != 1 and j < 17: j += 1  # bump after each pair but the first
+    j = 0
+    for i in range(51):
+        POSITION_BASE[i] = j
+        j += 1 << EXTRA_BITS[i]
+_static_init()
+
+class LzxDecompressor(object):
+    """State for an LZX decoder; the decoding loop itself is unfinished."""
+    def __init__(self, window_bits, reset_interval=0x7fff):
+        # LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb)
+        if window_bits < 15 or window_bits > 21:
+            raise LzxError("Invalid window size")
+        self.window_size = 1 << window_bits
+        self.window_posn = 0
+        self.frame_posn = 0
+        self.frame = 0
+        self.reset_interval = reset_interval
+        self.intel_filesize = 0
+        self.intel_curpos = 0
+
+        # window bits:    15  16  17  18  19  20  21
+        # position slots: 30  32  34  36  38  42  50
+        self.posn_slots = 50 if window_bits == 21 \
+            else 42 if window_bits == 20 else window_bits << 1
+        self.intel_started = 0
+        self.input_end = 0
+
+        # huffman code lengths
+        self.PRETREE_len = [0] * (PRETREE_MAXSYMBOLS + LENTABLE_SAFETY)
+        self.MAINTREE_len = [0] * (MAINTREE_MAXSYMBOLS + LENTABLE_SAFETY)
+        self.LENGTH_len = [0] * (LENGTH_MAXSYMBOLS + LENTABLE_SAFETY)
+        self.ALIGNED_len = [0] * (ALIGNED_MAXSYMBOLS + LENTABLE_SAFETY)
+
+        # huffman decoding tables
+        self.PRETREE_table = \
+            [0] * ((1 << PRETREE_TABLEBITS) + (PRETREE_MAXSYMBOLS * 2))
+        self.MAINTREE_table = \
+            [0] * ((1 << MAINTREE_TABLEBITS) + (MAINTREE_MAXSYMBOLS * 2))
+        self.LENGTH_table = \
+            [0] * ((1 << LENGTH_TABLEBITS) + (LENGTH_MAXSYMBOLS * 2))
+        self.ALIGNED_table = \
+            [0] * ((1 << ALIGNED_TABLEBITS) + (ALIGNED_MAXSYMBOLS * 2))
+
+        self.o_buf = self.i_buf = ''
+
+        self._reset_state()
+
+    def _reset_state(self):
+        self.R0 = 1
+        self.R1 = 1
+        self.R2 = 1
+        self.header_read = 0
+        self.block_remaining = 0
+        self.block_type = BLOCKTYPE_INVALID
+
+        # initialise tables to 0 (because deltas will be applied to them)
+        for i in xrange(MAINTREE_MAXSYMBOLS): self.MAINTREE_len[i] = 0
+        for i in xrange(LENGTH_MAXSYMBOLS): self.LENGTH_len[i] = 0
+
+    def decompress(self, data, out_bytes):
+        return ''.join(self._decompress(data, out_bytes))
+
+    def _decompress(self, data, out_bytes):
+        # easy answers
+        if out_bytes < 0:
+            raise LzxError('Negative desired output bytes')
+
+        # Initialize input and output
+        reader = BitReader(data)
+        output = []
+        # The decoding loop is not written yet: fail explicitly instead of
+        # returning None, which decompress() would choke on in ''.join().
+        raise LzxError('LZX decompression not implemented')

From 11c6b0a44d6c819634594eb538d3d4feff7632fe Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift" <llasram@gmail.com>
Date: Fri, 18 Jul 2008 00:15:13 -0400
Subject: [PATCH 6/7] Fixed trailing space issue

---
 src/calibre/ebooks/lrf/html/convert_from.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py
index 15eede6d6c..17ffd05ee2 100644
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -222,6 +222,7 @@ class HTMLConverter(object, LoggingInterface):
         self.memory = []        #: Used to ensure that duplicate CSS unhandled erros are not reported
         self.tops = {}          #: element representing the top of each HTML file in the LRF file
         self.previous_text = '' #: Used to figure out when to lstrip
+        self.stripped_space = ''
         self.preserve_block_style = False #: Used so that <p> tags in <blockquote> elements are handled properly
         self.avoid_page_break = False
         self.current_page = book.create_page()
@@ -864,11 +865,15 @@ class HTMLConverter(object, LoggingInterface):
         
         if collapse_whitespace:
             src = re.sub(r'\s{1,}', ' ', src)
+            if self.stripped_space and len(src) == len(src.lstrip(u' \n\r\t')):
+                src = self.stripped_space + src
+            src, orig = src.rstrip(u' \n\r\t'), src
+            self.stripped_space = orig[len(src):]
             if len(self.previous_text) != len(self.previous_text.rstrip(u' \n\r\t')):
                 src = src.lstrip(u' \n\r\t')
             if len(src):
                 self.previous_text = src
-                append_text(src)    
+                append_text(src)
         else:
             srcs = src.split('\n')
             for src in srcs[:-1]:

From bc6f3ab5de22ca0fdb70369e54c081f01b78e2fa Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift" <llasram@gmail.com>
Date: Fri, 18 Jul 2008 00:20:01 -0400
Subject: [PATCH 7/7] Reverted incorrect branch change

---
 src/calibre/ebooks/lrf/html/convert_from.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py
index 17ffd05ee2..15eede6d6c 100644
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -222,7 +222,6 @@ class HTMLConverter(object, LoggingInterface):
         self.memory = []        #: Used to ensure that duplicate CSS unhandled erros are not reported
         self.tops = {}          #: element representing the top of each HTML file in the LRF file
         self.previous_text = '' #: Used to figure out when to lstrip
-        self.stripped_space = ''
         self.preserve_block_style = False #: Used so that <p> tags in <blockquote> elements are handled properly
         self.avoid_page_break = False
         self.current_page = book.create_page()
@@ -865,15 +864,11 @@ class HTMLConverter(object, LoggingInterface):
         
         if collapse_whitespace:
             src = re.sub(r'\s{1,}', ' ', src)
-            if self.stripped_space and len(src) == len(src.lstrip(u' \n\r\t')):
-                src = self.stripped_space + src
-            src, orig = src.rstrip(u' \n\r\t'), src
-            self.stripped_space = orig[len(src):]
             if len(self.previous_text) != len(self.previous_text.rstrip(u' \n\r\t')):
                 src = src.lstrip(u' \n\r\t')
             if len(src):
                 self.previous_text = src
-                append_text(src)
+                append_text(src)    
         else:
             srcs = src.split('\n')
             for src in srcs[:-1]: