From da29a58363f770f38f01e02e3cb4221331666c0a Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Fri, 11 Jul 2008 14:37:27 -0400 Subject: [PATCH 01/44] Integrated own cleanup patch --- src/calibre/ebooks/mobi/reader.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index dea87dbd8c..05093f3c1a 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -13,7 +13,7 @@ except ImportError: import Image as PILImage from calibre import __appname__ -from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi.huffcdic import HuffReader from calibre.ebooks.mobi.palmdoc import decompress_doc @@ -165,13 +165,14 @@ class MobiReader(object): self.processed_html = self.processed_html.decode(self.book_header.codec, 'ignore') self.extract_images(processed_records, output_dir) self.replace_page_breaks() - self.cleanup() + self.cleanup_html() self.processed_html = re.compile('', re.IGNORECASE).sub( '\n\n', self.processed_html) soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<')) + self.cleanup_soup(soup) guide = soup.find('guide') for elem in soup.findAll(['metadata', 'guide']): elem.extract() @@ -192,10 +193,29 @@ class MobiReader(object): if ncx: open(os.path.splitext(htmlfile)[0]+'.ncx', 'wb').write(ncx) - def cleanup(self): + def cleanup_html(self): self.processed_html = re.sub(r'
', '', self.processed_html) - self.processed_html = re.sub(r'<([^>]*) height="([^"]*)"', r'<\1 style="margin-top: \2"', self.processed_html) - self.processed_html = re.sub(r'<([^>]*) width="([^"]*)"', r'<\1 style="text-indent: \2"', self.processed_html) + + def cleanup_soup(self, soup): + for tag in soup.recursiveChildGenerator(): + if not isinstance(tag, Tag): continue + styles = [] + try: + styles.append(tag['style']) + except KeyError: + pass + try: + styles.append('margin-top: %s' % tag['height']) + del tag['height'] + except KeyError: + pass + try: + styles.append('text-indent: %s' % tag['width']) + del tag['width'] + except KeyError: + pass + if styles: + tag['style'] = '; '.join(styles) def create_opf(self, htmlfile, guide=None): mi = self.book_header.exth.mi From 615d5ea2795563f8af9dc34c2c2c03c84c9c9714 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 16 Jul 2008 10:00:49 -0400 Subject: [PATCH 02/44] Checkpoint state to move to office --- src/calibre/ebooks/lit/maps/__init__.py | 7 +- src/calibre/ebooks/lit/maps/html.py | 1568 +++++++++++------------ src/calibre/ebooks/lit/maps/opf.py | 54 +- src/calibre/ebooks/lit/mssha1.py | 343 +++++ src/calibre/ebooks/lit/reader.py | 418 +++--- 5 files changed, 1352 insertions(+), 1038 deletions(-) create mode 100644 src/calibre/ebooks/lit/mssha1.py diff --git a/src/calibre/ebooks/lit/maps/__init__.py b/src/calibre/ebooks/lit/maps/__init__.py index eb99464d9b..2abab3efe9 100644 --- a/src/calibre/ebooks/lit/maps/__init__.py +++ b/src/calibre/ebooks/lit/maps/__init__.py @@ -1,5 +1,2 @@ -import calibre.ebooks.maps.opf as opf -import calibre.ebooks.maps.html as html - -OPF_MAP = opf.MAP -HTML_MAP = html.MAP +from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP +from calibre.ebooks.lit.maps.html import MAP as HTML_MAP diff --git a/src/calibre/ebooks/lit/maps/html.py b/src/calibre/ebooks/lit/maps/html.py index 095b0bcc3e..de0286c764 100644 --- a/src/calibre/ebooks/lit/maps/html.py +++ 
b/src/calibre/ebooks/lit/maps/html.py @@ -1,786 +1,3 @@ -ATTRS0 = { - 0x8010 => "tabindex", - 0x8046 => "title", - 0x804b => "style", - 0x804d => "disabled", - 0x83ea => "class", - 0x83eb => "id", - 0x83fe => "datafld", - 0x83ff => "datasrc", - 0x8400 => "dataformatas", - 0x87d6 => "accesskey", - 0x9392 => "lang", - 0x93ed => "language", - 0x93fe => "dir", - 0x9771 => "onmouseover", - 0x9772 => "onmouseout", - 0x9773 => "onmousedown", - 0x9774 => "onmouseup", - 0x9775 => "onmousemove", - 0x9776 => "onkeydown", - 0x9777 => "onkeyup", - 0x9778 => "onkeypress", - 0x9779 => "onclick", - 0x977a => "ondblclick", - 0x977e => "onhelp", - 0x977f => "onfocus", - 0x9780 => "onblur", - 0x9783 => "onrowexit", - 0x9784 => "onrowenter", - 0x9786 => "onbeforeupdate", - 0x9787 => "onafterupdate", - 0x978a => "onreadystatechange", - 0x9790 => "onscroll", - 0x9794 => "ondragstart", - 0x9795 => "onresize", - 0x9796 => "onselectstart", - 0x9797 => "onerrorupdate", - 0x9799 => "ondatasetchanged", - 0x979a => "ondataavailable", - 0x979b => "ondatasetcomplete", - 0x979c => "onfilterchange", - 0x979f => "onlosecapture", - 0x97a0 => "onpropertychange", - 0x97a2 => "ondrag", - 0x97a3 => "ondragend", - 0x97a4 => "ondragenter", - 0x97a5 => "ondragover", - 0x97a6 => "ondragleave", - 0x97a7 => "ondrop", - 0x97a8 => "oncut", - 0x97a9 => "oncopy", - 0x97aa => "onpaste", - 0x97ab => "onbeforecut", - 0x97ac => "onbeforecopy", - 0x97ad => "onbeforepaste", - 0x97af => "onrowsdelete", - 0x97b0 => "onrowsinserted", - 0x97b1 => "oncellchange", - 0x97b2 => "oncontextmenu", - 0x97b6 => "onbeforeeditfocus", - } -ATTRS3 = { - 0x0001 => "href", - 0x03ec => "target", - 0x03ee => "rel", - 0x03ef => "rev", - 0x03f0 => "urn", - 0x03f1 => "methods", - 0x8001 => "name", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS5 = { - 0x9399 => "clear", - } -ATTRS6 = { - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x8bbb => "classid", - 0x8bbc 
=> "data", - 0x8bbf => "codebase", - 0x8bc0 => "codetype", - 0x8bc1 => "code", - 0x8bc2 => "type", - 0x8bc5 => "vspace", - 0x8bc6 => "hspace", - 0x978e => "onerror", - } -ATTRS7 = { - 0x0001 => "href", - 0x03ea => "shape", - 0x03eb => "coords", - 0x03ed => "target", - 0x03ee => "alt", - 0x03ef => "nohref", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS8 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS9 = { - 0x03ec => "href", - 0x03ed => "target", - } -ATTRS10 = { - 0x938b => "color", - 0x939b => "face", - 0x93a3 => "size", - } -ATTRS12 = { - 0x03ea => "src", - 0x03eb => "loop", - 0x03ec => "volume", - 0x03ed => "balance", - } -ATTRS13 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS15 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS16 = { - 0x07db => "link", - 0x07dc => "alink", - 0x07dd => "vlink", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x938b => "text", - 0x938e => "nowrap", - 0x93ae => "topmargin", - 0x93af => "rightmargin", - 0x93b0 => "bottommargin", - 0x93b1 => "leftmargin", - 0x93b6 => "bgproperties", - 0x93d8 => "scroll", - 0x977b => "onselect", - 0x9791 => "onload", - 0x9792 => "onunload", - 0x9798 => "onbeforeunload", - 0x97b3 => "onbeforeprint", - 0x97b4 => "onafterprint", - 0xfe0c => "bgcolor", - } -ATTRS17 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS18 = { - 0x07d1 => "type", - 0x8001 => "name", - } -ATTRS19 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x93a8 => "valign", - } -ATTRS20 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS21 = { - 0x8046 => "title", - 0x804b => "style", 
- 0x83ea => "class", - 0x83eb => "id", - } -ATTRS22 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS23 = { - 0x03ea => "span", - 0x8006 => "width", - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS24 = { - 0x03ea => "span", - 0x8006 => "width", - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS27 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938e => "nowrap", - } -ATTRS29 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS31 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938e => "nowrap", - } -ATTRS32 = { - 0x03ea => "compact", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS33 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938e => "nowrap", - } -ATTRS34 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS35 = { - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x8bbd => "palette", - 0x8bbe => "pluginspage", - 0x8bbf => "codebase", - 0x8bbf => "src", - 0x8bc1 => "units", - 0x8bc2 => "type", - 0x8bc3 => "hidden", - } -ATTRS36 = { - 0x804a => "align", - } -ATTRS37 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938b => "color", - 0x939b => "face", - 0x939c => "size", - } -ATTRS38 = { - 0x03ea => "action", - 0x03ec => "enctype", - 0x03ed => "method", - 0x03ef => "target", - 0x03f4 => "accept-charset", - 0x8001 => "name", - 0x977c => "onsubmit", - 0x977d => "onreset", - } -ATTRS39 = { - 0x8000 => "align", - 0x8001 => "name", - 0x8bb9 => "src", - 0x8bbb => "border", - 0x8bbc => "frameborder", - 0x8bbd => "framespacing", - 0x8bbe => "marginwidth", - 0x8bbf => "marginheight", - 0x8bc0 => "noresize", - 0x8bc1 => "scrolling", - 
0x8fa2 => "bordercolor", - } -ATTRS40 = { - 0x03e9 => "rows", - 0x03ea => "cols", - 0x03eb => "border", - 0x03ec => "bordercolor", - 0x03ed => "frameborder", - 0x03ee => "framespacing", - 0x8001 => "name", - 0x9791 => "onload", - 0x9792 => "onunload", - 0x9798 => "onbeforeunload", - 0x97b3 => "onbeforeprint", - 0x97b4 => "onafterprint", - } -ATTRS42 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS43 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS44 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS45 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS46 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS47 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS49 = { - 0x03ea => "noshade", - 0x8006 => "width", - 0x8007 => "size", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938b => "color", - } -ATTRS51 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS52 = { - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x8bb9 => "src", - 0x8bbb => "border", - 0x8bbc => "frameborder", - 0x8bbd => "framespacing", - 0x8bbe => "marginwidth", - 0x8bbf => "marginheight", - 0x8bc0 => "noresize", - 0x8bc1 => "scrolling", - 0x8fa2 => "vspace", - 0x8fa3 => "hspace", - } -ATTRS53 = { - 0x03eb => "alt", - 0x03ec => "src", - 0x03ed => "border", - 0x03ee => "vspace", - 0x03ef => "hspace", - 0x03f0 => "lowsrc", - 0x03f1 => "vrml", - 0x03f2 
=> "dynsrc", - 0x03f4 => "loop", - 0x03f6 => "start", - 0x07d3 => "ismap", - 0x07d9 => "usemap", - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x804a => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x978d => "onabort", - 0x978e => "onerror", - 0x9791 => "onload", - } -ATTRS54 = { - 0x07d1 => "type", - 0x07d3 => "size", - 0x07d4 => "maxlength", - 0x07d6 => "readonly", - 0x07d8 => "indeterminate", - 0x07da => "checked", - 0x07db => "alt", - 0x07dc => "src", - 0x07dd => "border", - 0x07de => "vspace", - 0x07df => "hspace", - 0x07e0 => "lowsrc", - 0x07e1 => "vrml", - 0x07e2 => "dynsrc", - 0x07e4 => "loop", - 0x07e5 => "start", - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x93ee => "value", - 0x977b => "onselect", - 0x978d => "onabort", - 0x978e => "onerror", - 0x978f => "onchange", - 0x9791 => "onload", - } -ATTRS56 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS57 = { - 0x03e9 => "for", - } -ATTRS58 = { - 0x804a => "align", - } -ATTRS59 = { - 0x03ea => "value", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x939a => "type", - } -ATTRS60 = { - 0x03ee => "href", - 0x03ef => "rel", - 0x03f0 => "rev", - 0x03f1 => "type", - 0x03f9 => "media", - 0x03fa => "target", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x978e => "onerror", - 0x9791 => "onload", - } -ATTRS61 = { - 0x9399 => "clear", - } -ATTRS62 = { - 0x8001 => "name", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS63 = { - 0x1771 => "scrolldelay", - 0x1772 => "direction", - 0x1773 => "behavior", - 0x1774 => "scrollamount", - 0x1775 => "loop", - 0x1776 => "vspace", - 0x1777 => "hspace", - 0x1778 => "truespeed", - 0x8006 => "width", - 0x8007 => "height", - 0x9785 => "onbounce", - 0x978b => "onfinish", - 0x978c => "onstart", - 0xfe0c => "bgcolor", - } 
-ATTRS65 = { - 0x03ea => "http-equiv", - 0x03eb => "content", - 0x03ec => "url", - 0x03f6 => "charset", - 0x8001 => "name", - } -ATTRS66 = { - 0x03f5 => "n", - } -ATTRS71 = { - 0x8000 => "border", - 0x8000 => "usemap", - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x804a => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x8bbb => "classid", - 0x8bbc => "data", - 0x8bbf => "codebase", - 0x8bc0 => "codetype", - 0x8bc1 => "code", - 0x8bc2 => "type", - 0x8bc5 => "vspace", - 0x8bc6 => "hspace", - 0x978e => "onerror", - } -ATTRS72 = { - 0x03eb => "compact", - 0x03ec => "start", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x939a => "type", - } -ATTRS73 = { - 0x03ea => "selected", - 0x03eb => "value", - } -ATTRS74 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS75 = { - 0x8000 => "name", - 0x8000 => "value", - 0x8000 => "type", - } -ATTRS76 = { - 0x9399 => "clear", - } -ATTRS77 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS78 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS82 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS83 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS84 = { - 0x03ea => "src", - 0x03ed => "for", - 0x03ee => "event", - 0x03f0 => "defer", - 0x03f2 => "type", - 0x978e => "onerror", - } -ATTRS85 = { - 0x03eb => "size", - 0x03ec => "multiple", - 0x8000 => "align", - 0x8001 => "name", - 0x978f => "onchange", - } -ATTRS86 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS87 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS88 = { - 0x8046 => "title", - 0x804b => "style", - 
0x83ea => "class", - 0x83eb => "id", - } -ATTRS89 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS90 = { - 0x03eb => "type", - 0x03ef => "media", - 0x8046 => "title", - 0x978e => "onerror", - 0x9791 => "onload", - } -ATTRS91 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS92 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS93 = { - 0x03ea => "cols", - 0x03eb => "border", - 0x03ec => "rules", - 0x03ed => "frame", - 0x03ee => "cellspacing", - 0x03ef => "cellpadding", - 0x03fa => "datapagesize", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x804a => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0xfe0c => "bgcolor", - } -ATTRS94 = { - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS95 = { - 0x8049 => "align", - 0x93a8 => "valign", - } -ATTRS96 = { - 0x07d2 => "rowspan", - 0x07d3 => "colspan", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x938e => "nowrap", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS97 = { - 0x1b5a => "rows", - 0x1b5b => "cols", - 0x1b5c => "wrap", - 0x1b5d => "readonly", - 0x8001 => "name", - 0x977b => "onselect", - 0x978f => "onchange", - } -ATTRS98 = { - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS99 = { - 0x07d2 => "rowspan", - 0x07d3 => "colspan", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x938e => "nowrap", - 0x93a5 => "bordercolor", - 0x93a6 
=> "bordercolorlight", - 0x93a7 => "bordercolordark", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS100 = { - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS102 = { - 0x8007 => "height", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS103 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS104 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS105 = { - 0x03eb => "compact", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x939a => "type", - } -ATTRS106 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS108 = { - 0x9399 => "clear", - } - TAGS = [ None, None, @@ -893,6 +110,789 @@ TAGS = [ None, ] +ATTRS0 = { + 0x8010: "tabindex", + 0x8046: "title", + 0x804b: "style", + 0x804d: "disabled", + 0x83ea: "class", + 0x83eb: "id", + 0x83fe: "datafld", + 0x83ff: "datasrc", + 0x8400: "dataformatas", + 0x87d6: "accesskey", + 0x9392: "lang", + 0x93ed: "language", + 0x93fe: "dir", + 0x9771: "onmouseover", + 0x9772: "onmouseout", + 0x9773: "onmousedown", + 0x9774: "onmouseup", + 0x9775: "onmousemove", + 0x9776: "onkeydown", + 0x9777: "onkeyup", + 0x9778: "onkeypress", + 0x9779: "onclick", + 0x977a: "ondblclick", + 0x977e: "onhelp", + 0x977f: "onfocus", + 0x9780: "onblur", + 0x9783: "onrowexit", + 0x9784: "onrowenter", + 0x9786: "onbeforeupdate", + 0x9787: "onafterupdate", + 0x978a: "onreadystatechange", + 0x9790: "onscroll", + 0x9794: "ondragstart", + 0x9795: "onresize", + 0x9796: "onselectstart", + 0x9797: "onerrorupdate", + 0x9799: "ondatasetchanged", + 0x979a: "ondataavailable", + 0x979b: "ondatasetcomplete", + 0x979c: "onfilterchange", + 0x979f: "onlosecapture", + 0x97a0: 
"onpropertychange", + 0x97a2: "ondrag", + 0x97a3: "ondragend", + 0x97a4: "ondragenter", + 0x97a5: "ondragover", + 0x97a6: "ondragleave", + 0x97a7: "ondrop", + 0x97a8: "oncut", + 0x97a9: "oncopy", + 0x97aa: "onpaste", + 0x97ab: "onbeforecut", + 0x97ac: "onbeforecopy", + 0x97ad: "onbeforepaste", + 0x97af: "onrowsdelete", + 0x97b0: "onrowsinserted", + 0x97b1: "oncellchange", + 0x97b2: "oncontextmenu", + 0x97b6: "onbeforeeditfocus", + } +ATTRS3 = { + 0x0001: "href", + 0x03ec: "target", + 0x03ee: "rel", + 0x03ef: "rev", + 0x03f0: "urn", + 0x03f1: "methods", + 0x8001: "name", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS5 = { + 0x9399: "clear", + } +ATTRS6 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bbb: "classid", + 0x8bbc: "data", + 0x8bbf: "codebase", + 0x8bc0: "codetype", + 0x8bc1: "code", + 0x8bc2: "type", + 0x8bc5: "vspace", + 0x8bc6: "hspace", + 0x978e: "onerror", + } +ATTRS7 = { + 0x0001: "href", + 0x03ea: "shape", + 0x03eb: "coords", + 0x03ed: "target", + 0x03ee: "alt", + 0x03ef: "nohref", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS8 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS9 = { + 0x03ec: "href", + 0x03ed: "target", + } +ATTRS10 = { + 0x938b: "color", + 0x939b: "face", + 0x93a3: "size", + } +ATTRS12 = { + 0x03ea: "src", + 0x03eb: "loop", + 0x03ec: "volume", + 0x03ed: "balance", + } +ATTRS13 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS15 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS16 = { + 0x07db: "link", + 0x07dc: "alink", + 0x07dd: "vlink", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938b: "text", + 0x938e: "nowrap", + 0x93ae: "topmargin", + 0x93af: "rightmargin", + 0x93b0: "bottommargin", + 0x93b1: "leftmargin", + 0x93b6: "bgproperties", 
+ 0x93d8: "scroll", + 0x977b: "onselect", + 0x9791: "onload", + 0x9792: "onunload", + 0x9798: "onbeforeunload", + 0x97b3: "onbeforeprint", + 0x97b4: "onafterprint", + 0xfe0c: "bgcolor", + } +ATTRS17 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS18 = { + 0x07d1: "type", + 0x8001: "name", + } +ATTRS19 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x93a8: "valign", + } +ATTRS20 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS21 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS22 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS23 = { + 0x03ea: "span", + 0x8006: "width", + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS24 = { + 0x03ea: "span", + 0x8006: "width", + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS27 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS29 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS31 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS32 = { + 0x03ea: "compact", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS33 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS34 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS35 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bbd: "palette", + 0x8bbe: "pluginspage", + 0x8bbf: "codebase", + 0x8bbf: "src", + 0x8bc1: "units", + 0x8bc2: "type", + 0x8bc3: "hidden", + } +ATTRS36 = { + 0x804a: "align", + } +ATTRS37 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 
0x938b: "color", + 0x939b: "face", + 0x939c: "size", + } +ATTRS38 = { + 0x03ea: "action", + 0x03ec: "enctype", + 0x03ed: "method", + 0x03ef: "target", + 0x03f4: "accept-charset", + 0x8001: "name", + 0x977c: "onsubmit", + 0x977d: "onreset", + } +ATTRS39 = { + 0x8000: "align", + 0x8001: "name", + 0x8bb9: "src", + 0x8bbb: "border", + 0x8bbc: "frameborder", + 0x8bbd: "framespacing", + 0x8bbe: "marginwidth", + 0x8bbf: "marginheight", + 0x8bc0: "noresize", + 0x8bc1: "scrolling", + 0x8fa2: "bordercolor", + } +ATTRS40 = { + 0x03e9: "rows", + 0x03ea: "cols", + 0x03eb: "border", + 0x03ec: "bordercolor", + 0x03ed: "frameborder", + 0x03ee: "framespacing", + 0x8001: "name", + 0x9791: "onload", + 0x9792: "onunload", + 0x9798: "onbeforeunload", + 0x97b3: "onbeforeprint", + 0x97b4: "onafterprint", + } +ATTRS42 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS43 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS44 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS45 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS46 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS47 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS49 = { + 0x03ea: "noshade", + 0x8006: "width", + 0x8007: "size", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938b: "color", + } +ATTRS51 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS52 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bb9: "src", + 0x8bbb: "border", + 0x8bbc: "frameborder", + 
0x8bbd: "framespacing", + 0x8bbe: "marginwidth", + 0x8bbf: "marginheight", + 0x8bc0: "noresize", + 0x8bc1: "scrolling", + 0x8fa2: "vspace", + 0x8fa3: "hspace", + } +ATTRS53 = { + 0x03eb: "alt", + 0x03ec: "src", + 0x03ed: "border", + 0x03ee: "vspace", + 0x03ef: "hspace", + 0x03f0: "lowsrc", + 0x03f1: "vrml", + 0x03f2: "dynsrc", + 0x03f4: "loop", + 0x03f6: "start", + 0x07d3: "ismap", + 0x07d9: "usemap", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x978d: "onabort", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS54 = { + 0x07d1: "type", + 0x07d3: "size", + 0x07d4: "maxlength", + 0x07d6: "readonly", + 0x07d8: "indeterminate", + 0x07da: "checked", + 0x07db: "alt", + 0x07dc: "src", + 0x07dd: "border", + 0x07de: "vspace", + 0x07df: "hspace", + 0x07e0: "lowsrc", + 0x07e1: "vrml", + 0x07e2: "dynsrc", + 0x07e4: "loop", + 0x07e5: "start", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x93ee: "value", + 0x977b: "onselect", + 0x978d: "onabort", + 0x978e: "onerror", + 0x978f: "onchange", + 0x9791: "onload", + } +ATTRS56 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS57 = { + 0x03e9: "for", + } +ATTRS58 = { + 0x804a: "align", + } +ATTRS59 = { + 0x03ea: "value", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS60 = { + 0x03ee: "href", + 0x03ef: "rel", + 0x03f0: "rev", + 0x03f1: "type", + 0x03f9: "media", + 0x03fa: "target", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS61 = { + 0x9399: "clear", + } +ATTRS62 = { + 0x8001: "name", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS63 = { + 0x1771: "scrolldelay", + 0x1772: "direction", + 0x1773: "behavior", + 0x1774: "scrollamount", + 0x1775: "loop", + 0x1776: "vspace", + 0x1777: "hspace", + 
0x1778: "truespeed", + 0x8006: "width", + 0x8007: "height", + 0x9785: "onbounce", + 0x978b: "onfinish", + 0x978c: "onstart", + 0xfe0c: "bgcolor", + } +ATTRS65 = { + 0x03ea: "http-equiv", + 0x03eb: "content", + 0x03ec: "url", + 0x03f6: "charset", + 0x8001: "name", + } +ATTRS66 = { + 0x03f5: "n", + } +ATTRS71 = { + 0x8000: "border", + 0x8000: "usemap", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x8bbb: "classid", + 0x8bbc: "data", + 0x8bbf: "codebase", + 0x8bc0: "codetype", + 0x8bc1: "code", + 0x8bc2: "type", + 0x8bc5: "vspace", + 0x8bc6: "hspace", + 0x978e: "onerror", + } +ATTRS72 = { + 0x03eb: "compact", + 0x03ec: "start", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS73 = { + 0x03ea: "selected", + 0x03eb: "value", + } +ATTRS74 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS75 = { + 0x8000: "name", + 0x8000: "value", + 0x8000: "type", + } +ATTRS76 = { + 0x9399: "clear", + } +ATTRS77 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS78 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS82 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS83 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS84 = { + 0x03ea: "src", + 0x03ed: "for", + 0x03ee: "event", + 0x03f0: "defer", + 0x03f2: "type", + 0x978e: "onerror", + } +ATTRS85 = { + 0x03eb: "size", + 0x03ec: "multiple", + 0x8000: "align", + 0x8001: "name", + 0x978f: "onchange", + } +ATTRS86 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS87 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS88 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: 
"class", + 0x83eb: "id", + } +ATTRS89 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS90 = { + 0x03eb: "type", + 0x03ef: "media", + 0x8046: "title", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS91 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS92 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS93 = { + 0x03ea: "cols", + 0x03eb: "border", + 0x03ec: "rules", + 0x03ed: "frame", + 0x03ee: "cellspacing", + 0x03ef: "cellpadding", + 0x03fa: "datapagesize", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0xfe0c: "bgcolor", + } +ATTRS94 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS95 = { + 0x8049: "align", + 0x93a8: "valign", + } +ATTRS96 = { + 0x07d2: "rowspan", + 0x07d3: "colspan", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938e: "nowrap", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS97 = { + 0x1b5a: "rows", + 0x1b5b: "cols", + 0x1b5c: "wrap", + 0x1b5d: "readonly", + 0x8001: "name", + 0x977b: "onselect", + 0x978f: "onchange", + } +ATTRS98 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS99 = { + 0x07d2: "rowspan", + 0x07d3: "colspan", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938e: "nowrap", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS100 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", 
+ } +ATTRS102 = { + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS103 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS104 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS105 = { + 0x03eb: "compact", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS106 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS108 = { + 0x9399: "clear", + } + TAGS_ATTRS = [ None, None, @@ -1005,4 +1005,4 @@ TAGS_ATTRS = [ None, ] -MAP = (TAGS, TAGS_ATTRS, ATTRS0) +MAP = (TAGS, ATTRS0, TAGS_ATTRS) diff --git a/src/calibre/ebooks/lit/maps/opf.py b/src/calibre/ebooks/lit/maps/opf.py index a39e6bf8e8..cc1acc4dfa 100644 --- a/src/calibre/ebooks/lit/maps/opf.py +++ b/src/calibre/ebooks/lit/maps/opf.py @@ -1,28 +1,3 @@ -ATTRS = { - 0x0001 => "href", - 0x0002 => "%never-used", - 0x0003 => "%guid", - 0x0004 => "%minimum_level", - 0x0005 => "%attr5", - 0x0006 => "id", - 0x0007 => "href", - 0x0008 => "media-type", - 0x0009 => "fallback", - 0x000A => "idref", - 0x000B => "xmlns:dc", - 0x000C => "xmlns:oebpackage", - 0x000D => "role", - 0x000E => "file-as", - 0x000F => "event", - 0x0010 => "scheme", - 0x0011 => "title", - 0x0012 => "type", - 0x0013 => "unique-identifier", - 0x0014 => "name", - 0x0015 => "content", - 0x0016 => "xml:lang", - } - TAGS = [ None, "package", @@ -69,6 +44,31 @@ TAGS = [ None, ] -TAGS_ATTR = [{} for i in xrange(43)] +ATTRS = { + 0x0001: "href", + 0x0002: "%never-used", + 0x0003: "%guid", + 0x0004: "%minimum_level", + 0x0005: "%attr5", + 0x0006: "id", + 0x0007: "href", + 0x0008: "media-type", + 0x0009: "fallback", + 0x000A: "idref", + 0x000B: "xmlns:dc", + 0x000C: "xmlns:oebpackage", + 0x000D: "role", + 0x000E: 
"file-as", + 0x000F: "event", + 0x0010: "scheme", + 0x0011: "title", + 0x0012: "type", + 0x0013: "unique-identifier", + 0x0014: "name", + 0x0015: "content", + 0x0016: "xml:lang", + } -MAP = (TAGS, TAGS_ATTRS, ATTRS0) +TAGS_ATTRS = [{} for i in xrange(43)] + +MAP = (TAGS, ATTRS, TAGS_ATTRS) diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py new file mode 100644 index 0000000000..f6f7c33444 --- /dev/null +++ b/src/calibre/ebooks/lit/mssha1.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 + +"""A sample implementation of SHA-1 in pure Python. + + Framework adapted from Dinu Gherman's MD5 implementation by + J. Hallén and L. Creighton. SHA-1 implementation based directly on + the text of the NIST standard FIPS PUB 180-1. +""" + + +__date__ = '2004-11-17' +__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy + + +import struct, copy + + +# ====================================================================== +# Bit-Manipulation helpers +# +# _long2bytes() was contributed by Barry Warsaw +# and is reused here with tiny modifications. +# ====================================================================== + +def _long2bytesBigEndian(n, blocksize=0): + """Convert a long integer to a byte string. + + If optional blocksize is given and greater than zero, pad the front + of the byte string with binary zeros so that the length is a multiple + of blocksize. + """ + + # After much testing, this algorithm was deemed to be the fastest. + s = '' + pack = struct.pack + while n > 0: + s = pack('>I', n & 0xffffffffL) + s + n = n >> 32 + + # Strip off leading zeros. + for i in range(len(s)): + if s[i] != '\000': + break + else: + # Only happens when n == 0. + s = '\000' + i = 0 + + s = s[i:] + + # Add back some pad bytes. This could be done more efficiently + # w.r.t. the de-padding being done above, but sigh... 
+ if blocksize > 0 and len(s) % blocksize: + s = (blocksize - len(s) % blocksize) * '\000' + s + + return s + + +def _bytelist2longBigEndian(list): + "Transform a list of characters into a list of longs." + + imax = len(list)/4 + hl = [0L] * imax + + j = 0 + i = 0 + while i < imax: + b0 = long(ord(list[j])) << 24 + b1 = long(ord(list[j+1])) << 16 + b2 = long(ord(list[j+2])) << 8 + b3 = long(ord(list[j+3])) + hl[i] = b0 | b1 | b2 | b3 + i = i+1 + j = j+4 + + return hl + + +def _rotateLeft(x, n): + "Rotate x (32 bit) left n bits circularly." + + return (x << n) | (x >> (32-n)) + + +# ====================================================================== +# The SHA transformation functions +# +# ====================================================================== + +def f0_19(B, C, D): + return (B & (C ^ D)) ^ D + +def f20_39(B, C, D): + return B ^ C ^ D + +def f40_59(B, C, D): + return ((B | C) & D) | (B & C) + +def f60_79(B, C, D): + return B ^ C ^ D + +def f6_42(B, C, D): + return (B + C) ^ C + +f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20 +f[3] = f20_39 +f[6] = f6_42 +f[10] = f20_39 +f[15] = f20_39 +f[26] = f0_19 +f[31] = f40_59 +f[42] = f6_42 +f[51] = f20_39 +f[68] = f0_19 + + +# Constants to be used +K = [ + 0x5A827999L, # ( 0 <= t <= 19) + 0x6ED9EBA1L, # (20 <= t <= 39) + 0x8F1BBCDCL, # (40 <= t <= 59) + 0xCA62C1D6L # (60 <= t <= 79) + ] + +class sha: + "An implementation of the MD5 hash function in pure Python." + + def __init__(self): + "Initialisation." + + # Initial message length in bits(!). + self.length = 0L + self.count = [0, 0] + + # Initial empty message as a sequence of bytes (8 bit characters). + self.input = [] + + # Call a separate init function, that can be used repeatedly + # to start from scratch on the same object. + self.init() + + + def init(self): + "Initialize the message-digest and set all fields to zero." + + self.length = 0L + self.input = [] + + # Initial 160 bit message digest (5 times 32 bit). 
+ self.H0 = 0x32107654L + self.H1 = 0x23016745L + self.H2 = 0xC4E680A2L + self.H3 = 0xDC679823L + self.H4 = 0xD0857A34L + + def _transform(self, W): + for t in range(16, 80): + W.append(_rotateLeft( + W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL) + + A = self.H0 + B = self.H1 + C = self.H2 + D = self.H3 + E = self.H4 + + for t in xrange(0, 80): + TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20] + E = D + D = C + C = _rotateLeft(B, 30) & 0xffffffffL + B = A + A = TEMP & 0xffffffffL + + self.H0 = (self.H0 + A) & 0xffffffffL + self.H1 = (self.H1 + B) & 0xffffffffL + self.H2 = (self.H2 + C) & 0xffffffffL + self.H3 = (self.H3 + D) & 0xffffffffL + self.H4 = (self.H4 + E) & 0xffffffffL + + + # Down from here all methods follow the Python Standard Library + # API of the sha module. + + def update(self, inBuf): + """Add to the current message. + + Update the sha object with the string arg. Repeated calls + are equivalent to a single call with the concatenation of all + the arguments, i.e. s.update(a); s.update(b) is equivalent + to s.update(a+b). + + The hash is immediately calculated for all full blocks. The final + calculation is made in digest(). It will calculate 1-2 blocks, + depending on how much padding we have to add. This allows us to + keep an intermediate value for the hash, so that we only need to + make minimal recalculation if we call update() to add more data + to the hashed string. + """ + + leninBuf = long(len(inBuf)) + + # Compute number of bytes mod 64. + index = (self.count[1] >> 3) & 0x3FL + + # Update number of bits. 
+ self.count[1] = self.count[1] + (leninBuf << 3) + if self.count[1] < (leninBuf << 3): + self.count[0] = self.count[0] + 1 + self.count[0] = self.count[0] + (leninBuf >> 29) + + partLen = 64 - index + + if leninBuf >= partLen: + self.input[index:] = list(inBuf[:partLen]) + self._transform(_bytelist2longBigEndian(self.input)) + i = partLen + while i + 63 < leninBuf: + self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64]))) + i = i + 64 + else: + self.input = list(inBuf[i:leninBuf]) + else: + i = 0 + self.input = self.input + list(inBuf) + + + def digest(self): + """Terminate the message-digest computation and return digest. + + Return the digest of the strings passed to the update() + method so far. This is a 16-byte string which may contain + non-ASCII characters, including null bytes. + """ + + H0 = self.H0 + H1 = self.H1 + H2 = self.H2 + H3 = self.H3 + H4 = self.H4 + input = [] + self.input + count = [] + self.count + + index = (self.count[1] >> 3) & 0x3fL + + if index < 56: + padLen = 56 - index + else: + padLen = 120 - index + + padding = ['\200'] + ['\000'] * 63 + self.update(padding[:padLen]) + + # Append length (before padding). + bits = _bytelist2longBigEndian(self.input[:56]) + count + + self._transform(bits) + + # Store state in digest. + digest = _long2bytesBigEndian(self.H0, 4) + \ + _long2bytesBigEndian(self.H1, 4) + \ + _long2bytesBigEndian(self.H2, 4) + \ + _long2bytesBigEndian(self.H3, 4) + \ + _long2bytesBigEndian(self.H4, 4) + + self.H0 = H0 + self.H1 = H1 + self.H2 = H2 + self.H3 = H3 + self.H4 = H4 + self.input = input + self.count = count + + return digest + + + def hexdigest(self): + """Terminate and return digest in HEX form. + + Like digest() except the digest is returned as a string of + length 32, containing only hexadecimal digits. This may be + used to exchange the value safely in email or other non- + binary environments. 
+ """ + return ''.join(['%02x' % ord(c) for c in self.digest()]) + + def copy(self): + """Return a clone object. + + Return a copy ('clone') of the md5 object. This can be used + to efficiently compute the digests of strings that share + a common initial substring. + """ + + return copy.deepcopy(self) + + +# ====================================================================== +# Mimic Python top-level functions from standard library API +# for consistency with the md5 module of the standard library. +# ====================================================================== + +# These are mandatory variables in the module. They have constant values +# in the SHA standard. + +digest_size = digestsize = 20 +blocksize = 1 + +def new(arg=None): + """Return a new sha crypto object. + + If arg is present, the method call update(arg) is made. + """ + + crypto = sha() + if arg: + crypto.update(arg) + + return crypto + +if __name__ == '__main__': + def main(): + import sys + file = None + if len(sys.argv) > 2: + print "usage: %s [FILE]" % sys.argv[0] + return + elif len(sys.argv) < 2: + file = sys.stdin + else: + file = open(sys.argv[1], 'rb') + context = new() + data = file.read(16384) + while data: + context.update(data) + data = file.read(16384) + file.close() + digest = context.hexdigest().upper() + for i in xrange(0, 40, 8): + print digest[i:i+8], + print + main() diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 1a0f42f8db..711aef6586 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -5,6 +5,7 @@ Support for reading the metadata from a lit file. 
''' import sys, struct, cStringIO, os +import functools from itertools import repeat from calibre import relpath @@ -13,6 +14,31 @@ from calibre.ebooks.metadata.opf import OPFReader from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP +OPF_DECL = """" + +""" +XHTML_DECL = """ + +""" + +class DirectoryEntry(object): + def __init__(self, name, section, offset, size): + self.name = name + self.section = section + self.offset = offset + self.size = size + + def __repr__(self): + return "" \ + % (self.name, self.section, self.offset, self.size) + + def __str__(self): + return repr(self) + def u32(bytes): return struct.unpack('') index = self.binary_to_text(base=index, depth=depth+1) - is_goingdown = 0 + is_goingdown = False if not tag_name: raise LitError('Tag ends before it begins.') self.buf.write('') @@ -222,7 +248,7 @@ class UnBinary(object): if not in_censorship: self.buf.write(c) count -= 1 - elif count == 0: + if count == 0: if not in_censorship: self.buf.write('"') in_censorship = False @@ -268,7 +294,7 @@ class UnBinary(object): href += c count -= 1 if count == 0: - doc, m, frag = href.partition('#') + doc, m, frag = href[1:].partition('#') path = self.item_path(doc) if m and frag: path += m + frag @@ -297,100 +323,74 @@ class ManifestItem(object): def __repr__(self): return self.internal + u'->' + self.path +def preserve(function): + def wrapper(self, *args, **kwargs): + opos = self._stream.tell() + try: + return function(self, *args, **kwargs) + finally: + self._stream.seek(opos) + functools.update_wrapper(wrapper, function) + return wrapper + class LitFile(object): PIECE_SIZE = 16 def magic(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(0) - val = self._stream.read(8) - finally: - self._stream.seek(opos) - return val + self._stream.seek(0) + return self._stream.read(8) return property(fget=fget) magic = magic() def version(): def fget(self): - val = None - opos = 
self._stream.tell() - try: - self._stream.seek(8) - val = u32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(8) + return u32(self._stream.read(4)) return property(fget=fget) version = version() def hdr_len(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(12) - val = int32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(12) + return int32(self._stream.read(4)) return property(fget=fget) hdr_len = hdr_len() def num_pieces(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(16) - val = int32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(16) + return int32(self._stream.read(4)) return property(fget=fget) num_pieces = num_pieces() def sec_hdr_len(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(20) - val = int32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(20) + return int32(self._stream.read(4)) return property(fget=fget) sec_hdr_len = sec_hdr_len() def guid(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(24) - val = self._stream.read(16) - finally: - self._stream.seek(opos) - return val + self._stream.seek(24) + return self._stream.read(16) return property(fget=fget) guid = guid() def header(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - size = self.hdr_len \ - + (self.num_pieces * self.PIECE_SIZE) \ - + self.sec_hdr_len - self._stream.seek(0) - val = self._stream.read(size) - finally: - self._stream.seek(opos) - return val + size = self.hdr_len \ + + (self.num_pieces * self.PIECE_SIZE) \ + + self.sec_hdr_len + self._stream.seek(0) + return self._stream.read(size) return property(fget=fget) header = header() @@ -402,70 +402,64 @@ class LitFile(object): 
raise LitError('Unknown LIT version %d'%(self.version,)) self.read_secondary_header() self.read_header_pieces() - - def read_secondary_header(self): - opos = self._stream.tell() - try: - self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) - bytes = self._stream.read(self.sec_hdr_len) - offset = int32(bytes[4:]) - while offset < len(bytes): - blocktype = bytes[offset:offset+4] - blockver = u32(bytes[offset+4:]) - if blocktype == 'CAOL': - if blockver != 2: - raise LitError( - 'Unknown CAOL block format %d' % blockver) - self.creator_id = u32(bytes[offset+12:]) - self.entry_chunklen = u32(bytes[offset+20:]) - self.count_chunklen = u32(bytes[offset+24:]) - self.entry_unknown = u32(bytes[offset+28:]) - self.count_unknown = u32(bytes[offset+32:]) - offset += 48 - elif blocktype == 'ITSF': - if blockver != 4: - raise LitError( - 'Unknown ITSF block format %d' % blockver) - if u32(bytes[offset+4+16:]): - raise LitError('This file has a 64bit content offset') - self.content_offset = u32(bytes[offset+16:]) - self.timestamp = u32(bytes[offset+24:]) - self.language_id = u32(bytes[offset+28:]) - offset += 48 - if not hasattr(self, 'content_offset'): - raise LitError('Could not figure out the content offset') - finally: - self._stream.seek(opos) + @preserve + def read_secondary_header(self): + self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) + bytes = self._stream.read(self.sec_hdr_len) + offset = int32(bytes[4:]) + while offset < len(bytes): + blocktype = bytes[offset:offset+4] + blockver = u32(bytes[offset+4:]) + if blocktype == 'CAOL': + if blockver != 2: + raise LitError( + 'Unknown CAOL block format %d' % blockver) + self.creator_id = u32(bytes[offset+12:]) + self.entry_chunklen = u32(bytes[offset+20:]) + self.count_chunklen = u32(bytes[offset+24:]) + self.entry_unknown = u32(bytes[offset+28:]) + self.count_unknown = u32(bytes[offset+32:]) + offset += 48 + elif blocktype == 'ITSF': + if blockver != 4: + raise LitError( + 'Unknown ITSF block 
format %d' % blockver) + if u32(bytes[offset+4+16:]): + raise LitError('This file has a 64bit content offset') + self.content_offset = u32(bytes[offset+16:]) + self.timestamp = u32(bytes[offset+24:]) + self.language_id = u32(bytes[offset+28:]) + offset += 48 + if not hasattr(self, 'content_offset'): + raise LitError('Could not figure out the content offset') + + @preserve def read_header_pieces(self): - opos = self._stream.tell() - try: - src = self.header[self.hdr_len:] - for i in range(self.num_pieces): - piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE] - if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: - raise LitError('Piece %s has 64bit value' % repr(piece)) - offset, size = u32(piece), int32(piece[8:]) - self._stream.seek(offset) - piece = self._stream.read(size) - if i == 0: - continue # Dont need this piece - elif i == 1: - if u32(piece[8:]) != self.entry_chunklen or \ - u32(piece[12:]) != self.entry_unknown: - raise LitError('Secondary header does not match piece') - self.read_directory(piece) - elif i == 2: - if u32(piece[8:]) != self.count_chunklen or \ - u32(piece[12:]) != self.count_unknown: - raise LitError('Secondary header does not match piece') - continue # No data needed from this piece - elif i == 3: - self.piece3_guid = piece - elif i == 4: - self.piece4_guid = piece - finally: - self._stream.seek(opos) + src = self.header[self.hdr_len:] + for i in range(self.num_pieces): + piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE] + if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: + raise LitError('Piece %s has 64bit value' % repr(piece)) + offset, size = u32(piece), int32(piece[8:]) + self._stream.seek(offset) + piece = self._stream.read(size) + if i == 0: + continue # Dont need this piece + elif i == 1: + if u32(piece[8:]) != self.entry_chunklen or \ + u32(piece[12:]) != self.entry_unknown: + raise LitError('Secondary header does not match piece') + self.read_directory(piece) + elif i == 2: + if u32(piece[8:]) != self.count_chunklen or \ + 
u32(piece[12:]) != self.count_unknown: + raise LitError('Secondary header does not match piece') + continue # No data needed from this piece + elif i == 3: + self.piece3_guid = piece + elif i == 4: + self.piece4_guid = piece def read_directory(self, piece): self.entries = [] @@ -521,108 +515,88 @@ class LitFile(object): if not hasattr(self, 'manifest'): raise LitError('Lit file does not have a valid manifest') - - def read_section_names(self, entry): - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - if len(raw) < 4: - raise LitError('Invalid Namelist section') - pos = 4 - self.num_sections = u16(raw[2:pos]) - - self.sections = {} - for section in range(self.num_sections): - size = u16(raw[pos:pos+2]) - pos += 2 - size = size*2 + 2 - if pos + size > len(raw): - raise LitError('Invalid Namelist section') - self.sections[section] = raw[pos:pos+size].decode('utf-16-le') - pos += size - finally: - self._stream.seek(opos) - - def read_manifest(self, entry): - opos = self._stream.tell() - try: - self.manifest = [] - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - pos = 0 - while pos < len(raw): - size = ord(raw[pos]) - if size == 0: break - pos += 1 - root = raw[pos:pos+size].decode('utf8') - pos += size - if pos >= len(raw): - raise LitError('Truncated manifest.') - for state in ['spine', 'not spine', 'css', 'images']: - num_files = int32(raw[pos:pos+4]) - pos += 4 - if num_files == 0: continue - - i = 0 - while i < num_files: - if pos+5 >= len(raw): - raise LitError('Truncated manifest.') - offset = u32(raw[pos:pos+4]) - pos += 4 - - slen = ord(raw[pos]) - pos += 1 - internal = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - original = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - mime_type = raw[pos:pos+slen].decode('utf8') - pos += slen + 1 - - self.manifest.append( - 
ManifestItem(original, internal, mime_type, - offset, root, state)) - i += 1 - finally: - self._stream.seek(opos) - - def read_meta(self, entry): - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - xml = \ -'''\ - - -'''+\ - unicode(UnBinary(raw, self.manifest)) - self.meta = xml - finally: - self._stream.seek(opos) - + @preserve + def read_section_names(self, entry): + self._stream.seek(self.content_offset + entry.offset) + raw = self._stream.read(entry.size) + if len(raw) < 4: + raise LitError('Invalid Namelist section') + pos = 4 + self.num_sections = u16(raw[2:pos]) + + self.sections = {} + for section in range(self.num_sections): + size = u16(raw[pos:pos+2]) + pos += 2 + size = size*2 + 2 + if pos + size > len(raw): + raise LitError('Invalid Namelist section') + self.sections[section] = raw[pos:pos+size].decode('utf-16-le') + pos += size + + @preserve + def read_manifest(self, entry): + self.manifest = [] + self._stream.seek(self.content_offset + entry.offset) + raw = self._stream.read(entry.size) + pos = 0 + while pos < len(raw): + size = ord(raw[pos]) + if size == 0: break + pos += 1 + root = raw[pos:pos+size].decode('utf8') + pos += size + if pos >= len(raw): + raise LitError('Truncated manifest.') + for state in ['spine', 'not spine', 'css', 'images']: + num_files = int32(raw[pos:pos+4]) + pos += 4 + if num_files == 0: continue + + i = 0 + while i < num_files: + if pos+5 >= len(raw): + raise LitError('Truncated manifest.') + offset = u32(raw[pos:pos+4]) + pos += 4 + + slen = ord(raw[pos]) + pos += 1 + internal = raw[pos:pos+slen].decode('utf8') + pos += slen + + slen = ord(raw[pos]) + pos += 1 + original = raw[pos:pos+slen].decode('utf8') + pos += slen + + slen = ord(raw[pos]) + pos += 1 + mime_type = raw[pos:pos+slen].decode('utf8') + pos += slen + 1 + + self.manifest.append( + ManifestItem(original, internal, mime_type, + offset, root, state)) + i += 1 + + @preserve + def 
read_meta(self, entry): + self._stream.seek(self.content_offset + entry.offset) + raw = self._stream.read(entry.size) + xml = OPF_DECL + unicode(UnBinary(raw, self.manifest)) + self.meta = xml + + @preserve def read_image(self, internal_name): cover_entry = None for entry in self.entries: if internal_name in entry.name: cover_entry = entry break - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + cover_entry.offset) - return self._stream.read(cover_entry.size) - finally: - self._stream.seek(opos) + self._stream.seek(self.content_offset + cover_entry.offset) + return self._stream.read(cover_entry.size) def get_metadata(stream): try: From a48282500fb831dc8019a69068700a98c6d8a90d Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 16 Jul 2008 15:00:47 -0400 Subject: [PATCH 03/44] Checkpoint for changing computers --- src/calibre/ebooks/lit/reader.py | 65 ++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 711aef6586..4d149042cc 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -25,20 +25,6 @@ XHTML_DECL = """ "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd"> """ -class DirectoryEntry(object): - def __init__(self, name, section, offset, size): - self.name = name - self.section = section - self.offset = offset - self.size = size - - def __repr__(self): - return "" \ - % (self.name, self.section, self.offset, self.size) - - def __str__(self): - return repr(self) - def u32(bytes): return struct.unpack('' + self.path + return "ManifestItem(internal='%s', path='%s')" \ + % (repr(self.internal), repr(self.path)) def preserve(function): def wrapper(self, *args, **kwargs): @@ -382,6 +382,7 @@ class LitFile(object): return self._stream.read(16) return property(fget=fget) guid = guid() + def header(): @preserve @@ -403,6 +404,19 @@ class LitFile(object): self.read_secondary_header() 
self.read_header_pieces() + @preserve + def __len__(self): + self._stream.seek(0, 2) + return self._stream.tell() + + @preserve + def _read_raw(self, offset, size): + self._stream.seek(offset) + return self._stream.read(size) + + def _read_content(self, offset, size): + return self._read_raw(self.content_offset + offset, size) + @preserve def read_secondary_header(self): self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) @@ -462,7 +476,7 @@ class LitFile(object): self.piece4_guid = piece def read_directory(self, piece): - self.entries = [] + self.entries = {} if not piece.startswith('IFCM'): raise LitError('Header piece #1 is not main directory.') chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28]) @@ -507,7 +521,7 @@ class LitFile(object): self.read_manifest(entry) elif name == '/meta': self.read_meta(entry) - self.entries.append(entry) + self.entries[name] = entry i += 1 if not hasattr(self, 'sections'): @@ -590,14 +604,17 @@ class LitFile(object): @preserve def read_image(self, internal_name): - cover_entry = None - for entry in self.entries: - if internal_name in entry.name: - cover_entry = entry - break + cover_entry = self.entries[internal_name] self._stream.seek(self.content_offset + cover_entry.offset) return self._stream.read(cover_entry.size) + def get_file(self, name): + entry = self.entries[name] + if entry.section == 0: + return self._read_content(entry.offset, entry.size) + section = self.get_section(entry.section) + return section[entry.offset:entry.offset+entry.size] + def get_metadata(stream): try: litfile = LitFile(stream) From 9cf4508547a499d7174dfb90cabd5945ba3b356d Mon Sep 17 00:00:00 2001 From: "Marshall T. 
Vandegrift" Date: Thu, 17 Jul 2008 19:33:30 -0400 Subject: [PATCH 04/44] Checkpoint for switching computers --- src/calibre/ebooks/lit/msdes.py | 481 +++++++++++++++++++++++++++++++ src/calibre/ebooks/lit/mssha1.py | 8 +- src/calibre/ebooks/lit/reader.py | 118 ++++++-- 3 files changed, 579 insertions(+), 28 deletions(-) create mode 100644 src/calibre/ebooks/lit/msdes.py diff --git a/src/calibre/ebooks/lit/msdes.py b/src/calibre/ebooks/lit/msdes.py new file mode 100644 index 0000000000..5bc67b09bb --- /dev/null +++ b/src/calibre/ebooks/lit/msdes.py @@ -0,0 +1,481 @@ +# Re-modified for use in MS LIT decryption. Un-reversed the bytebit[] array. +# Substituted Microsoft's absurd modified S-boxes. Modified the encrypt/decrypt +# methods to handle more than one block at a time. +# +# And lo, all the previous notices follow: + +# Modified DES encryption for VNC password authentication. +# Ported from realvnc's java viewer by +# I chose this package name because it is not compatible with the +# original DES algorithm, e.g. found pycrypto. +# +# (C) 2003 chris +# Released as free software under the Python License. +# +# You're free to use it for commercial and noncommercial +# application, modify and redistribute it as long as the +# copyright notices are intact. There are no warranties, not +# even that it does what it says to do ;-) +# +# Original notice following: + +# This DES class has been extracted from package Acme.Crypto for use in VNC. +# The bytebit[] array has been reversed so that the most significant bit +# in each byte of the key is ignored, not the least significant. Also the +# unnecessary odd parity code has been removed. +# +# These changes are: +# Copyright (C) 1999 AT&T Laboratories Cambridge. All Rights Reserved. +# +# This software is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# + +# DesCipher - the DES encryption method +# +# The meat of this code is by Dave Zimmerman , and is: +# +# Copyright (c) 1996 Widget Workshop, Inc. All Rights Reserved. +# +# Permission to use, copy, modify, and distribute this software +# and its documentation for NON-COMMERCIAL or COMMERCIAL purposes and +# without fee is hereby granted, provided that this copyright notice is kept +# intact. +# +# WIDGET WORKSHOP MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY +# OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +# TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE, OR NON-INFRINGEMENT. WIDGET WORKSHOP SHALL NOT BE LIABLE +# FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR +# DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. +# +# THIS SOFTWARE IS NOT DESIGNED OR INTENDED FOR USE OR RESALE AS ON-LINE +# CONTROL EQUIPMENT IN HAZARDOUS ENVIRONMENTS REQUIRING FAIL-SAFE +# PERFORMANCE, SUCH AS IN THE OPERATION OF NUCLEAR FACILITIES, AIRCRAFT +# NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL, DIRECT LIFE +# SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH THE FAILURE OF THE +# SOFTWARE COULD LEAD DIRECTLY TO DEATH, PERSONAL INJURY, OR SEVERE +# PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH RISK ACTIVITIES"). WIDGET WORKSHOP +# SPECIFICALLY DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR +# HIGH RISK ACTIVITIES. +# +# +# The rest is: +# +# Copyright (C) 1996 by Jef Poskanzer . All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# Visit the ACME Labs Java page for up-to-date versions of this and other +# fine Java utilities: http://www.acme.com/java/ + + +#/ The DES encryption method. +#

+# This is surprisingly fast, for pure Java. On a SPARC 20, wrapped +# in Acme.Crypto.EncryptedOutputStream or Acme.Crypto.EncryptedInputStream, +# it does around 7000 bytes/second. +#

+# Most of this code is by Dave Zimmerman , and is +# Copyright (c) 1996 Widget Workshop, Inc. See the source file for details. +#

+# Fetch the software.
+# Fetch the entire Acme package. +#

+# @see Des3Cipher +# @see EncryptedOutputStream +# @see EncryptedInputStream + +import struct + +class DesCipher: + # Constructor, byte-array key. + def __init__(self, key): + self.setKey(key) + + #/ Set the key. + def setKey(self, key): + self.encryptKeys = self.deskey([ord(x) for x in key], 1) + self.decryptKeys = self.deskey([ord(x) for x in key], 0) + + # Turn an 8-byte key into internal keys. + def deskey(self, keyBlock, encrypting): + #~ int i, j, l, m, n; + pc1m = [0]*56 #new int[56]; + pcr = [0]*56 #new int[56]; + kn = [0]*32 #new int[32]; + + for j in range(56): + l = pc1[j] + m = l & 07 + pc1m[j] = ((keyBlock[l >> 3] & bytebit[m]) != 0) + for i in range(16): + if encrypting: + m = i << 1 + else: + m = (15-i) << 1 + n = m + 1 + kn[m] = kn[n] = 0 + for j in range(28): + l = j + totrot[i] + if l < 28: + pcr[j] = pc1m[l] + else: + pcr[j] = pc1m[l - 28] + for j in range(28, 56): + l = j + totrot[i] + if l < 56: + pcr[j] = pc1m[l] + else: + pcr[j] = pc1m[l - 28] + for j in range(24): + if pcr[pc2[j]] != 0: + kn[m] |= bigbyte[j] + if pcr[pc2[j+24]] != 0: + kn[n] |= bigbyte[j] + return self.cookey(kn) + + def cookey(self, raw): + #~ int raw0, raw1; + #~ int rawi, KnLi; + #~ int i; + KnL = [0]*32 + + rawi = 0 + KnLi = 0 + for i in range(16): + raw0 = raw[rawi] + rawi += 1 + raw1 = raw[rawi] + rawi += 1 + KnL[KnLi] = (raw0 & 0x00fc0000L) << 6 + KnL[KnLi] |= (raw0 & 0x00000fc0L) << 10 + KnL[KnLi] |= (raw1 & 0x00fc0000L) >> 10 + KnL[KnLi] |= (raw1 & 0x00000fc0L) >> 6 + KnLi += 1 + KnL[KnLi] = (raw0 & 0x0003f000L) << 12 + KnL[KnLi] |= (raw0 & 0x0000003fL) << 16 + KnL[KnLi] |= (raw1 & 0x0003f000L) >> 4 + KnL[KnLi] |= (raw1 & 0x0000003fL) + KnLi += 1 + return KnL + + # Block encryption routines. + + #/ Encrypt a block of eight bytes. 
+ def encrypt(self, clearText): + if len(clearText) % 8 != 0: + raise TypeError, "length must be multiple of block size" + result = [] + while clearText: + result.append(struct.pack( + ">LL", *self.des(struct.unpack(">LL", clearText[:8]), + self.encryptKeys))) + clearText = clearText[8:] + return ''.join(result) + + #/ Decrypt a block of eight bytes. + def decrypt(self, cipherText): + if len(cipherText) % 8 != 0: + raise TypeError, "length must be multiple of block size" + result = [] + while cipherText: + result.append(struct.pack( + ">LL", *self.des(struct.unpack(">LL", cipherText[:8]), + self.decryptKeys))) + cipherText = cipherText[8:] + return ''.join(result) + + # The DES function. + def des(self, (leftt, right), keys): + #~ int fval, work, right, leftt; + #~ int round + keysi = 0 + + work = ((leftt >> 4) ^ right) & 0x0f0f0f0fL + right ^= work + leftt ^= (work << 4) & 0xffffffffL + + work = ((leftt >> 16) ^ right) & 0x0000ffffL + right ^= work + leftt ^= (work << 16) & 0xffffffffL + + work = ((right >> 2) ^ leftt) & 0x33333333L + leftt ^= work + right ^= (work << 2) & 0xffffffffL + + work = ((right >> 8) ^ leftt) & 0x00ff00ffL + leftt ^= work + right ^= (work << 8) & 0xffffffffL + right = ((right << 1) | ((right >> 31) & 1)) & 0xffffffffL + + work = (leftt ^ right) & 0xaaaaaaaaL + leftt ^= work + right ^= work + leftt = ((leftt << 1) | ((leftt >> 31) & 1)) & 0xffffffffL + + for round in range(8): + work = ((right << 28) | (right >> 4)) & 0xffffffffL + work ^= keys[keysi] + keysi += 1 + fval = SP7[ work & 0x0000003fL ] + fval |= SP5[(work >> 8) & 0x0000003fL ] + fval |= SP3[(work >> 16) & 0x0000003fL ] + fval |= SP1[(work >> 24) & 0x0000003fL ] + work = right ^ keys[keysi] + keysi += 1 + fval |= SP8[ work & 0x0000003fL ] + fval |= SP6[(work >> 8) & 0x0000003fL ] + fval |= SP4[(work >> 16) & 0x0000003fL ] + fval |= SP2[(work >> 24) & 0x0000003fL ] + leftt ^= fval + work = ((leftt << 28) | (leftt >> 4)) & 0xffffffffL + work ^= keys[keysi] + keysi += 1 + fval = 
SP7[ work & 0x0000003fL ] + fval |= SP5[(work >> 8) & 0x0000003fL ] + fval |= SP3[(work >> 16) & 0x0000003fL ] + fval |= SP1[(work >> 24) & 0x0000003fL ] + work = leftt ^ keys[keysi] + keysi += 1 + fval |= SP8[ work & 0x0000003fL ] + fval |= SP6[(work >> 8) & 0x0000003fL ] + fval |= SP4[(work >> 16) & 0x0000003fL ] + fval |= SP2[(work >> 24) & 0x0000003fL ] + right ^= fval + + right = ((right << 31) | (right >> 1)) & 0xffffffffL + work = (leftt ^ right) & 0xaaaaaaaaL + leftt ^= work + right ^= work + leftt = ((leftt << 31) | (leftt >> 1)) & 0xffffffffL + work = ((leftt >> 8) ^ right) & 0x00ff00ffL + right ^= work + leftt ^= (work << 8) & 0xffffffffL + work = ((leftt >> 2) ^ right) & 0x33333333L + right ^= work + leftt ^= (work << 2) & 0xffffffffL + work = ((right >> 16) ^ leftt) & 0x0000ffffL + leftt ^= work + right ^= (work << 16) & 0xffffffffL + work = ((right >> 4) ^ leftt) & 0x0f0f0f0fL + leftt ^= work + right ^= (work << 4) & 0xffffffffL + return right, leftt + +# Tables, permutations, S-boxes, etc. 
+ +bytebit = [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01] + +bigbyte = [ + 0x800000, 0x400000, 0x200000, 0x100000, + 0x080000, 0x040000, 0x020000, 0x010000, + 0x008000, 0x004000, 0x002000, 0x001000, + 0x000800, 0x000400, 0x000200, 0x000100, + 0x000080, 0x000040, 0x000020, 0x000010, + 0x000008, 0x000004, 0x000002, 0x000001 +] + +pc1 = [ + 56, 48, 40, 32, 24, 16, 8, + 0, 57, 49, 41, 33, 25, 17, + 9, 1, 58, 50, 42, 34, 26, + 18, 10, 2, 59, 51, 43, 35, + 62, 54, 46, 38, 30, 22, 14, + 6, 61, 53, 45, 37, 29, 21, + 13, 5, 60, 52, 44, 36, 28, + 20, 12, 4, 27, 19, 11, 3 +] + +totrot = [ + 1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28 +] + +pc2 = [ + 13, 16, 10, 23, 0, 4, + 2, 27, 14, 5, 20, 9, + 22, 18, 11, 3 , 25, 7, + 15, 6, 26, 19, 12, 1, + 40, 51, 30, 36, 46, 54, + 29, 39, 50, 44, 32, 47, + 43, 48, 38, 55, 33, 52, + 45, 41, 49, 35, 28, 31, +] + +SP1 = [ +0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, +0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L, +0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L, +0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L, +0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L, +0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L, +0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L, +0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L, +0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L, +0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L, +0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L, +0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L, +0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L, +0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L, +0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L, +0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L +] +SP2 = [ +0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L, +0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L, +0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L, +0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L, +0x00100000L, 0x40008010L, 0x40100010L, 
0x00008000L, +0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L, +0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L, +0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L, +0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L, +0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L, +0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L, +0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L, +0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L, +0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L, +0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L, +0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L +] +SP3 = [ +0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L, +0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L, +0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L, +0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L, +0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L, +0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L, +0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L, +0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L, +0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L, +0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L, +0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L, +0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L, +0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L, +0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L, +0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L, +0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L +] +SP4 = [ +0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L, +0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L, +0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L, +0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L, +0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L, +0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L, +0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L, +0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L, +0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L, +0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L, 
+0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L, +0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L, +0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L, +0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L, +0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L, +0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L +] +SP5 = [ +0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L, +0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L, +0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L, +0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L, +0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L, +0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L, +0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L, +0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L, +0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L, +0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L, +0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L, +0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L, +0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L, +0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L, +0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L, +0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L +] +SP6 = [ +0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L, +0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L, +0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L, +0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L, +0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L, +0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L, +0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L, +0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L, +0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L, +0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L, +0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L, +0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L, +0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L, +0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L, +0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L, +0x80200000L, 
0x00002040L, 0x00000040L, 0x80200040L, +] +SP7 = [ +0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L, +0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L, +0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L, +0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L, +0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L, +0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L, +0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L, +0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L, +0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L, +0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L, +0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L, +0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L, +0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L, +0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L, +0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L, +0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L, +] +SP8 = [ +0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L, +0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L, +0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L, +0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L, +0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L, +0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L, +0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L, +0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L, +0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L, +0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L, +0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L, +0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L, +0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L, +0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L, +0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L, +0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L, +] + +def new(key): + return DesCipher(key) + +block_size = 8 +key_size = 8 + +#test only: +if __name__ == '__main__': + des = DesCipher("\x01\x23\x45\x67\x89\xab\xcd\xef") + print ''.join( + "%02x" % ord(x) for x in des.encrypt("Now is 
t")) + diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py index f6f7c33444..d61bd39094 100644 --- a/src/calibre/ebooks/lit/mssha1.py +++ b/src/calibre/ebooks/lit/mssha1.py @@ -123,7 +123,7 @@ K = [ 0xCA62C1D6L # (60 <= t <= 79) ] -class sha: +class mssha1(object): "An implementation of the MD5 hash function in pure Python." def __init__(self): @@ -186,7 +186,7 @@ class sha: def update(self, inBuf): """Add to the current message. - Update the sha object with the string arg. Repeated calls + Update the mssha1 object with the string arg. Repeated calls are equivalent to a single call with the concatenation of all the arguments, i.e. s.update(a); s.update(b) is equivalent to s.update(a+b). @@ -308,12 +308,12 @@ digest_size = digestsize = 20 blocksize = 1 def new(arg=None): - """Return a new sha crypto object. + """Return a new mssha1 crypto object. If arg is present, the method call update(arg) is made. """ - crypto = sha() + crypto = mssha1() if arg: crypto.update(arg) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 4d149042cc..2608d63399 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -13,6 +13,8 @@ from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf import OPFReader from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP +import calibre.ebooks.lit.mssha1 as mssha1 +import calibre.ebooks.lit.msdes as msdes OPF_DECL = """" "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd"> """ +DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" +LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" + def u32(bytes): return struct.unpack(' len(raw): raise LitError('Invalid Namelist section') - self.sections[section] = raw[pos:pos+size].decode('utf-16-le') - pos += size + self.section_names[section] = \ + raw[pos:pos+size].decode('utf-16-le').rstrip('\000') + pos += size - @preserve def 
read_manifest(self, entry): self.manifest = [] - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) + raw = self._read_content(entry.offset, entry.size) pos = 0 while pos < len(raw): size = ord(raw[pos]) @@ -595,19 +598,52 @@ class LitFile(object): offset, root, state)) i += 1 - @preserve def read_meta(self, entry): - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) + raw = self._read_content(entry.offset, entry.size) xml = OPF_DECL + unicode(UnBinary(raw, self.manifest)) self.meta = xml - @preserve - def read_image(self, internal_name): - cover_entry = self.entries[internal_name] - self._stream.seek(self.content_offset + cover_entry.offset) - return self._stream.read(cover_entry.size) + def read_drm(self): + def exists_file(name): + try: self.get_file(name) + except KeyError: return False + return True + self.drmlevel = 0 + if exists_file('/DRMStorage/Licenses/EUL'): + self.drmlevel = 5 + elif exists_file('/DRMStorage/DRMBookplate'): + self.drmlevel = 3 + elif exists_file('/DRMStorage/DRMSealed'): + self.drmlevel = 1 + else: + return + des = msdes.new(self.calculate_deskey()) + bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed')) + if bookkey[0] != '\000': + raise LitError('Unable to decrypt title key!') + self.bookkey = bookkey[1:9] + def calculate_deskey(self): + hashfiles = ['/meta', '/DRMStorage/DRMSource'] + if self.drmlevel == 3: + hashfiles.append('/DRMStorage/DRMBookplate') + prepad = 2 + hash = mssha1.new() + for name in hashfiles: + data = self.get_file(name) + if prepad > 0: + data = ("\000" * prepad) + data + prepad = 0 + postpad = 64 - (len(data) % 64) + if postpad < 64: + data = data + ("\000" * postpad) + hash.update(data) + digest = hash.digest() + key = [0] * 8 + for i in xrange(0, len(digest)): + key[i % 8] ^= ord(digest[i]) + return ''.join(chr(x) for x in key) + def get_file(self, name): entry = self.entries[name] if entry.section == 0: @@ -615,6 
+651,40 @@ class LitFile(object): section = self.get_section(entry.section) return section[entry.offset:entry.offset+entry.size] + def get_section(self, section): + data = self.section_data[section] + if not data: + data = self._get_section(section) + self.section_data[section] = data + return data + + def _get_section(self, section): + name = self.section_names[section] + path = '::DataSpace/Storage/' + name + transform = self.get_file(path + '/Transform/List') + content = self.get_file(path + '/Content') + control = self.get_file(path + '/ControlData') + idx_transform = idx_control = 0 + while (len(transform) - idx_transform) >= 16: + ndwords = int32(control[idx_control:]) + 1 + if (idx_control + (ndwords * 4)) > len(control) or ndwords <= 0: + raise LitError("ControlData is too short") + guid = msguid(transform[idx_transform:]) + if guid == DESENCRYPT_GUID: + content = self._decrypt(content) + idx_control += ndwords * 4 + elif guid == LZXCOMPRESS_GUID: + raise LitError("LZX decompression not implemented") + else: + raise LitError("Unrecognized transform: %s." % repr(guid)) + idx_transform += 16 + return content + + def _decrypt(self, content): + if self.drmlevel == 5: + raise LitError('Cannot extract content from a DRM protected ebook') + return msdes.new(self.bookkey).decrypt(content) + def get_metadata(stream): try: litfile = LitFile(stream) @@ -632,7 +702,7 @@ def get_metadata(stream): ext = 'jpg' else: ext = ext.lower() - cd = litfile.read_image(cover_item) + cd = litfile.get_file(cover_item) mi.cover_data = (ext, cd) if cd else (None, None) except: title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown' From 4eeae13b3508d743fcb2f007fe3b352b87c9acc5 Mon Sep 17 00:00:00 2001 From: "Marshall T. 
Vandegrift" Date: Thu, 17 Jul 2008 23:14:59 -0400 Subject: [PATCH 05/44] Checkpoint before sleep --- src/calibre/ebooks/lit/lzxd.py | 138 +++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 src/calibre/ebooks/lit/lzxd.py diff --git a/src/calibre/ebooks/lit/lzxd.py b/src/calibre/ebooks/lit/lzxd.py new file mode 100644 index 0000000000..a09daf012b --- /dev/null +++ b/src/calibre/ebooks/lit/lzxd.py @@ -0,0 +1,138 @@ +import copy + +# some constants defined by the LZX specification +MIN_MATCH = 2 +MAX_MATCH = 257 +NUM_CHARS = 256 +BLOCKTYPE_INVALID = 0 # also blocktypes 4-7 invalid +BLOCKTYPE_VERBATIM = 1 +BLOCKTYPE_ALIGNED = 2 +BLOCKTYPE_UNCOMPRESSED = 3 +PRETREE_NUM_ELEMENTS = 20 +ALIGNED_NUM_ELEMENTS = 8 # aligned offset tree #elements +NUM_PRIMARY_LENGTHS = 7 # this one missing from spec! +NUM_SECONDARY_LENGTHS = 249 # length tree #elements + +# LZX huffman defines: tweak tablebits as desired +PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS +PRETREE_TABLEBITS = 6 +MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50*8 +MAINTREE_TABLEBITS = 12 +LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS+1 +LENGTH_TABLEBITS = 12 +ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS +ALIGNED_TABLEBITS = 7 +LENTABLE_SAFETY = 64 # table decoding overruns are allowed + +FRAME_SIZE = 32768 # the size of a frame in LZX + + +class BitReader(object): + def __init__(self, data): + self.data, self.pos, self.nbits = \ + data + "\x00\x00\x00\x00", 0, len(data) * 8 + + def peek(self, n): + r, g = 0, 0 + while g < n: + r = (r << 8) | ord(self.data[(self.pos + g) >> 3]) + g = g + 8 - ((self.pos + g) & 7) + return (r >> (g - n)) & ((1 << n) - 1) + + def remove(self, n): + self.pos += n + return self.pos <= self.nbits + + def left(self): + return self.nbits - self.pos + + def read(self, n): + val = self.peek(n) + self.remove(n) + return val + +class LzxError(Exception): + pass + +POSITION_BASE = [0]*51 +EXTRA_BITS = [0]*51 + +def _static_init(): + j = 0 + for i in xrange(0, 51, 2): + 
EXTRA_BITS[i] = j + EXTRA_BITS[i + 1] = j + if i != 0 or j < 17): j += 1 + j = 0 + for i in xrange(0, 51, 1): + POSITION_BASE[i] = j + j += 1 << extra_bits[i] +_static_init() + +class LzxDecompressor(object): + def __init__(self, window_bits, reset_interval=0x7fff): + # LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) + if window_bits < 15 or window_bits > 21: + raise LzxError("Invalid window size") + + self.window_size = 1 << window_bits + self.window_posn = 0 + self.frame_posn = 0 + self.frame = 0 + self.reset_interval = reset_interval + self.intel_filesize = 0 + self.intel_curpos = 0 + + # window bits: 15 16 17 18 19 20 21 + # position slots: 30 32 34 36 38 42 50 + self.posn_solts = 50 if window_bits == 21 \ + else 42 if window_bits == 20 else window_bits << 1 + self.intel_started = 0 + self.input_end = 0 + + # huffman code lengths + self.PRETREE_len = [0] * (PRETREE_MAXSYMBOLS + LENTABLE_SAFETY) + self.MAINTREE_len = [0] * (MAINTREE_MAXSYMBOLS + LENTABLE_SAFETY) + self.LENGTH_len = [0] * (LENGTH_MAXSYMBOLS + LENTABLE_SAFETY) + self.ALIGNED_len = [0] * (ALIGNED_MAXSYMBOLS + LENTABLE_SAFETY) + + # huffman decoding tables + self.PRETREE_table = \ + [0] * ((1 << PRETREE_TABLEBITS) + (PRETREE_MAXSYMBOLS * 2)) + self.MAINTREE_table = \ + [0] * ((1 << MAINTREE_TABLEBITS) + (MAINTREE_MAXSYMBOLS * 2)) + self.LENGTH_table = \ + [0] * ((1 << LENGTH_TABLEBITS) + (LENGTH_MAXSYMBOLS * 2)) + self.ALIGNED_table = \ + [0] * ((1 << ALIGNED_TABLEBITS) + (ALIGNED_MAXSYMBOLS * 2)) + + self.o_buf = self.i_buf = '' + + self._reset_state() + + def _reset_state(self): + self.R0 = 1 + self.R1 = 1 + self.R2 = 1 + self.header_read = 0 + self.block_remaining = 0 + self.block_type = BLOCKTYPE_INVALID + + # initialise tables to 0 (because deltas will be applied to them) + for i in xrange(MAINTREE_MAXSYMBOLS): self.MAINTREE_len[i] = 0 + for i in xrange(LENGTH_MAXSYMBOLS): self.LENGTH_len[i] = 0 + + def decompress(self, data, out_bytes): + return ''.join(self._decompress(data, 
out_bytes)) + + def _decompress(self, data, out_bytes): + # easy answers + if out_bytes < 0: + raise LzxError('Negative desired output bytes') + + # Initialize input and output + input = BitReader(data) + output = [] + + + From 11c6b0a44d6c819634594eb538d3d4feff7632fe Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Fri, 18 Jul 2008 00:15:13 -0400 Subject: [PATCH 06/44] Fixed trailing space issue --- src/calibre/ebooks/lrf/html/convert_from.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 15eede6d6c..17ffd05ee2 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -222,6 +222,7 @@ class HTMLConverter(object, LoggingInterface): self.memory = [] #: Used to ensure that duplicate CSS unhandled erros are not reported self.tops = {} #: element representing the top of each HTML file in the LRF file self.previous_text = '' #: Used to figure out when to lstrip + self.stripped_space = '' self.preserve_block_style = False #: Used so that

tags in

elements are handled properly self.avoid_page_break = False self.current_page = book.create_page() @@ -864,11 +865,15 @@ class HTMLConverter(object, LoggingInterface): if collapse_whitespace: src = re.sub(r'\s{1,}', ' ', src) + if self.stripped_space and len(src) == len(src.lstrip(u' \n\r\t')): + src = self.stripped_space + src + src, orig = src.rstrip(u' \n\r\t'), src + self.stripped_space = orig[len(src):] if len(self.previous_text) != len(self.previous_text.rstrip(u' \n\r\t')): src = src.lstrip(u' \n\r\t') if len(src): self.previous_text = src - append_text(src) + append_text(src) else: srcs = src.split('\n') for src in srcs[:-1]: From bc6f3ab5de22ca0fdb70369e54c081f01b78e2fa Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Fri, 18 Jul 2008 00:20:01 -0400 Subject: [PATCH 07/44] Reverted incorrect branch change --- src/calibre/ebooks/lrf/html/convert_from.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 17ffd05ee2..15eede6d6c 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -222,7 +222,6 @@ class HTMLConverter(object, LoggingInterface): self.memory = [] #: Used to ensure that duplicate CSS unhandled erros are not reported self.tops = {} #: element representing the top of each HTML file in the LRF file self.previous_text = '' #: Used to figure out when to lstrip - self.stripped_space = '' self.preserve_block_style = False #: Used so that

tags in

elements are handled properly self.avoid_page_break = False self.current_page = book.create_page() @@ -865,15 +864,11 @@ class HTMLConverter(object, LoggingInterface): if collapse_whitespace: src = re.sub(r'\s{1,}', ' ', src) - if self.stripped_space and len(src) == len(src.lstrip(u' \n\r\t')): - src = self.stripped_space + src - src, orig = src.rstrip(u' \n\r\t'), src - self.stripped_space = orig[len(src):] if len(self.previous_text) != len(self.previous_text.rstrip(u' \n\r\t')): src = src.lstrip(u' \n\r\t') if len(src): self.previous_text = src - append_text(src) + append_text(src) else: srcs = src.split('\n') for src in srcs[:-1]: From 1e78860f4f3b414a70cfdc04b0dcb1435fea22f8 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Fri, 18 Jul 2008 16:34:41 -0400 Subject: [PATCH 08/44] Switched LZX to C extension --- src/calibre/ebooks/lit/lzxd.py | 138 --- src/calibre/utils/lzx-setup.py | 5 + src/calibre/utils/lzx/lzx.h | 169 ++++ src/calibre/utils/lzx/lzxd.c | 905 ++++++++++++++++++ src/calibre/utils/lzx/lzxglue.c | 172 ++++ src/calibre/utils/lzx/lzxmodule.c | 206 ++++ src/calibre/utils/lzx/mspack.h | 1482 +++++++++++++++++++++++++++++ src/calibre/utils/lzx/system.h | 66 ++ 8 files changed, 3005 insertions(+), 138 deletions(-) delete mode 100644 src/calibre/ebooks/lit/lzxd.py create mode 100644 src/calibre/utils/lzx-setup.py create mode 100644 src/calibre/utils/lzx/lzx.h create mode 100644 src/calibre/utils/lzx/lzxd.c create mode 100644 src/calibre/utils/lzx/lzxglue.c create mode 100644 src/calibre/utils/lzx/lzxmodule.c create mode 100644 src/calibre/utils/lzx/mspack.h create mode 100644 src/calibre/utils/lzx/system.h diff --git a/src/calibre/ebooks/lit/lzxd.py b/src/calibre/ebooks/lit/lzxd.py deleted file mode 100644 index a09daf012b..0000000000 --- a/src/calibre/ebooks/lit/lzxd.py +++ /dev/null @@ -1,138 +0,0 @@ -import copy - -# some constants defined by the LZX specification -MIN_MATCH = 2 -MAX_MATCH = 257 -NUM_CHARS = 256 -BLOCKTYPE_INVALID = 0 # 
also blocktypes 4-7 invalid -BLOCKTYPE_VERBATIM = 1 -BLOCKTYPE_ALIGNED = 2 -BLOCKTYPE_UNCOMPRESSED = 3 -PRETREE_NUM_ELEMENTS = 20 -ALIGNED_NUM_ELEMENTS = 8 # aligned offset tree #elements -NUM_PRIMARY_LENGTHS = 7 # this one missing from spec! -NUM_SECONDARY_LENGTHS = 249 # length tree #elements - -# LZX huffman defines: tweak tablebits as desired -PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS -PRETREE_TABLEBITS = 6 -MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50*8 -MAINTREE_TABLEBITS = 12 -LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS+1 -LENGTH_TABLEBITS = 12 -ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS -ALIGNED_TABLEBITS = 7 -LENTABLE_SAFETY = 64 # table decoding overruns are allowed - -FRAME_SIZE = 32768 # the size of a frame in LZX - - -class BitReader(object): - def __init__(self, data): - self.data, self.pos, self.nbits = \ - data + "\x00\x00\x00\x00", 0, len(data) * 8 - - def peek(self, n): - r, g = 0, 0 - while g < n: - r = (r << 8) | ord(self.data[(self.pos + g) >> 3]) - g = g + 8 - ((self.pos + g) & 7) - return (r >> (g - n)) & ((1 << n) - 1) - - def remove(self, n): - self.pos += n - return self.pos <= self.nbits - - def left(self): - return self.nbits - self.pos - - def read(self, n): - val = self.peek(n) - self.remove(n) - return val - -class LzxError(Exception): - pass - -POSITION_BASE = [0]*51 -EXTRA_BITS = [0]*51 - -def _static_init(): - j = 0 - for i in xrange(0, 51, 2): - EXTRA_BITS[i] = j - EXTRA_BITS[i + 1] = j - if i != 0 or j < 17): j += 1 - j = 0 - for i in xrange(0, 51, 1): - POSITION_BASE[i] = j - j += 1 << extra_bits[i] -_static_init() - -class LzxDecompressor(object): - def __init__(self, window_bits, reset_interval=0x7fff): - # LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) - if window_bits < 15 or window_bits > 21: - raise LzxError("Invalid window size") - - self.window_size = 1 << window_bits - self.window_posn = 0 - self.frame_posn = 0 - self.frame = 0 - self.reset_interval = reset_interval - self.intel_filesize = 0 - 
self.intel_curpos = 0 - - # window bits: 15 16 17 18 19 20 21 - # position slots: 30 32 34 36 38 42 50 - self.posn_solts = 50 if window_bits == 21 \ - else 42 if window_bits == 20 else window_bits << 1 - self.intel_started = 0 - self.input_end = 0 - - # huffman code lengths - self.PRETREE_len = [0] * (PRETREE_MAXSYMBOLS + LENTABLE_SAFETY) - self.MAINTREE_len = [0] * (MAINTREE_MAXSYMBOLS + LENTABLE_SAFETY) - self.LENGTH_len = [0] * (LENGTH_MAXSYMBOLS + LENTABLE_SAFETY) - self.ALIGNED_len = [0] * (ALIGNED_MAXSYMBOLS + LENTABLE_SAFETY) - - # huffman decoding tables - self.PRETREE_table = \ - [0] * ((1 << PRETREE_TABLEBITS) + (PRETREE_MAXSYMBOLS * 2)) - self.MAINTREE_table = \ - [0] * ((1 << MAINTREE_TABLEBITS) + (MAINTREE_MAXSYMBOLS * 2)) - self.LENGTH_table = \ - [0] * ((1 << LENGTH_TABLEBITS) + (LENGTH_MAXSYMBOLS * 2)) - self.ALIGNED_table = \ - [0] * ((1 << ALIGNED_TABLEBITS) + (ALIGNED_MAXSYMBOLS * 2)) - - self.o_buf = self.i_buf = '' - - self._reset_state() - - def _reset_state(self): - self.R0 = 1 - self.R1 = 1 - self.R2 = 1 - self.header_read = 0 - self.block_remaining = 0 - self.block_type = BLOCKTYPE_INVALID - - # initialise tables to 0 (because deltas will be applied to them) - for i in xrange(MAINTREE_MAXSYMBOLS): self.MAINTREE_len[i] = 0 - for i in xrange(LENGTH_MAXSYMBOLS): self.LENGTH_len[i] = 0 - - def decompress(self, data, out_bytes): - return ''.join(self._decompress(data, out_bytes)) - - def _decompress(self, data, out_bytes): - # easy answers - if out_bytes < 0: - raise LzxError('Negative desired output bytes') - - # Initialize input and output - input = BitReader(data) - output = [] - - - diff --git a/src/calibre/utils/lzx-setup.py b/src/calibre/utils/lzx-setup.py new file mode 100644 index 0000000000..87e523b9c3 --- /dev/null +++ b/src/calibre/utils/lzx-setup.py @@ -0,0 +1,5 @@ +from distutils.core import setup, Extension + +setup(name="lzx", version="1.0", + ext_modules=[Extension('lzx', sources=['lzx/lzxmodule.c', 'lzx/lzxd.c'], + 
include_dirs=['lzx'])]) diff --git a/src/calibre/utils/lzx/lzx.h b/src/calibre/utils/lzx/lzx.h new file mode 100644 index 0000000000..15ae17c0aa --- /dev/null +++ b/src/calibre/utils/lzx/lzx.h @@ -0,0 +1,169 @@ +/* This file is part of libmspack. + * (C) 2003-2004 Stuart Caie. + * + * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted + * by Microsoft Corporation. + * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +#include + +#ifndef MSPACK_LZX_H +#define MSPACK_LZX_H 1 + +/* LZX compression / decompression definitions */ + +/* some constants defined by the LZX specification */ +#define LZX_MIN_MATCH (2) +#define LZX_MAX_MATCH (257) +#define LZX_NUM_CHARS (256) +#define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */ +#define LZX_BLOCKTYPE_VERBATIM (1) +#define LZX_BLOCKTYPE_ALIGNED (2) +#define LZX_BLOCKTYPE_UNCOMPRESSED (3) +#define LZX_PRETREE_NUM_ELEMENTS (20) +#define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */ +#define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! 
*/ +#define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements */ + +/* LZX huffman defines: tweak tablebits as desired */ +#define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS) +#define LZX_PRETREE_TABLEBITS (6) +#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8) +#define LZX_MAINTREE_TABLEBITS (12) +#define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1) +#define LZX_LENGTH_TABLEBITS (12) +#define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS) +#define LZX_ALIGNED_TABLEBITS (7) +#define LZX_LENTABLE_SAFETY (64) /* table decoding overruns are allowed */ + +#define LZX_FRAME_SIZE (32768) /* the size of a frame in LZX */ + +struct lzxd_stream { + struct mspack_system *sys; /* I/O routines */ + struct mspack_file *input; /* input file handle */ + struct mspack_file *output; /* output file handle */ + + off_t offset; /* number of bytes actually output */ + off_t length; /* overall decompressed length of stream */ + + unsigned char *window; /* decoding window */ + unsigned int window_size; /* window size */ + unsigned int window_posn; /* decompression offset within window */ + unsigned int frame_posn; /* current frame offset within in window */ + unsigned int frame; /* the number of 32kb frames processed */ + unsigned int reset_interval; /* which frame do we reset the compressor? */ + + unsigned int R0, R1, R2; /* for the LRU offset system */ + unsigned int block_length; /* uncompressed length of this LZX block */ + unsigned int block_remaining; /* uncompressed bytes still left to decode */ + + signed int intel_filesize; /* magic header value used for transform */ + signed int intel_curpos; /* current offset in transform space */ + + unsigned char intel_started; /* has intel E8 decoding started? */ + unsigned char block_type; /* type of the current block */ + unsigned char header_read; /* have we started decoding at all yet? */ + unsigned char posn_slots; /* how many posn slots in stream? 
*/ + unsigned char input_end; /* have we reached the end of input? */ + + int error; + + /* I/O buffering */ + unsigned char *inbuf, *i_ptr, *i_end, *o_ptr, *o_end; + unsigned int bit_buffer, bits_left, inbuf_size; + + /* huffman code lengths */ + unsigned char PRETREE_len [LZX_PRETREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; + unsigned char MAINTREE_len [LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; + unsigned char LENGTH_len [LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; + unsigned char ALIGNED_len [LZX_ALIGNED_MAXSYMBOLS + LZX_LENTABLE_SAFETY]; + + /* huffman decoding tables */ + unsigned short PRETREE_table [(1 << LZX_PRETREE_TABLEBITS) + + (LZX_PRETREE_MAXSYMBOLS * 2)]; + unsigned short MAINTREE_table[(1 << LZX_MAINTREE_TABLEBITS) + + (LZX_MAINTREE_MAXSYMBOLS * 2)]; + unsigned short LENGTH_table [(1 << LZX_LENGTH_TABLEBITS) + + (LZX_LENGTH_MAXSYMBOLS * 2)]; + unsigned short ALIGNED_table [(1 << LZX_ALIGNED_TABLEBITS) + + (LZX_ALIGNED_MAXSYMBOLS * 2)]; + + /* this is used purely for doing the intel E8 transform */ + unsigned char e8_buf[LZX_FRAME_SIZE]; +}; + +/* allocates LZX decompression state for decoding the given stream. + * + * - returns NULL if window_bits is outwith the range 15 to 21 (inclusive). + * + * - uses system->alloc() to allocate memory + * + * - returns NULL if not enough memory + * + * - window_bits is the size of the LZX window, from 32Kb (15) to 2Mb (21). + * + * - reset_interval is how often the bitstream is reset, measured in + * multiples of 32Kb bytes output. For CAB LZX streams, this is always 0 + * (does not occur). + * + * - input_buffer_size is how many bytes to use as an input bitstream buffer + * + * - output_length is the length in bytes of the entirely decompressed + * output stream, if known in advance. It is used to correctly perform + * the Intel E8 transformation, which must stop 6 bytes before the very + * end of the decompressed stream. It is not otherwise used or adhered + * to. 
If the full decompressed length is known in advance, set it here. + * If it is NOT known, use the value 0, and call lzxd_set_output_length() + * once it is known. If never set, 4 of the final 6 bytes of the output + * stream may be incorrect. + */ +extern struct lzxd_stream *lzxd_init(struct mspack_system *system, + struct mspack_file *input, + struct mspack_file *output, + int window_bits, + int reset_interval, + int input_buffer_size, + off_t output_length); + +/* see description of output_length in lzxd_init() */ +extern void lzxd_set_output_length(struct lzxd_stream *lzx, + off_t output_length); + +/* decompresses, or decompresses more of, an LZX stream. + * + * - out_bytes of data will be decompressed and the function will return + * with an MSPACK_ERR_OK return code. + * + * - decompressing will stop as soon as out_bytes is reached. if the true + * amount of bytes decoded spills over that amount, they will be kept for + * a later invocation of lzxd_decompress(). + * + * - the output bytes will be passed to the system->write() function given in + * lzxd_init(), using the output file handle given in lzxd_init(). More + * than one call may be made to system->write(). + * + * - LZX will read input bytes as necessary using the system->read() function + * given in lzxd_init(), using the input file handle given in lzxd_init(). + * This will continue until system->read() returns 0 bytes, or an error. + * input streams should convey an "end of input stream" by refusing to + * supply all the bytes that LZX asks for when they reach the end of the + * stream, rather than return an error code. + * + * - if an error code other than MSPACK_ERR_OK is returned, the stream should + * be considered unusable and lzxd_decompress() should not be called again + * on this stream. 
+ */ +extern int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes); + +/* frees all state associated with an LZX data stream + * + * - calls system->free() using the system pointer given in lzxd_init() + */ +void lzxd_free(struct lzxd_stream *lzx); + +#endif diff --git a/src/calibre/utils/lzx/lzxd.c b/src/calibre/utils/lzx/lzxd.c new file mode 100644 index 0000000000..337af441fd --- /dev/null +++ b/src/calibre/utils/lzx/lzxd.c @@ -0,0 +1,905 @@ +/* This file is part of libmspack. + * (C) 2003-2004 Stuart Caie. + * + * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted + * by Microsoft Corporation. + * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +/* LZX decompression implementation */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +/* Microsoft's LZX document and their implementation of the + * com.ms.util.cab Java package do not concur. + * + * In the LZX document, there is a table showing the correlation between + * window size and the number of position slots. It states that the 1MB + * window = 40 slots and the 2MB window = 42 slots. In the implementation, + * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the + * first slot whose position base is equal to or more than the required + * window size'. This would explain why other tables in the document refer + * to 50 slots rather than 42. + * + * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode + * is not defined in the specification. + * + * The LZX document does not state the uncompressed block has an + * uncompressed length field. Where does this length field come from, so + * we can know how large the block is? 
The implementation has it as the 24 + * bits following after the 3 blocktype bits, before the alignment + * padding. + * + * The LZX document states that aligned offset blocks have their aligned + * offset huffman tree AFTER the main and length trees. The implementation + * suggests that the aligned offset tree is BEFORE the main and length + * trees. + * + * The LZX document decoding algorithm states that, in an aligned offset + * block, if an extra_bits value is 1, 2 or 3, then that number of bits + * should be read and the result added to the match offset. This is + * correct for 1 and 2, but not 3, where just a huffman symbol (using the + * aligned tree) should be read. + * + * Regarding the E8 preprocessing, the LZX document states 'No translation + * may be performed on the last 6 bytes of the input block'. This is + * correct. However, the pseudocode provided checks for the *E8 leader* + * up to the last 6 bytes. If the leader appears between -10 and -7 bytes + * from the end, this would cause the next four bytes to be modified, at + * least one of which would be in the last 6 bytes, which is not allowed + * according to the spec. + * + * The specification states that the huffman trees must always contain at + * least one element. However, many CAB files contain blocks where the + * length tree is completely empty (because there are no matches), and + * this is expected to succeed. + */ + + +/* LZX decompressor input macros + * + * STORE_BITS stores bitstream state in lzxd_stream structure + * RESTORE_BITS restores bitstream state from lzxd_stream structure + * READ_BITS(var,n) takes N bits from the buffer and puts them in var + * ENSURE_BITS(n) ensures there are at least N bits in the bit buffer. + * PEEK_BITS(n) extracts without removing N bits from the bit buffer + * REMOVE_BITS(n) removes N bits from the bit buffer + * + * These bit access routines work by using the area beyond the MSB and the + * LSB as a free source of zeroes when shifting. 
This avoids having to + * mask any bits. So we have to know the bit width of the bit buffer + * variable. + * + * The bit buffer datatype should be at least 32 bits wide: it must be + * possible to ENSURE_BITS(16), so it must be possible to add 16 new bits + * to the bit buffer when the bit buffer already has 1 to 15 bits left. + */ + +#if HAVE_LIMITS_H +# include +#endif +#ifndef CHAR_BIT +# define CHAR_BIT (8) +#endif +#define BITBUF_WIDTH (sizeof(bit_buffer) * CHAR_BIT) + +#define STORE_BITS do { \ + lzx->i_ptr = i_ptr; \ + lzx->i_end = i_end; \ + lzx->bit_buffer = bit_buffer; \ + lzx->bits_left = bits_left; \ +} while (0) + +#define RESTORE_BITS do { \ + i_ptr = lzx->i_ptr; \ + i_end = lzx->i_end; \ + bit_buffer = lzx->bit_buffer; \ + bits_left = lzx->bits_left; \ +} while (0) + +#define ENSURE_BITS(nbits) \ + while (bits_left < (nbits)) { \ + if (i_ptr >= i_end) { \ + if (lzxd_read_input(lzx)) return lzx->error; \ + i_ptr = lzx->i_ptr; \ + i_end = lzx->i_end; \ + } \ + bit_buffer |= ((i_ptr[1] << 8) | i_ptr[0]) \ + << (BITBUF_WIDTH - 16 - bits_left); \ + bits_left += 16; \ + i_ptr += 2; \ + } + +#define PEEK_BITS(nbits) (bit_buffer >> (BITBUF_WIDTH - (nbits))) + +#define REMOVE_BITS(nbits) ((bit_buffer <<= (nbits)), (bits_left -= (nbits))) + +#define READ_BITS(val, nbits) do { \ + ENSURE_BITS(nbits); \ + (val) = PEEK_BITS(nbits); \ + REMOVE_BITS(nbits); \ +} while (0) + +static int lzxd_read_input(struct lzxd_stream *lzx) { + int read = lzx->sys->read(lzx->input, &lzx->inbuf[0], (int)lzx->inbuf_size); + if (read < 0) return lzx->error = MSPACK_ERR_READ; + + /* huff decode's ENSURE_BYTES(16) might overrun the input stream, even + * if those bits aren't used, so fake 2 more bytes */ + if (read == 0) { + if (lzx->input_end) { + D(("out of input bytes")) + return lzx->error = MSPACK_ERR_READ; + } + else { + read = 2; + lzx->inbuf[0] = lzx->inbuf[1] = 0; + lzx->input_end = 1; + } + } + + lzx->i_ptr = &lzx->inbuf[0]; + lzx->i_end = &lzx->inbuf[read]; + + return 
MSPACK_ERR_OK; +} + +/* Huffman decoding macros */ + +/* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the + * bitstream using the stated table and puts it in var. + */ +#define READ_HUFFSYM(tbl, var) do { \ + /* huffman symbols can be up to 16 bits long */ \ + ENSURE_BITS(16); \ + /* immediate table lookup of [tablebits] bits of the code */ \ + sym = lzx->tbl##_table[PEEK_BITS(LZX_##tbl##_TABLEBITS)]; \ + /* is the symbol is longer than [tablebits] bits? (i=node index) */ \ + if (sym >= LZX_##tbl##_MAXSYMBOLS) { \ + /* decode remaining bits by tree traversal */ \ + i = 1 << (BITBUF_WIDTH - LZX_##tbl##_TABLEBITS); \ + do { \ + /* one less bit. error if we run out of bits before decode */ \ + i >>= 1; \ + if (i == 0) { \ + D(("out of bits in huffman decode")) \ + return lzx->error = MSPACK_ERR_DECRUNCH; \ + } \ + /* double node index and add 0 (left branch) or 1 (right) */ \ + sym <<= 1; sym |= (bit_buffer & i) ? 1 : 0; \ + /* hop to next node index / decoded symbol */ \ + sym = lzx->tbl##_table[sym]; \ + /* while we are still in node indicies, not decoded symbols */ \ + } while (sym >= LZX_##tbl##_MAXSYMBOLS); \ + } \ + /* result */ \ + (var) = sym; \ + /* look up the code length of that symbol and discard those bits */ \ + i = lzx->tbl##_len[sym]; \ + REMOVE_BITS(i); \ +} while (0) + +/* BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */ +#define BUILD_TABLE(tbl) \ + if (make_decode_table(LZX_##tbl##_MAXSYMBOLS, LZX_##tbl##_TABLEBITS, \ + &lzx->tbl##_len[0], &lzx->tbl##_table[0])) \ + { \ + D(("failed to build %s table", #tbl)) \ + return lzx->error = MSPACK_ERR_DECRUNCH; \ + } + +/* make_decode_table(nsyms, nbits, length[], table[]) + * + * This function was coded by David Tritscher. It builds a fast huffman + * decoding table from a canonical huffman code lengths table. + * + * nsyms = total number of symbols in this huffman tree. 
/* make_decode_table(nsyms, nbits, length[], table[])
 *
 * This function was coded by David Tritscher. It builds a fast huffman
 * decoding table from a canonical huffman code lengths table.
 *
 * nsyms  = total number of symbols in this huffman tree.
 * nbits  = any symbols with a code length of nbits or less can be decoded
 *          in one lookup of the table.
 * length = A table to get code lengths from [0 to nsyms-1]
 * table  = The table to fill up with decoded symbols and pointers.
 *
 * Table layout produced here: entries [0, 1<<nbits) are the direct lookup
 * area; codes longer than nbits are stored as a binary tree whose node
 * pairs are allocated from index (1<<nbits) upwards (next_symbol counts
 * pairs, so a pair lives at next_symbol*2 and next_symbol*2+1).  0xFFFF
 * marks an entry that has not been assigned yet.
 *
 * Returns 0 for OK or 1 for error
 */

static int make_decode_table(unsigned int nsyms, unsigned int nbits,
                             unsigned char *length, unsigned short *table)
{
  register unsigned short sym;
  register unsigned int leaf, fill;
  register unsigned char bit_num;
  unsigned int pos         = 0; /* the current position in the decode table */
  unsigned int table_mask  = 1 << nbits;
  unsigned int bit_mask    = table_mask >> 1; /* don't do 0 length codes */
  unsigned int next_symbol = bit_mask; /* base of allocation for long codes */

  /* fill entries for codes short enough for a direct mapping */
  /* bit_mask is the number of direct-lookup slots one code of the current
   * length occupies; it halves each time the code length grows by one */
  for (bit_num = 1; bit_num <= nbits; bit_num++) {
    for (sym = 0; sym < nsyms; sym++) {
      if (length[sym] != bit_num) continue;
      leaf = pos;
      if((pos += bit_mask) > table_mask) return 1; /* table overrun */
      /* fill all possible lookups of this symbol with the symbol itself */
      for (fill = bit_mask; fill-- > 0;) table[leaf++] = sym;
    }
    bit_mask >>= 1;
  }

  /* full table already? */
  if (pos == table_mask) return 0;

  /* clear the remainder of the table */
  for (sym = pos; sym < table_mask; sym++) table[sym] = 0xFFFF;

  /* allow codes to be up to nbits+16 long, instead of nbits */
  /* from here on pos and table_mask are 16.16 fixed point: the upper part
   * is the direct-table index, the low 16 bits select the tree path */
  pos <<= 16;
  table_mask <<= 16;
  bit_mask = 1 << 15;

  for (bit_num = nbits+1; bit_num <= 16; bit_num++) {
    for (sym = 0; sym < nsyms; sym++) {
      if (length[sym] != bit_num) continue;

      /* start at the direct-table slot and walk one tree level for every
       * code bit beyond nbits */
      leaf = pos >> 16;
      for (fill = 0; fill < bit_num - nbits; fill++) {
        /* if this path hasn't been taken yet, 'allocate' two entries */
        if (table[leaf] == 0xFFFF) {
          table[(next_symbol << 1)] = 0xFFFF;
          table[(next_symbol << 1) + 1] = 0xFFFF;
          table[leaf] = next_symbol++;
        }
        /* follow the path and select either left or right for next bit */
        leaf = table[leaf] << 1;
        if ((pos >> (15-fill)) & 1) leaf++;
      }
      table[leaf] = sym;

      if ((pos += bit_mask) > table_mask) return 1; /* table overflow */
    }
    bit_mask >>= 1;
  }

  /* full table? */
  if (pos == table_mask) return 0;

  /* either erroneous table, or all elements are 0 - let's find out. */
  for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;
  return 0;
}
+ */ +#define READ_LENGTHS(tbl, first, last) do { \ + STORE_BITS; \ + if (lzxd_read_lens(lzx, &lzx->tbl##_len[0], (first), \ + (unsigned int)(last))) return lzx->error; \ + RESTORE_BITS; \ +} while (0) + +static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens, + unsigned int first, unsigned int last) +{ + /* bit buffer and huffman symbol decode variables */ + register unsigned int bit_buffer; + register int bits_left, i; + register unsigned short sym; + unsigned char *i_ptr, *i_end; + + unsigned int x, y; + int z; + + RESTORE_BITS; + + /* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */ + for (x = 0; x < 20; x++) { + READ_BITS(y, 4); + lzx->PRETREE_len[x] = y; + } + BUILD_TABLE(PRETREE); + + for (x = first; x < last; ) { + READ_HUFFSYM(PRETREE, z); + if (z == 17) { + /* code = 17, run of ([read 4 bits]+4) zeros */ + READ_BITS(y, 4); y += 4; + while (y--) lens[x++] = 0; + } + else if (z == 18) { + /* code = 18, run of ([read 5 bits]+20) zeros */ + READ_BITS(y, 5); y += 20; + while (y--) lens[x++] = 0; + } + else if (z == 19) { + /* code = 19, run of ([read 1 bit]+4) [read huffman symbol] */ + READ_BITS(y, 1); y += 4; + READ_HUFFSYM(PRETREE, z); + z = lens[x] - z; if (z < 0) z += 17; + while (y--) lens[x++] = z; + } + else { + /* code = 0 to 16, delta current length entry */ + z = lens[x] - z; if (z < 0) z += 17; + lens[x++] = z; + } + } + + STORE_BITS; + + return MSPACK_ERR_OK; +} + +/* LZX static data tables: + * + * LZX uses 'position slots' to represent match offsets. For every match, + * a small 'position slot' number and a small offset from that slot are + * encoded instead of one large offset. + * + * position_base[] is an index to the position slot bases + * + * extra_bits[] states how many bits of offset-from-base data is needed. 
/* LZX static data tables:
 *
 * LZX uses 'position slots' to represent match offsets.  For every match,
 * a small 'position slot' number and a small offset from that slot are
 * encoded instead of one large offset.
 *
 * position_base[] is an index to the position slot bases
 *
 * extra_bits[] states how many bits of offset-from-base data is needed.
 */
static unsigned int  position_base[51];
static unsigned char extra_bits[51];

/* Fill extra_bits[] and position_base[].
 *
 * extra_bits[] is filled two entries at a time.  The table has an odd
 * number of entries (51), so the final pass (i == 50) must not touch
 * index i+1: writing extra_bits[51] would land one byte past the end of
 * the array.
 */
static void lzxd_static_init(void) {
  int i, j;

  for (i = 0, j = 0; i < 51; i += 2) {
    extra_bits[i] = j; /* 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7... */
    if (i + 1 < 51) extra_bits[i + 1] = j;
    if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */
  }

  for (i = 0, j = 0; i < 51; i++) {
    position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */
    j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */
  }
}
/* Decompress (more of) an LZX stream, delivering out_bytes bytes through
 * lzx->sys->write().
 *
 * Bytes left over from a previous call (between o_ptr and o_end) are
 * written first.  After that, whole frames are decoded into the window,
 * optionally passed through the Intel E8 fix-up in e8_buf, and written.
 *
 * Returns MSPACK_ERR_OK on success, MSPACK_ERR_ARGS for a NULL stream or
 * negative out_bytes, and otherwise stores an MSPACK_ERR_* code in
 * lzx->error and returns it.  A stream with lzx->error already set is
 * refused immediately.
 *
 * The locals bit_buffer, bits_left, i, sym, i_ptr and i_end are used
 * implicitly by the bit-reader and huffman macros; those macros may also
 * return from this function on error.
 */
int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
  /* bitstream reading and huffman variables */
  register unsigned int bit_buffer;
  register int bits_left, i=0;
  register unsigned short sym;
  unsigned char *i_ptr, *i_end;

  int match_length, length_footer, extra, verbatim_bits, bytes_todo;
  int this_run, main_element, aligned_bits, j;
  unsigned char *window, *runsrc, *rundest, buf[12];
  unsigned int frame_size=0, end_frame, match_offset, window_posn;
  unsigned int R0, R1, R2;

  /* easy answers */
  if (!lzx || (out_bytes < 0)) return MSPACK_ERR_ARGS;
  if (lzx->error) return lzx->error;

  /* flush out any stored-up bytes before we begin */
  i = lzx->o_end - lzx->o_ptr;
  if ((off_t) i > out_bytes) i = (int) out_bytes;
  if (i) {
    if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) {
      return lzx->error = MSPACK_ERR_WRITE;
    }
    lzx->o_ptr  += i;
    lzx->offset += i;
    out_bytes   -= i;
  }
  if (out_bytes == 0) return MSPACK_ERR_OK;

  /* restore local state */
  RESTORE_BITS;
  window = lzx->window;
  window_posn = lzx->window_posn;
  R0 = lzx->R0;
  R1 = lzx->R1;
  R2 = lzx->R2;

  /* index one past the frame that contains the last requested byte */
  end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1;

  while (lzx->frame < end_frame) {
    /* have we reached the reset interval? (if there is one?) */
    if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) {
      if (lzx->block_remaining) {
        D(("%d bytes remaining at reset interval", lzx->block_remaining))
        return lzx->error = MSPACK_ERR_DECRUNCH;
      }

      /* re-read the intel header and reset the huffman lengths */
      lzxd_reset_state(lzx);
    }

    /* read header if necessary */
    if (!lzx->header_read) {
      /* read 1 bit. if bit=0, intel filesize = 0.
       * if bit=1, read intel filesize (32 bits) */
      j = 0; READ_BITS(i, 1); if (i) { READ_BITS(i, 16); READ_BITS(j, 16); }
      lzx->intel_filesize = (i << 16) | j;
      lzx->header_read = 1;
    }

    /* calculate size of frame: all frames are 32k except the final frame
     * which is 32kb or less. this can only be calculated when lzx->length
     * has been filled in. */
    frame_size = LZX_FRAME_SIZE;
    if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) {
      frame_size = lzx->length - lzx->offset;
    }

    /* decode until one more frame is available */
    bytes_todo = lzx->frame_posn + frame_size - window_posn;
    while (bytes_todo > 0) {
      /* initialise new block, if one is needed */
      if (lzx->block_remaining == 0) {
        /* realign if previous block was an odd-sized UNCOMPRESSED block */
        if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) &&
            (lzx->block_length & 1))
        {
          if (i_ptr == i_end) {
            if (lzxd_read_input(lzx)) return lzx->error;
            i_ptr = lzx->i_ptr;
            i_end = lzx->i_end;
          }
          i_ptr++;
        }

        /* read block type (3 bits) and block length (24 bits) */
        READ_BITS(lzx->block_type, 3);
        READ_BITS(i, 16); READ_BITS(j, 8);
        lzx->block_remaining = lzx->block_length = (i << 8) | j;
        /*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/

        /* read individual block headers */
        switch (lzx->block_type) {
        case LZX_BLOCKTYPE_ALIGNED:
          /* read lengths of and build aligned huffman decoding tree */
          for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; }
          BUILD_TABLE(ALIGNED);
          /* no break -- rest of aligned header is same as verbatim */
        case LZX_BLOCKTYPE_VERBATIM:
          /* read lengths of and build main huffman decoding tree */
          READ_LENGTHS(MAINTREE, 0, 256);
          READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + (lzx->posn_slots << 3));
          BUILD_TABLE(MAINTREE);
          /* if the literal 0xE8 is anywhere in the block... */
          if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1;
          /* read lengths of and build lengths huffman decoding tree */
          READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
          BUILD_TABLE(LENGTH);
          break;

        case LZX_BLOCKTYPE_UNCOMPRESSED:
          /* because we can't assume otherwise */
          lzx->intel_started = 1;

          /* read 1-16 (not 0-15) bits to align to bytes */
          ENSURE_BITS(16);
          if (bits_left > 16) i_ptr -= 2;
          bits_left = 0; bit_buffer = 0;

          /* read 12 bytes of stored R0 / R1 / R2 values */
          for (rundest = &buf[0], i = 0; i < 12; i++) {
            if (i_ptr == i_end) {
              if (lzxd_read_input(lzx)) return lzx->error;
              i_ptr = lzx->i_ptr;
              i_end = lzx->i_end;
            }
            *rundest++ = *i_ptr++;
          }
          /* each value is stored least significant byte first */
          R0 = buf[0] | (buf[1] << 8) | (buf[2]  << 16) | (buf[3]  << 24);
          R1 = buf[4] | (buf[5] << 8) | (buf[6]  << 16) | (buf[7]  << 24);
          R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24);
          break;

        default:
          D(("bad block type"))
          return lzx->error = MSPACK_ERR_DECRUNCH;
        }
      }

      /* decode more of the block:
       * run = min(what's available, what's needed) */
      this_run = lzx->block_remaining;
      if (this_run > bytes_todo) this_run = bytes_todo;

      /* assume we decode exactly this_run bytes, for now */
      bytes_todo -= this_run;
      lzx->block_remaining -= this_run;

      /* decode at least this_run bytes */
      switch (lzx->block_type) {
      case LZX_BLOCKTYPE_VERBATIM:
        while (this_run > 0) {
          READ_HUFFSYM(MAINTREE, main_element);
          if (main_element < LZX_NUM_CHARS) {
            /* literal: 0 to LZX_NUM_CHARS-1 */
            window[window_posn++] = main_element;
            this_run--;
          }
          else {
            /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
            main_element -= LZX_NUM_CHARS;

            /* get match length */
            match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
            if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
              READ_HUFFSYM(LENGTH, length_footer);
              match_length += length_footer;
            }
            match_length += LZX_MIN_MATCH;

            /* get match offset */
            /* slots 0-2 reuse a recent offset and move it to the front of
             * the R0/R1/R2 queue; other slots push a new offset onto it */
            switch ((match_offset = (main_element >> 3))) {
            case 0: match_offset = R0;                                  break;
            case 1: match_offset = R1; R1=R0;        R0 = match_offset; break;
            case 2: match_offset = R2; R2=R0;        R0 = match_offset; break;
            case 3: match_offset = 1;  R2=R1; R1=R0; R0 = match_offset; break;
            default:
              extra = extra_bits[match_offset];
              READ_BITS(verbatim_bits, extra);
              match_offset = position_base[match_offset] - 2 + verbatim_bits;
              R2 = R1; R1 = R0; R0 = match_offset;
            }

            if ((window_posn + match_length) > lzx->window_size) {
              D(("match ran over window wrap"))
              return lzx->error = MSPACK_ERR_DECRUNCH;
            }

            /* copy match */
            rundest = &window[window_posn];
            i = match_length;
            /* does match offset wrap the window? */
            if (match_offset > window_posn) {
              /* j = length from match offset to end of window */
              j = match_offset - window_posn;
              if (j > (int) lzx->window_size) {
                D(("match offset beyond window boundaries"))
                return lzx->error = MSPACK_ERR_DECRUNCH;
              }
              runsrc = &window[lzx->window_size - j];
              if (j < i) {
                /* if match goes over the window edge, do two copy runs */
                i -= j; while (j-- > 0) *rundest++ = *runsrc++;
                runsrc = window;
              }
              while (i-- > 0) *rundest++ = *runsrc++;
            }
            else {
              runsrc = rundest - match_offset;
              while (i-- > 0) *rundest++ = *runsrc++;
            }

            this_run    -= match_length;
            window_posn += match_length;
          }
        } /* while (this_run > 0) */
        break;

      case LZX_BLOCKTYPE_ALIGNED:
        while (this_run > 0) {
          READ_HUFFSYM(MAINTREE, main_element);
          if (main_element < LZX_NUM_CHARS) {
            /* literal: 0 to LZX_NUM_CHARS-1 */
            window[window_posn++] = main_element;
            this_run--;
          }
          else {
            /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
            main_element -= LZX_NUM_CHARS;

            /* get match length */
            match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
            if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
              READ_HUFFSYM(LENGTH, length_footer);
              match_length += length_footer;
            }
            match_length += LZX_MIN_MATCH;

            /* get match offset */
            switch ((match_offset = (main_element >> 3))) {
            case 0: match_offset = R0;                             break;
            case 1: match_offset = R1; R1 = R0; R0 = match_offset; break;
            case 2: match_offset = R2; R2 = R0; R0 = match_offset; break;
            default:
              extra = extra_bits[match_offset];
              match_offset = position_base[match_offset] - 2;
              if (extra > 3) {
                /* verbatim and aligned bits */
                extra -= 3;
                READ_BITS(verbatim_bits, extra);
                match_offset += (verbatim_bits << 3);
                READ_HUFFSYM(ALIGNED, aligned_bits);
                match_offset += aligned_bits;
              }
              else if (extra == 3) {
                /* aligned bits only */
                READ_HUFFSYM(ALIGNED, aligned_bits);
                match_offset += aligned_bits;
              }
              else if (extra > 0) { /* extra==1, extra==2 */
                /* verbatim bits only */
                READ_BITS(verbatim_bits, extra);
                match_offset += verbatim_bits;
              }
              else /* extra == 0 */ {
                /* ??? not defined in LZX specification! */
                match_offset = 1;
              }
              /* update repeated offset LRU queue */
              R2 = R1; R1 = R0; R0 = match_offset;
            }

            if ((window_posn + match_length) > lzx->window_size) {
              D(("match ran over window wrap"))
              return lzx->error = MSPACK_ERR_DECRUNCH;
            }

            /* copy match */
            rundest = &window[window_posn];
            i = match_length;
            /* does match offset wrap the window? */
            if (match_offset > window_posn) {
              /* j = length from match offset to end of window */
              j = match_offset - window_posn;
              if (j > (int) lzx->window_size) {
                D(("match offset beyond window boundaries"))
                return lzx->error = MSPACK_ERR_DECRUNCH;
              }
              runsrc = &window[lzx->window_size - j];
              if (j < i) {
                /* if match goes over the window edge, do two copy runs */
                i -= j; while (j-- > 0) *rundest++ = *runsrc++;
                runsrc = window;
              }
              while (i-- > 0) *rundest++ = *runsrc++;
            }
            else {
              runsrc = rundest - match_offset;
              while (i-- > 0) *rundest++ = *runsrc++;
            }

            this_run    -= match_length;
            window_posn += match_length;
          }
        } /* while (this_run > 0) */
        break;

      case LZX_BLOCKTYPE_UNCOMPRESSED:
        /* as this_run is limited not to wrap a frame, this also means it
         * won't wrap the window (as the window is a multiple of 32k) */
        rundest = &window[window_posn];
        window_posn += this_run;
        while (this_run > 0) {
          if ((i = i_end - i_ptr)) {
            if (i > this_run) i = this_run;
            lzx->sys->copy(i_ptr, rundest, (size_t) i);
            rundest  += i;
            i_ptr    += i;
            this_run -= i;
          }
          else {
            if (lzxd_read_input(lzx)) return lzx->error;
            i_ptr = lzx->i_ptr;
            i_end = lzx->i_end;
          }
        }
        break;

      default:
        D(("Default Here."));
        return lzx->error = MSPACK_ERR_DECRUNCH; /* might as well */
      }

      /* did the final match overrun our desired this_run length? */
      if (this_run < 0) {
        if ((unsigned int)(-this_run) > lzx->block_remaining) {
          D(("overrun went past end of block by %d (%d remaining)",
             -this_run, lzx->block_remaining ))
          return lzx->error = MSPACK_ERR_DECRUNCH;
        }
        lzx->block_remaining -= -this_run;
      }
    } /* while (bytes_todo > 0) */

    /* streams don't extend over frame boundaries */
    /* NOTE: the mismatch is only reported through D(); the error return
     * is compiled out below, so decoding carries on regardless */
    if ((window_posn - lzx->frame_posn) != frame_size) {
      D(("decode beyond output frame limits! %d != %d",
         window_posn - lzx->frame_posn, frame_size))
      /* Ignored */
#if 0
      return lzx->error = MSPACK_ERR_DECRUNCH;
#endif
    }

    /* re-align input bitstream */
    if (bits_left > 0) ENSURE_BITS(16);
    if (bits_left & 15) REMOVE_BITS(bits_left & 15);

    /* check that we've used all of the previous frame first */
    if (lzx->o_ptr != lzx->o_end) {
      D(("%d avail bytes, new %d frame", lzx->o_end-lzx->o_ptr, frame_size))
      return lzx->error = MSPACK_ERR_DECRUNCH;
    }

    /* does this intel block _really_ need decoding? */
    if (lzx->intel_started && lzx->intel_filesize &&
        (lzx->frame <= 32768) && (frame_size > 10))
    {
      unsigned char *data    = &lzx->e8_buf[0];
      unsigned char *dataend = &lzx->e8_buf[frame_size - 10];
      signed int curpos      = lzx->intel_curpos;
      signed int filesize    = lzx->intel_filesize;
      signed int abs_off, rel_off;

      /* copy e8 block to the e8 buffer and tweak if needed */
      lzx->o_ptr = data;
      lzx->sys->copy(&lzx->window[lzx->frame_posn], data, frame_size);

      /* the window itself is left untouched; only the copy that gets
       * written out has its 0xE8 operands rewritten */
      while (data < dataend) {
        if (*data++ != 0xE8) { curpos++; continue; }
        abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
        if ((abs_off >= -curpos) && (abs_off < filesize)) {
          rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
          data[0] = (unsigned char) rel_off;
          data[1] = (unsigned char) (rel_off >> 8);
          data[2] = (unsigned char) (rel_off >> 16);
          data[3] = (unsigned char) (rel_off >> 24);
        }
        data += 4;
        curpos += 5;
      }
      lzx->intel_curpos += frame_size;
    }
    else {
      lzx->o_ptr = &lzx->window[lzx->frame_posn];
      if (lzx->intel_filesize) lzx->intel_curpos += frame_size;
    }
    lzx->o_end = &lzx->o_ptr[frame_size];

    /* write a frame */
    /* anything beyond out_bytes stays between o_ptr and o_end and is
     * flushed at the start of the next call */
    i = (out_bytes < (off_t)frame_size) ? (unsigned int)out_bytes : frame_size;
    if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) {
      return lzx->error = MSPACK_ERR_WRITE;
    }
    lzx->o_ptr  += i;
    lzx->offset += i;
    out_bytes   -= i;

    /* advance frame start position */
    lzx->frame_posn += frame_size;
    lzx->frame++;

    /* wrap window / frame position pointers */
    if (window_posn == lzx->window_size)     window_posn = 0;
    if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0;

  } /* while (lzx->frame < end_frame) */

  if (out_bytes) {
    D(("bytes left to output"))
    return lzx->error = MSPACK_ERR_DECRUNCH;
  }

  /* store local state */
  STORE_BITS;
  lzx->window_posn = window_posn;
  lzx->R0 = R0;
  lzx->R1 = R1;
  lzx->R2 = R2;

  return MSPACK_ERR_OK;
}
+ | + | You should have received a copy of the GNU General Public License + | along with this program; if not, write to the Free Software + | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + | + | The GNU General Public License may also be available at the following + | URL: http://www.gnu.org/licenses/gpl.html +*/ + +/* This provides a "glue" between Stuart Caie's libmspack library and the + * Openclit calls to the earlier LZX library. + * + * This way, I should be able to use the files unmodified. + */ +#include +#include +#include "litlib.h" +#include "mspack.h" +#include "lzx.h" + +typedef struct memory_file +{ + unsigned int magic; /* 0xB5 */ + void * buffer; + int total_bytes; + int current_bytes; +} memory_file; + + +void * glue_alloc(struct mspack_system *this, size_t bytes) +{ + void * p; + p = (void *)malloc(bytes); + if (p == NULL) { + lit_error(ERR_R|ERR_LIBC,"Malloc(%d) failed!", bytes); + } + return p; +} + +void glue_free(void * p) +{ + free(p); +} + +void glue_copy(void *src, void *dest, size_t bytes) +{ + memcpy(dest, src, bytes); +} + +struct mspack_file * glue_open(struct mspack_system *this, char *filename, + int mode) +{ + lit_error(0,"MSPACK_OPEN unsupported!"); + return NULL; +} + +void glue_close(struct mspack_file * file) { + return; +} + + +int glue_read(struct mspack_file * file, void * buffer, int bytes) +{ + memory_file * mem; + int remaining; + + mem = (memory_file *)file; + if (mem->magic != 0xB5) return -1; + + remaining = mem->total_bytes - mem->current_bytes; + if (!remaining) return 0; + if (bytes > remaining) bytes = remaining; + memcpy(buffer, (unsigned char *)mem->buffer+mem->current_bytes, bytes); + mem->current_bytes += bytes; + return bytes; +} + +int glue_write(struct mspack_file * file, void * buffer, int bytes) +{ + memory_file * mem; + int remaining; + + mem = (memory_file *)file; + if (mem->magic != 0xB5) return -1; + + remaining = mem->total_bytes - mem->current_bytes; + if (!remaining) return 0; + if (bytes 
> remaining) { + lit_error(0,"MSPACK_READ tried to write %d bytes, only %d left.", + bytes, remaining); + bytes = remaining; + } + memcpy((unsigned char *)mem->buffer+mem->current_bytes, buffer, bytes); + mem->current_bytes += bytes; + return bytes; +} + +struct mspack_system lzxglue_system = +{ + glue_open, + glue_close, + glue_read, /* Read */ + glue_write, /* Write */ + NULL, /* Seek */ + NULL, /* Tell */ + NULL, /* Message */ + glue_alloc, + glue_free, + glue_copy, + NULL /* Termination */ +}; + +int LZXwindow; +struct lzxd_stream * lzx_stream = NULL; + + +/* Can't really init here,don't know enough */ +int LZXinit(int window) +{ + LZXwindow = window; + lzx_stream = NULL; + + return 0; +} + +/* Doesn't exist. Oh well, reinitialize state every time anyway */ +void LZXreset(void) +{ + return; +} + +int LZXdecompress(unsigned char *inbuf, unsigned char *outbuf, + unsigned int inlen, unsigned int outlen) +{ + int err; + memory_file source; + memory_file dest; + + source.magic = 0xB5; + source.buffer = inbuf; + source.current_bytes = 0; + source.total_bytes = inlen; + + dest.magic = 0xB5; + dest.buffer = outbuf; + dest.current_bytes = 0; + dest.total_bytes = outlen; + + lzx_stream = lzxd_init(&lzxglue_system, (struct mspack_file *)&source, + (struct mspack_file *)&dest, LZXwindow, + 0x7fff /* Never reset, I do it */, 4096, outlen); + err = -1; + if (lzx_stream) err = lzxd_decompress(lzx_stream, outlen); + + lzxd_free(lzx_stream); + lzx_stream = NULL; + return err; +} diff --git a/src/calibre/utils/lzx/lzxmodule.c b/src/calibre/utils/lzx/lzxmodule.c new file mode 100644 index 0000000000..44cc91c11d --- /dev/null +++ b/src/calibre/utils/lzx/lzxmodule.c @@ -0,0 +1,206 @@ +#include + +#include +#include + +static char lzx_doc[] = +"Provide basic LZX decompression using the code from libmspack."; + +static PyObject *LzxError = NULL; + +typedef struct memory_file { + unsigned int magic; /* 0xB5 */ + void * buffer; + int total_bytes; + int current_bytes; +} memory_file; + 
+void * +glue_alloc(struct mspack_system *this, size_t bytes) +{ + void *p = NULL; + p = (void *)malloc(bytes); + if (p == NULL) { + return (void *)PyErr_NoMemory(); + } + return p; +} + +void +glue_free(void *p) +{ + free(p); +} + +void +glue_copy(void *src, void *dest, size_t bytes) +{ + memcpy(dest, src, bytes); +} + +struct mspack_file * +glue_open(struct mspack_system *this, char *filename, int mode) +{ + PyErr_SetString(LzxError, "MSPACK_OPEN unsupported"); + return NULL; +} + +void +glue_close(struct mspack_file *file) +{ + return; +} + +int +glue_read(struct mspack_file *file, void * buffer, int bytes) +{ + memory_file *mem; + int remaining; + + mem = (memory_file *)file; + if (mem->magic != 0xB5) return -1; + + remaining = mem->total_bytes - mem->current_bytes; + if (!remaining) return 0; + if (bytes > remaining) bytes = remaining; + memcpy(buffer, (unsigned char *)mem->buffer + mem->current_bytes, bytes); + mem->current_bytes += bytes; + + return bytes; +} + +int +glue_write(struct mspack_file * file, void * buffer, int bytes) +{ + memory_file *mem; + int remaining; + + mem = (memory_file *)file; + if (mem->magic != 0xB5) return -1; + + remaining = mem->total_bytes - mem->current_bytes; + if (!remaining) return 0; + if (bytes > remaining) { + PyErr_SetString(LzxError, + "MSPACK_WRITE tried to write beyond end of buffer"); + bytes = remaining; + } + memcpy((unsigned char *)mem->buffer + mem->current_bytes, buffer, bytes); + mem->current_bytes += bytes; + return bytes; +} + +struct mspack_system lzxglue_system = { + glue_open, + glue_close, + glue_read, /* Read */ + glue_write, /* Write */ + NULL, /* Seek */ + NULL, /* Tell */ + NULL, /* Message */ + glue_alloc, + glue_free, + glue_copy, + NULL /* Termination */ +}; + + +int LZXwindow = 0; +struct lzxd_stream * lzx_stream = NULL; + +/* Can't really init here, don't know enough */ +static PyObject * +init(PyObject *self, PyObject *args) +{ + int window = 0; + + if (!PyArg_ParseTuple(args, "i", &window)) { + 
return NULL; + } + + LZXwindow = window; + lzx_stream = NULL; + + Py_RETURN_NONE; +} + +/* Doesn't exist. Oh well, reinitialize state every time anyway */ +static PyObject * +reset(PyObject *self, PyObject *args) +{ + if (!PyArg_ParseTuple(args, "")) { + return NULL; + } + + Py_RETURN_NONE; +} + +//int LZXdecompress(unsigned char *inbuf, unsigned char *outbuf, +// unsigned int inlen, unsigned int outlen) +static PyObject * +decompress(PyObject *self, PyObject *args) +{ + unsigned char *inbuf; + unsigned char *outbuf; + unsigned int inlen; + unsigned int outlen; + int err; + memory_file source; + memory_file dest; + PyObject *retval = NULL; + + if (!PyArg_ParseTuple(args, "s#I", &inbuf, &inlen, &outlen)) { + return NULL; + } + + retval = PyString_FromStringAndSize(NULL, outlen); + if (retval == NULL) { + return NULL; + } + outbuf = (unsigned char *)PyString_AS_STRING(retval); + + source.magic = 0xB5; + source.buffer = inbuf; + source.current_bytes = 0; + source.total_bytes = inlen; + + dest.magic = 0xB5; + dest.buffer = outbuf; + dest.current_bytes = 0; + dest.total_bytes = outlen; + + lzx_stream = lzxd_init(&lzxglue_system, (struct mspack_file *)&source, + (struct mspack_file *)&dest, LZXwindow, + 0x7fff /* Never reset, I do it */, 4096, outlen); + err = -1; + if (lzx_stream) err = lzxd_decompress(lzx_stream, outlen); + + lzxd_free(lzx_stream); + lzx_stream = NULL; + + if (err != MSPACK_ERR_OK) { + Py_DECREF(retval); + PyErr_SetString(LzxError, "LZX decompression failed"); + } + + return retval; +} + +static PyMethodDef lzx_methods[] = { + { "init", &init, METH_VARARGS, "Initialize the LZX decompressor" }, + { "reset", &reset, METH_VARARGS, "Reset the LZX decompressor" }, + { "decompress", &decompress, METH_VARARGS, "Run the LZX decompressor" }, + { NULL, NULL } +}; + +PyMODINIT_FUNC +initlzx(void) +{ + PyObject *m; + + m = Py_InitModule3("lzx", lzx_methods, lzx_doc); + if (m == NULL) return; + LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL); + 
Py_INCREF(LzxError); + PyModule_AddObject(m, "LzxError", LzxError); +} diff --git a/src/calibre/utils/lzx/mspack.h b/src/calibre/utils/lzx/mspack.h new file mode 100644 index 0000000000..b48623fed0 --- /dev/null +++ b/src/calibre/utils/lzx/mspack.h @@ -0,0 +1,1482 @@ +/* libmspack -- a library for working with Microsoft compression formats. + * (C) 2003-2004 Stuart Caie + * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/** \mainpage + * + * \section intro Introduction + * + * libmspack is a library which provides compressors and decompressors, + * archivers and dearchivers for Microsoft compression formats. + * + * \section formats Formats supported + * + * The following file formats are supported: + * - SZDD files, which use LZSS compression + * - KWAJ files, which use LZSS, LZSS+Huffman or deflate compression + * - .HLP (MS Help) files, which use LZSS compression + * - .CAB (MS Cabinet) files, which use deflate, LZX or Quantum compression + * - .CHM (HTML Help) files, which use LZX compression + * - .LIT (MS EBook) files, which use LZX compression and DES encryption + * + * To determine the capabilities of the library, and the binary + * compatibility version of any particular compressor or decompressor, use + * the mspack_version() function. The UNIX library interface version is + * defined as the highest-versioned library component. 
+ * + * \section starting Getting started + * + * The macro MSPACK_SYS_SELFTEST() should be used to ensure the library can + * be used. In particular, it checks if the caller is using 32-bit file I/O + * when the library is compiled for 64-bit file I/O and vice versa. + * + * If compiled normally, the library includes basic file I/O and memory + * management functionality using the standard C library. This can be + * customised and replaced entirely by creating a mspack_system structure. + * + * A compressor or decompressor for the required format must be + * instantiated before it can be used. Each construction function takes + * one parameter, which is either a pointer to a custom mspack_system + * structure, or NULL to use the default. The instantiation returned, if + * not NULL, contains function pointers (methods) to work with the given + * file format. + * + * For compression: + * - mspack_create_cab_compressor() creates a mscab_compressor + * - mspack_create_chm_compressor() creates a mschm_compressor + * - mspack_create_lit_compressor() creates a mslit_compressor + * - mspack_create_hlp_compressor() creates a mshlp_compressor + * - mspack_create_szdd_compressor() creates a msszdd_compressor + * - mspack_create_kwaj_compressor() creates a mskwaj_compressor + * + * For decompression: + * - mspack_create_cab_decompressor() creates a mscab_decompressor + * - mspack_create_chm_decompressor() creates a mschm_decompressor + * - mspack_create_lit_decompressor() creates a mslit_decompressor + * - mspack_create_hlp_decompressor() creates a mshlp_decompressor + * - mspack_create_szdd_decompressor() creates a msszdd_decompressor + * - mspack_create_kwaj_decompressor() creates a mskwaj_decompressor + * + * Once finished working with a format, each kind of + * compressor/decompressor has its own specific destructor: + * - mspack_destroy_cab_compressor() + * - mspack_destroy_cab_decompressor() + * - mspack_destroy_chm_compressor() + * - mspack_destroy_chm_decompressor() + 
* - mspack_destroy_lit_compressor() + * - mspack_destroy_lit_decompressor() + * - mspack_destroy_hlp_compressor() + * - mspack_destroy_hlp_decompressor() + * - mspack_destroy_szdd_compressor() + * - mspack_destroy_szdd_decompressor() + * - mspack_destroy_kwaj_compressor() + * - mspack_destroy_kwaj_decompressor() + * + * Destroying a compressor or decompressor does not destroy any objects, + * structures or handles that have been created using that compressor or + * decompressor. Ensure that everything created or opened is destroyed or + * closed before compressor/decompressor is itself destroyed. + * + * \section errors Error codes + * + * All compressors and decompressors use the same set of error codes. Most + * methods return an error code directly. For methods which do not + * return error codes directly, the error code can be obtained with the + * last_error() method. + * + * - #MSPACK_ERR_OK is used to indicate success. This error code is defined + * as zero, all other code are non-zero. + * - #MSPACK_ERR_ARGS indicates that a method was called with inappropriate + * arguments. + * - #MSPACK_ERR_OPEN indicates that mspack_system::open() failed. + * - #MSPACK_ERR_READ indicates that mspack_system::read() failed. + * - #MSPACK_ERR_WRITE indicates that mspack_system::write() failed. + * - #MSPACK_ERR_SEEK indicates that mspack_system::seek() failed. + * - #MSPACK_ERR_NOMEMORY indicates that mspack_system::alloc() failed. + * - #MSPACK_ERR_SIGNATURE indicates that the file being read does not + * have the correct "signature". It is probably not a valid file for + * whatever format is being read. + * - #MSPACK_ERR_DATAFORMAT indicates that the file being used or read + * is corrupt. + * - #MSPACK_ERR_CHECKSUM indicates that a data checksum has failed. + * - #MSPACK_ERR_CRUNCH indicates an error occured during compression. + * - #MSPACK_ERR_DECRUNCH indicates an error occured during decompression. 
+ */ + +#ifndef LIB_MSPACK_H +#define LIB_MSPACK_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#ifdef _MSC_VER +#include +#else +#include +#endif +/** + * System self-test function, to ensure both library and calling program + * can use one another. + * + * A result of MSPACK_ERR_OK means the library and caller are + * compatible. Any other result indicates that the library and caller are + * not compatible and should not be used. In particular, a value of + * MSPACK_ERR_SEEK means the library and caller use different off_t + * datatypes. + * + * It should be used like so: + * + * @code + * int selftest_result; + * MSPACK_SYS_SELFTEST(selftest_result); + * if (selftest_result != MSPACK_ERR_OK) { + * fprintf(stderr, "incompatible with this build of libmspack\n"); + * exit(0); + * } + * @endcode + * + * @param result an int variable to store the result of the self-test + */ +#define MSPACK_SYS_SELFTEST(result) do { \ + (result) = mspack_sys_selftest_internal(sizeof(off_t)); \ +} while (0) + +/** Part of the MSPACK_SYS_SELFTEST() macro, must not be used directly. */ +extern int mspack_sys_selftest_internal(int); + +/** + * Enquire about the binary compatibility version of a specific interface in + * the library. 
Currently, the following interfaces are defined: + * + * - #MSPACK_VER_LIBRARY: the overall library + * - #MSPACK_VER_SYSTEM: the mspack_system interface + * - #MSPACK_VER_MSCABD: the mscab_decompressor interface + * - #MSPACK_VER_MSCABC: the mscab_compressor interface + * - #MSPACK_VER_MSCHMD: the mschm_decompressor interface + * - #MSPACK_VER_MSCHMC: the mschm_compressor interface + * - #MSPACK_VER_MSLITD: the mslit_decompressor interface + * - #MSPACK_VER_MSLITC: the mslit_compressor interface + * - #MSPACK_VER_MSHLPD: the mshlp_decompressor interface + * - #MSPACK_VER_MSHLPC: the mshlp_compressor interface + * - #MSPACK_VER_MSSZDDD: the msszdd_decompressor interface + * - #MSPACK_VER_MSSZDDC: the msszdd_compressor interface + * - #MSPACK_VER_MSKWAJD: the mskwaj_decompressor interface + * - #MSPACK_VER_MSKWAJC: the mskwaj_compressor interface + * + * The result of the function should be interpreted as follows: + * - -1: this interface is completely unknown to the library + * - 0: this interface is known, but non-functioning + * - 1: this interface has all basic functionality + * - 2, 3, ...: this interface has additional functionality, clearly marked + * in the documentation as "version 2", "version 3" and so on. 
+ * + * @param interface the interface to request current version of + * @return the version of the requested interface + */ +extern int mspack_version(int interface); + +/** Pass to mspack_version() to get the overall library version */ +#define MSPACK_VER_LIBRARY (0) +/** Pass to mspack_version() to get the mspack_system version */ +#define MSPACK_VER_SYSTEM (1) +/** Pass to mspack_version() to get the mscab_decompressor version */ +#define MSPACK_VER_MSCABD (2) +/** Pass to mspack_version() to get the mscab_compressor version */ +#define MSPACK_VER_MSCABC (3) +/** Pass to mspack_version() to get the mschm_decompressor version */ +#define MSPACK_VER_MSCHMD (4) +/** Pass to mspack_version() to get the mschm_compressor version */ +#define MSPACK_VER_MSCHMC (5) +/** Pass to mspack_version() to get the mslit_decompressor version */ +#define MSPACK_VER_MSLITD (6) +/** Pass to mspack_version() to get the mslit_compressor version */ +#define MSPACK_VER_MSLITC (7) +/** Pass to mspack_version() to get the mshlp_decompressor version */ +#define MSPACK_VER_MSHLPD (8) +/** Pass to mspack_version() to get the mshlp_compressor version */ +#define MSPACK_VER_MSHLPC (9) +/** Pass to mspack_version() to get the msszdd_decompressor version */ +#define MSPACK_VER_MSSZDDD (10) +/** Pass to mspack_version() to get the msszdd_compressor version */ +#define MSPACK_VER_MSSZDDC (11) +/** Pass to mspack_version() to get the mskwaj_decompressor version */ +#define MSPACK_VER_MSKWAJD (12) +/** Pass to mspack_version() to get the mskwaj_compressor version */ +#define MSPACK_VER_MSKWAJC (13) + +/* --- file I/O abstraction ------------------------------------------------ */ + +/** + * A structure which abstracts file I/O and memory management. + * + * The library always uses the mspack_system structure for interaction + * with the file system and to allocate, free and copy all memory. It also + * uses it to send literal messages to the library user. 
+ * + * When the library is compiled normally, passing NULL to a compressor or + * decompressor constructor will result in a default mspack_system being + * used, where all methods are implemented with the standard C library. + * However, all constructors support being given a custom created + * mspack_system structure, with the library user's own methods. This + * allows for more abstract interaction, such as reading and writing files + * directly to memory, or from a network socket or pipe. + * + * Implementors of an mspack_system structure should read all + * documentation entries for every structure member, and write methods + * which conform to those standards. + */ +struct mspack_system { + /** + * Opens a file for reading, writing, appending or updating. + * + * @param this a self-referential pointer to the mspack_system + * structure whose open() method is being called. If + * this pointer is required by close(), read(), write(), + * seek() or tell(), it should be stored in the result + * structure at this time. + * @param filename the file to be opened. It is passed directly from the + * library caller without being modified, so it is up to + * the caller what this parameter actually represents. + * @param mode one of #MSPACK_SYS_OPEN_READ (open an existing file + * for reading), #MSPACK_SYS_OPEN_WRITE (open a new file + * for writing), #MSPACK_SYS_OPEN_UPDATE (open an existing + * file for reading/writing from the start of the file) or + * #MSPACK_SYS_OPEN_APPEND (open an existing file for + * reading/writing from the end of the file) + * @return a pointer to a mspack_file structure. This structure officially + * contains no members, its true contents are up to the + * mspack_system implementor. It should contain whatever is needed + * for other mspack_system methods to operate. 
+ * @see close(), read(), write(), seek(), tell(), message() + */ + struct mspack_file * (*open)(struct mspack_system *this, + char *filename, + int mode); + + /** + * Closes a previously opened file. If any memory was allocated for this + * particular file handle, it should be freed at this time. + * + * @param file the file to close + * @see open() + */ + void (*close)(struct mspack_file *file); + + /** + * Reads a given number of bytes from an open file. + * + * @param file the file to read from + * @param buffer the location where the read bytes should be stored + * @param bytes the number of bytes to read from the file. + * @return the number of bytes successfully read (this can be less than + * the number requested), zero to mark the end of file, or less + * than zero to indicate an error. + * @see open(), write() + */ + int (*read)(struct mspack_file *file, + void *buffer, + int bytes); + + /** + * Writes a given number of bytes to an open file. + * + * @param file the file to write to + * @param buffer the location where the written bytes should be read from + * @param bytes the number of bytes to write to the file. + * @return the number of bytes successfully written, this can be less + * than the number requested. Zero or less can indicate an error + * where no bytes at all could be written. All cases where less + * bytes were written than requested are considered by the library + * to be an error. + * @see open(), read() + */ + int (*write)(struct mspack_file *file, + void *buffer, + int bytes); + + /** + * Seeks to a specific file offset within an open file. + * + * Sometimes the library needs to know the length of a file. It does + * this by seeking to the end of the file with seek(file, 0, + * MSPACK_SYS_SEEK_END), then calling tell(). Implementations may want + * to make a special case for this. 
+ * + * Due to the potentially varying 32/64 bit datatype off_t on some + * architectures, the #MSPACK_SYS_SELFTEST macro MUST be used before + * using the library. If not, the error caused by the library passing an + * inappropriate stackframe to seek() is subtle and hard to trace. + * + * @param file the file to be seeked + * @param offset an offset to seek, measured in bytes + * @param mode one of #MSPACK_SYS_SEEK_START (the offset should be + * measured from the start of the file), #MSPACK_SYS_SEEK_CUR + * (the offset should be measured from the current file offset) + * or #MSPACK_SYS_SEEK_END (the offset should be measured from + * the end of the file) + * @return zero for success, non-zero for an error + * @see open(), tell() + */ + int (*seek)(struct mspack_file *file, + off_t offset, + int mode); + + /** + * Returns the current file position (in bytes) of the given file. + * + * @param file the file whose file position is wanted + * @return the current file position of the file + * @see open(), seek() + */ + off_t (*tell)(struct mspack_file *file); + + /** + * Used to send messages from the library to the user. + * + * Occasionally, the library generates warnings or other messages in + * plain english to inform the human user. These are informational only + * and can be ignored if not wanted. + * + * @param file may be a file handle returned from open() if this message + * pertains to a specific open file, or NULL if not related to + * a specific file. + * @param format a printf() style format string. It does NOT include a + * trailing newline. + * @see open() + */ + void (*message)(struct mspack_file *file, + char *format, + ...); + + /** + * Allocates memory. + * + * @param this a self-referential pointer to the mspack_system + * structure whose alloc() method is being called. 
+ * @param bytes the number of bytes to allocate + * @result a pointer to the requested number of bytes, or NULL if + * not enough memory is available + * @see free() + */ + void * (*alloc)(struct mspack_system *this, + size_t bytes); + + /** + * Frees memory. + * + * @param ptr the memory to be freed. + * @see alloc() + */ + void (*free)(void *ptr); + + /** + * Copies from one region of memory to another. + * + * The regions of memory are guaranteed not to overlap, are usually less + * than 256 bytes, and may not be aligned. Please note that the source + * parameter comes before the destination parameter, unlike the standard + * C function memcpy(). + * + * @param src the region of memory to copy from + * @param dest the region of memory to copy to + * @param bytes the size of the memory region, in bytes + */ + void (*copy)(void *src, + void *dest, + size_t bytes); + + /** + * A null pointer to mark the end of mspack_system. It must equal NULL. + * + * Should the mspack_system structure extend in the future, this NULL + * will be seen, rather than have an invalid method pointer called. + */ + void *null_ptr; +}; + +/** mspack_system::open() mode: open existing file for reading. */ +#define MSPACK_SYS_OPEN_READ (0) +/** mspack_system::open() mode: open new file for writing */ +#define MSPACK_SYS_OPEN_WRITE (1) +/** mspack_system::open() mode: open existing file for writing */ +#define MSPACK_SYS_OPEN_UPDATE (2) +/** mspack_system::open() mode: open existing file for writing */ +#define MSPACK_SYS_OPEN_APPEND (3) + +/** mspack_system::seek() mode: seek relative to start of file */ +#define MSPACK_SYS_SEEK_START (0) +/** mspack_system::seek() mode: seek relative to current offset */ +#define MSPACK_SYS_SEEK_CUR (1) +/** mspack_system::seek() mode: seek relative to end of file */ +#define MSPACK_SYS_SEEK_END (2) + +/** + * A structure which represents an open file handle. 
The contents of this + * structure are determined by the implementation of the + * mspack_system::open() method. + */ +struct mspack_file { + int dummy; +}; + +/* --- error codes --------------------------------------------------------- */ + +/** Error code: no error */ +#define MSPACK_ERR_OK (0) +/** Error code: bad arguments to method */ +#define MSPACK_ERR_ARGS (1) +/** Error code: error opening file */ +#define MSPACK_ERR_OPEN (2) +/** Error code: error reading file */ +#define MSPACK_ERR_READ (3) +/** Error code: error writing file */ +#define MSPACK_ERR_WRITE (4) +/** Error code: seek error */ +#define MSPACK_ERR_SEEK (5) +/** Error code: out of memory */ +#define MSPACK_ERR_NOMEMORY (6) +/** Error code: bad "magic id" in file */ +#define MSPACK_ERR_SIGNATURE (7) +/** Error code: bad or corrupt file format */ +#define MSPACK_ERR_DATAFORMAT (8) +/** Error code: bad checksum or CRC */ +#define MSPACK_ERR_CHECKSUM (9) +/** Error code: error during compression */ +#define MSPACK_ERR_CRUNCH (10) +/** Error code: error during decompression */ +#define MSPACK_ERR_DECRUNCH (11) + +/* --- functions available in library -------------------------------------- */ + +/** Creates a new CAB compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mscab_compressor or NULL + */ +extern struct mscab_compressor * + mspack_create_cab_compressor(struct mspack_system *sys); + +/** Creates a new CAB decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mscab_decompressor or NULL + */ +extern struct mscab_decompressor * + mspack_create_cab_decompressor(struct mspack_system *sys); + +/** Destroys an existing CAB compressor. + * @param this the #mscab_compressor to destroy + */ +extern void mspack_destroy_cab_compressor(struct mscab_compressor *this); + +/** Destroys an existing CAB decompressor. 
+ * @param this the #mscab_decompressor to destroy + */ +extern void mspack_destroy_cab_decompressor(struct mscab_decompressor *this); + + +/** Creates a new CHM compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mschm_compressor or NULL + */ +extern struct mschm_compressor * + mspack_create_chm_compressor(struct mspack_system *sys); + +/** Creates a new CHM decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mschm_decompressor or NULL + */ +extern struct mschm_decompressor * + mspack_create_chm_decompressor(struct mspack_system *sys); + +/** Destroys an existing CHM compressor. + * @param this the #mschm_compressor to destroy + */ +extern void mspack_destroy_chm_compressor(struct mschm_compressor *this); + +/** Destroys an existing CHM decompressor. + * @param this the #mschm_decompressor to destroy + */ +extern void mspack_destroy_chm_decompressor(struct mschm_decompressor *this); + + +/** Creates a new LIT compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mslit_compressor or NULL + */ +extern struct mslit_compressor * + mspack_create_lit_compressor(struct mspack_system *sys); + +/** Creates a new LIT decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mslit_decompressor or NULL + */ +extern struct mslit_decompressor * + mspack_create_lit_decompressor(struct mspack_system *sys); + +/** Destroys an existing LIT compressor. + * @param this the #mslit_compressor to destroy + */ +extern void mspack_destroy_lit_compressor(struct mslit_compressor *this); + +/** Destroys an existing LIT decompressor. + * @param this the #mslit_decompressor to destroy + */ +extern void mspack_destroy_lit_decompressor(struct mslit_decompressor *this); + + +/** Creates a new HLP compressor. 
+ * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mshlp_compressor or NULL + */ +extern struct mshlp_compressor * + mspack_create_hlp_compressor(struct mspack_system *sys); + +/** Creates a new HLP decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mshlp_decompressor or NULL + */ +extern struct mshlp_decompressor * + mspack_create_hlp_decompressor(struct mspack_system *sys); + +/** Destroys an existing hlp compressor. + * @param this the #mshlp_compressor to destroy + */ +extern void mspack_destroy_hlp_compressor(struct mshlp_compressor *this); + +/** Destroys an existing hlp decompressor. + * @param this the #mshlp_decompressor to destroy + */ +extern void mspack_destroy_hlp_decompressor(struct mshlp_decompressor *this); + + +/** Creates a new SZDD compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #msszdd_compressor or NULL + */ +extern struct msszdd_compressor * + mspack_create_szdd_compressor(struct mspack_system *sys); + +/** Creates a new SZDD decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #msszdd_decompressor or NULL + */ +extern struct msszdd_decompressor * + mspack_create_szdd_decompressor(struct mspack_system *sys); + +/** Destroys an existing SZDD compressor. + * @param this the #msszdd_compressor to destroy + */ +extern void mspack_destroy_szdd_compressor(struct msszdd_compressor *this); + +/** Destroys an existing SZDD decompressor. + * @param this the #msszdd_decompressor to destroy + */ +extern void mspack_destroy_szdd_decompressor(struct msszdd_decompressor *this); + + +/** Creates a new KWAJ compressor. 
+ * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mskwaj_compressor or NULL + */ +extern struct mskwaj_compressor * + mspack_create_kwaj_compressor(struct mspack_system *sys); + +/** Creates a new KWAJ decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mskwaj_decompressor or NULL + */ +extern struct mskwaj_decompressor * + mspack_create_kwaj_decompressor(struct mspack_system *sys); + +/** Destroys an existing KWAJ compressor. + * @param this the #mskwaj_compressor to destroy + */ +extern void mspack_destroy_kwaj_compressor(struct mskwaj_compressor *this); + +/** Destroys an existing KWAJ decompressor. + * @param this the #mskwaj_decompressor to destroy + */ +extern void mspack_destroy_kwaj_decompressor(struct mskwaj_decompressor *this); + + +/* --- support for .CAB (MS Cabinet) file format --------------------------- */ + +/** + * A structure which represents a single cabinet file. + * + * All fields are READ ONLY. + * + * If this cabinet is part of a merged cabinet set, the #files and #folders + * fields are common to all cabinets in the set, and will be identical. + * + * @see mscab_decompressor::open(), mscab_decompressor::close(), + * mscab_decompressor::search() + */ +struct mscabd_cabinet { + /** + * The next cabinet in a chained list, if this cabinet was opened with + * mscab_decompressor::search(). May be NULL to mark the end of the + * list. + */ + struct mscabd_cabinet *next; + + /** + * The filename of the cabinet. More correctly, the filename of the + * physical file that the cabinet resides in. This is given by the + * library user and may be in any format. + */ + char *filename; + + /** The file offset of cabinet within the physical file it resides in. */ + off_t base_offset; + + /** The length of the cabinet file in bytes. */ + unsigned int length; + + /** The previous cabinet in a cabinet set, or NULL. 
*/ + struct mscabd_cabinet *prevcab; + + /** The next cabinet in a cabinet set, or NULL. */ + struct mscabd_cabinet *nextcab; + + /** The filename of the previous cabinet in a cabinet set, or NULL. */ + char *prevname; + + /** The filename of the next cabinet in a cabinet set, or NULL. */ + char *nextname; + + /** The name of the disk containing the previous cabinet in a cabinet + * set, or NULL. + */ + char *previnfo; + + /** The name of the disk containing the next cabinet in a cabinet set, + * or NULL. + */ + char *nextinfo; + + /** A list of all files in the cabinet or cabinet set. */ + struct mscabd_file *files; + + /** A list of all folders in the cabinet or cabinet set. */ + struct mscabd_folder *folders; + + /** + * The set ID of the cabinet. All cabinets in the same set should have + * the same set ID. + */ + unsigned short set_id; + + /** + * The index number of the cabinet within the set. Numbering should + * start from 0 for the first cabinet in the set, and increment by 1 for + * each following cabinet. + */ + unsigned short set_index; + + /** + * The number of bytes reserved in the header area of the cabinet. + * + * If this is non-zero and flags has MSCAB_HDR_RESV set, this data can + * be read by the calling application. It is of the given length, + * located at offset (base_offset + MSCAB_HDR_RESV_OFFSET) in the + * cabinet file. + * + * @see flags + */ + unsigned short header_resv; + + /** + * Header flags. + * + * - MSCAB_HDR_PREVCAB indicates the cabinet is part of a cabinet set, and + * has a predecessor cabinet. + * - MSCAB_HDR_NEXTCAB indicates the cabinet is part of a cabinet set, and + * has a successor cabinet. + * - MSCAB_HDR_RESV indicates the cabinet has reserved header space. + * + * @see prevname, previnfo, nextname, nextinfo, header_resv + */ + int flags; +}; + +/** Offset from start of cabinet to the reserved header data (if present). 
*/ +#define MSCAB_HDR_RESV_OFFSET (0x28) + +/** Cabinet header flag: cabinet has a predecessor */ +#define MSCAB_HDR_PREVCAB (0x01) +/** Cabinet header flag: cabinet has a successor */ +#define MSCAB_HDR_NEXTCAB (0x02) +/** Cabinet header flag: cabinet has reserved header space */ +#define MSCAB_HDR_RESV (0x04) + +/** + * A structure which represents a single folder in a cabinet or cabinet set. + * + * All fields are READ ONLY. + * + * A folder is a single compressed stream of data. When uncompressed, it + * holds the data of one or more files. A folder may be split across more + * than one cabinet. + */ +struct mscabd_folder { + /** + * A pointer to the next folder in this cabinet or cabinet set, or NULL + * if this is the final folder. + */ + struct mscabd_folder *next; + + /** + * The compression format used by this folder. + * + * The macro MSCABD_COMP_METHOD() should be used on this field to get + * the algorithm used. The macro MSCABD_COMP_LEVEL() should be used to get + * the "compression level". + * + * @see MSCABD_COMP_METHOD(), MSCABD_COMP_LEVEL() + */ + int comp_type; + + /** + * The total number of data blocks used by this folder. This includes + * data blocks present in other files, if this folder spans more than + * one cabinet. + */ + unsigned int num_blocks; +}; + +/** + * Returns the compression method used by a folder. + * + * @param comp_type a mscabd_folder::comp_type value + * @return one of #MSCAB_COMP_NONE, #MSCAB_COMP_MSZIP, #MSCAB_COMP_QUANTUM + * or #MSCAB_COMP_LZX + */ +#define MSCABD_COMP_METHOD(comp_type) ((comp_type) & 0x0F) +/** + * Returns the compression level used by a folder. + * + * @param comp_type a mscabd_folder::comp_type value + * @return the compression level. This is only defined by LZX and Quantum + * compression + */ +#define MSCABD_COMP_LEVEL(comp_type) (((comp_type) >> 8) & 0x1F) + +/** Compression mode: no compression. */ +#define MSCAB_COMP_NONE (0) +/** Compression mode: MSZIP (deflate) compression. 
*/ +#define MSCAB_COMP_MSZIP (1) +/** Compression mode: Quantum compression */ +#define MSCAB_COMP_QUANTUM (2) +/** Compression mode: LZX compression */ +#define MSCAB_COMP_LZX (3) + +/** + * A structure which represents a single file in a cabinet or cabinet set. + * + * All fields are READ ONLY. + */ +struct mscabd_file { + /** + * The next file in the cabinet or cabinet set, or NULL if this is the + * final file. + */ + struct mscabd_file *next; + + /** + * The filename of the file. + * + * A null terminated string of up to 255 bytes in length, it may be in + * either ISO-8859-1 or UTF8 format, depending on the file attributes. + * + * @see attribs + */ + char *filename; + + /** The uncompressed length of the file, in bytes. */ + unsigned int length; + + /** + * File attributes. + * + * The following attributes are defined: + * - #MSCAB_ATTRIB_RDONLY indicates the file is write protected. + * - #MSCAB_ATTRIB_HIDDEN indicates the file is hidden. + * - #MSCAB_ATTRIB_SYSTEM indicates the file is a operating system file. + * - #MSCAB_ATTRIB_ARCH indicates the file is "archived". + * - #MSCAB_ATTRIB_EXEC indicates the file is an executable program. + * - #MSCAB_ATTRIB_UTF_NAME indicates the filename is in UTF8 format rather + * than ISO-8859-1. + */ + int attribs; + + /** File's last modified time, hour field. */ + char time_h; + /** File's last modified time, minute field. */ + char time_m; + /** File's last modified time, second field. */ + char time_s; + + /** File's last modified date, day field. */ + char date_d; + /** File's last modified date, month field. */ + char date_m; + /** File's last modified date, year field. */ + int date_y; + + /** A pointer to the folder that contains this file. */ + struct mscabd_folder *folder; + + /** The uncompressed offset of this file in its folder. */ + unsigned int offset; +}; + +/** mscabd_file::attribs attribute: file is read-only. */ +#define MSCAB_ATTRIB_RDONLY (0x01) +/** mscabd_file::attribs attribute: file is hidden. 
*/ +#define MSCAB_ATTRIB_HIDDEN (0x02) +/** mscabd_file::attribs attribute: file is an operating system file. */ +#define MSCAB_ATTRIB_SYSTEM (0x04) +/** mscabd_file::attribs attribute: file is "archived". */ +#define MSCAB_ATTRIB_ARCH (0x20) +/** mscabd_file::attribs attribute: file is an executable program. */ +#define MSCAB_ATTRIB_EXEC (0x40) +/** mscabd_file::attribs attribute: filename is UTF8, not ISO-8859-1. */ +#define MSCAB_ATTRIB_UTF_NAME (0x80) + +/** mscab_decompressor::set_param() parameter: search buffer size. */ +#define MSCABD_PARAM_SEARCHBUF (0) +/** mscab_decompressor::set_param() parameter: repair MS-ZIP streams? */ +#define MSCABD_PARAM_FIXMSZIP (1) +/** mscab_decompressor::set_param() parameter: size of decompression buffer */ +#define MSCABD_PARAM_DECOMPBUF (2) + +/** TODO */ +struct mscab_compressor { + int dummy; +}; + +/** + * A decompressor for .CAB (Microsoft Cabinet) files + * + * All fields are READ ONLY. + * + * @see mspack_create_cab_decompressor(), mspack_destroy_cab_decompressor() + */ +struct mscab_decompressor { + /** + * Opens a cabinet file and reads its contents. + * + * If the file opened is a valid cabinet file, all headers will be read + * and a mscabd_cabinet structure will be returned, with a full list of + * folders and files. + * + * In the case of an error occuring, NULL is returned and the error code + * is available from last_error(). + * + * The filename pointer should be considered "in use" until close() is + * called on the cabinet. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param filename the filename of the cabinet file. This is passed + * directly to mspack_system::open(). + * @return a pointer to a mscabd_cabinet structure, or NULL on failure + * @see close(), search(), last_error() + */ + struct mscabd_cabinet * (*open) (struct mscab_decompressor *this, + char *filename); + + /** + * Closes a previously opened cabinet or cabinet set. 
+ * + * This closes a cabinet, all cabinets associated with it via the + * mscabd_cabinet::next, mscabd_cabinet::prevcab and + * mscabd_cabinet::nextcab pointers, and all folders and files. All + * memory used by these entities is freed. + * + * The cabinet pointer is now invalid and cannot be used again. All + * mscabd_folder and mscabd_file pointers from that cabinet or cabinet + * set are also now invalid, and cannot be used again. + * + * If the cabinet pointer given was created using search(), it MUST be + * the cabinet pointer returned by search() and not one of the later + * cabinet pointers further along the mscabd_cabinet::next chain. + + * If extra cabinets have been added using append() or prepend(), these + * will all be freed, even if the cabinet pointer given is not the first + * cabinet in the set. Do NOT close() more than one cabinet in the set. + * + * The mscabd_cabinet::filename is not freed by the library, as it is + * not allocated by the library. The caller should free this itself if + * necessary, before it is lost forever. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param cab the cabinet to close + * @see open(), search(), append(), prepend() + */ + void (*close)(struct mscab_decompressor *this, + struct mscabd_cabinet *cab); + + /** + * Searches a regular file for embedded cabinets. + * + * This opens a normal file with the given filename and will search the + * entire file for embedded cabinet files + * + * If any cabinets are found, the equivalent of open() is called on each + * potential cabinet file at the offset it was found. All successfully + * open()ed cabinets are kept in a list. + * + * The first cabinet found will be returned directly as the result of + * this method. Any further cabinets found will be chained in a list + * using the mscabd_cabinet::next field. 
+ * + * In the case of an error occuring anywhere other than the simulated + * open(), NULL is returned and the error code is available from + * last_error(). + * + * If no error occurs, but no cabinets can be found in the file, NULL is + * returned and last_error() returns MSPACK_ERR_OK. + * + * The filename pointer should be considered in use until close() is + * called on the cabinet. + * + * close() should only be called on the result of search(), not on any + * subsequent cabinets in the mscabd_cabinet::next chain. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param filename the filename of the file to search for cabinets. This + * is passed directly to mspack_system::open(). + * @return a pointer to a mscabd_cabinet structure, or NULL + * @see close(), open(), last_error() + */ + struct mscabd_cabinet * (*search) (struct mscab_decompressor *this, + char *filename); + + /** + * Appends one mscabd_cabinet to another, forming or extending a cabinet + * set. + * + * This will attempt to append one cabinet to another such that + * (cab->nextcab == nextcab) && (nextcab->prevcab == cab) and + * any folders split between the two cabinets are merged. + * + * The cabinets MUST be part of a cabinet set -- a cabinet set is a + * cabinet that spans more than one physical cabinet file on disk -- and + * must be appropriately matched. + * + * It can be determined if a cabinet has further parts to load by + * examining the mscabd_cabinet::flags field: + * + * - if (flags & MSCAB_HDR_PREVCAB) is non-zero, there is a + * predecessor cabinet to open() and prepend(). Its MS-DOS + * case-insensitive filename is mscabd_cabinet::prevname + * - if (flags & MSCAB_HDR_NEXTCAB) is non-zero, there is a + * successor cabinet to open() and append(). Its MS-DOS case-insensitive + * filename is mscabd_cabinet::nextname + * + * If the cabinets do not match, an error code will be returned. 
Neither + * cabinet has been altered, and both should be closed seperately. + * + * Files and folders in a cabinet set are a single entity. All cabinets + * in a set use the same file list, which is updated as cabinets in the + * set are added. All pointers to mscabd_folder and mscabd_file + * structures in either cabinet must be discarded and re-obtained after + * merging. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param cab the cabinet which will be appended to, + * predecessor of nextcab + * @param nextcab the cabinet which will be appended, + * successor of cab + * @return an error code, or MSPACK_ERR_OK if successful + * @see prepend(), open(), close() + */ + int (*append) (struct mscab_decompressor *this, + struct mscabd_cabinet *cab, + struct mscabd_cabinet *nextcab); + + /** + * Prepends one mscabd_cabinet to another, forming or extending a + * cabinet set. + * + * This will attempt to prepend one cabinet to another, such that + * (cab->prevcab == prevcab) && (prevcab->nextcab == cab). In + * all other respects, it is identical to append(). See append() for the + * full documentation. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param cab the cabinet which will be prepended to, + * successor of prevcab + * @param prevcab the cabinet which will be prepended, + * predecessor of cab + * @return an error code, or MSPACK_ERR_OK if successful + * @see append(), open(), close() + */ + int (*prepend) (struct mscab_decompressor *this, + struct mscabd_cabinet *cab, + struct mscabd_cabinet *prevcab); + + /** + * Extracts a file from a cabinet or cabinet set. + * + * This extracts a compressed file in a cabinet and writes it to the given + * filename. + * + * The MS-DOS filename of the file, mscabd_file::filename, is NOT USED + * by extract(). 
The caller must examine this MS-DOS filename, copy and + * change it as necessary, create directories as necessary, and provide + * the correct filename as a parameter, which will be passed unchanged + * to the decompressor's mspack_system::open() + * + * If the file belongs to a split folder in a multi-part cabinet set, + * and not enough parts of the cabinet set have been loaded and appended + * or prepended, an error will be returned immediately. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param file the file to be decompressed + * @param filename the filename of the file being written to + * @return an error code, or MSPACK_ERR_OK if successful + */ + int (*extract)(struct mscab_decompressor *this, + struct mscabd_file *file, + char *filename); + + /** + * Sets a CAB decompression engine parameter. + * + * The following parameters are defined: + * - #MSCABD_PARAM_SEARCHBUF: How many bytes should be allocated as a + * buffer when using search()? The minimum value is 4. The default + * value is 32768. + * - #MSCABD_PARAM_FIXMSZIP: If non-zero, extract() will ignore bad + * checksums and recover from decompression errors in MS-ZIP + * compressed folders. The default value is 0 (don't recover). + * - #MSCABD_PARAM_DECOMPBUF: How many bytes should be used as an input + * bit buffer by decompressors? The minimum value is 4. The default + * value is 4096. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param param the parameter to set + * @param value the value to set the parameter to + * @return MSPACK_ERR_OK if all is OK, or MSPACK_ERR_ARGS if there + * is a problem with either parameter or value. + * @see search(), extract() + */ + int (*set_param)(struct mscab_decompressor *this, + int param, + int value); + + /** + * Returns the error code set by the most recently called method. 
+ * + * This is useful for open() and search(), which do not return an error + * code directly. + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @return the most recent error code + * @see open(), search() + */ + int (*last_error)(struct mscab_decompressor *); +}; + +/* --- support for .CHM (HTMLHelp) file format ----------------------------- */ + +/** + * A structure which represents a section of a CHM helpfile. + * + * All fields are READ ONLY. + * + * Not used directly, but used as a generic base type for + * mschmd_sec_uncompressed and mschmd_sec_mscompressed. + */ +struct mschmd_section { + /** A pointer to the CHM helpfile that contains this section. */ + struct mschmd_header *chm; + + /** + * The section ID. Either 0 for the uncompressed section + * mschmd_sec_uncompressed, or 1 for the LZX compressed section + * mschmd_sec_mscompressed. No other section IDs are known. + */ + unsigned int id; +}; + +/** + * A structure which represents the uncompressed section of a CHM helpfile. + * + * All fields are READ ONLY. + */ +struct mschmd_sec_uncompressed { + /** Generic section data. */ + struct mschmd_section base; + + /** The file offset of where this section begins in the CHM helpfile. */ + off_t offset; +}; + +/** + * A structure which represents the compressed section of a CHM helpfile. + * + * All fields are READ ONLY. + */ +struct mschmd_sec_mscompressed { + /** Generic section data. */ + struct mschmd_section base; + + /** A pointer to the meta-file which represents all LZX compressed data. */ + struct mschmd_file *content; + + /** A pointer to the file which contains the LZX control data. */ + struct mschmd_file *control; + + /** A pointer to the file which contains the LZX reset table. */ + struct mschmd_file *rtable; +}; + +/** + * A structure which represents a CHM helpfile. + * + * All fields are READ ONLY. + */ +struct mschmd_header { + /** The version of the CHM file format used in this file. 
*/ + unsigned int version; + + /** + * The "timestamp" of the CHM helpfile. + * + * It is the lower 32 bits of a 64-bit value representing the number of + * centiseconds since 1601-01-01 00:00:00 UTC, plus 42. It is not useful + * as a timestamp, but it is useful as a semi-unique ID. + */ + unsigned int timestamp; + + + /** + * The default Language and Country ID (LCID) of the user who ran the + * HTMLHelp Compiler. This is not the language of the CHM file itself. + */ + unsigned int language; + + /** + * The filename of the CHM helpfile. This is given by the library user + * and may be in any format. + */ + char *filename; + + /** The length of the CHM helpfile, in bytes. */ + off_t length; + + /** A list of all non-system files in the CHM helpfile. */ + struct mschmd_file *files; + + /** + * A list of all system files in the CHM helpfile. + * + * System files are files which begin with "::". They are meta-files + * generated by the CHM creation process. + */ + struct mschmd_file *sysfiles; + + /** The section 0 (uncompressed) data in this CHM helpfile. */ + struct mschmd_sec_uncompressed sec0; + + /** The section 1 (MSCompressed) data in this CHM helpfile. */ + struct mschmd_sec_mscompressed sec1; + + /** The file offset of the first PMGL/PMGI directory chunk. */ + off_t dir_offset; + + /** The number of PMGL/PMGI directory chunks in this CHM helpfile. */ + unsigned int num_chunks; + + /** The size of each PMGL/PMGI chunk, in bytes. */ + unsigned int chunk_size; + + /** The "density" of the quick-reference section in PMGL/PMGI chunks. */ + unsigned int density; + + /** The depth of the index tree. + * + * - if 1, there are no PMGI chunks, only PMGL chunks. + * - if 2, there is 1 PMGI chunk. All chunk indices point to PMGL chunks. + * - if 3, the root PMGI chunk points to secondary PMGI chunks, which in + * turn point to PMGL chunks. + * - and so on... + */ + unsigned int depth; + + /** + * The number of the root PGMI chunk. 
+ * + * If there is no index in the CHM helpfile, this will be 0xFFFFFFFF. + */ + unsigned int index_root; +}; + +/** + * A structure which represents a file stored in a CHM helpfile. + * + * All fields are READ ONLY. + */ +struct mschmd_file { + /** + * A pointer to the next file in the list, or NULL if this is the final + * file. + */ + struct mschmd_file *next; + + /** + * A pointer to the section that this file is located in. Indirectly, + * it also points to the CHM helpfile the file is located in. + */ + struct mschmd_section *section; + + /** The offset within the section data that this file is located at. */ + off_t offset; + + /** The length of this file, in bytes */ + off_t length; + + /** The filename of this file -- a null terminated string in UTF8. */ + char *filename; +}; + +/** TODO */ +struct mschm_compressor { + int dummy; +}; + +/** + * A decompressor for .CHM (Microsoft HTMLHelp) files + * + * All fields are READ ONLY. + * + * @see mspack_create_chm_decompressor(), mspack_destroy_chm_decompressor() + */ +struct mschm_decompressor { + /** + * Opens a CHM helpfile and reads its contents. + * + * If the file opened is a valid CHM helpfile, all headers will be read + * and a mschmd_header structure will be returned, with a full list of + * files. + * + * In the case of an error occuring, NULL is returned and the error code + * is available from last_error(). + * + * The filename pointer should be considered "in use" until close() is + * called on the CHM helpfile. + * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @param filename the filename of the CHM helpfile. This is passed + * directly to mspack_system::open(). + * @return a pointer to a mschmd_header structure, or NULL on failure + * @see close() + */ + struct mschmd_header *(*open)(struct mschm_decompressor *this, + char *filename); + + /** + * Closes a previously opened CHM helpfile. 
+ * + * This closes a CHM helpfile, frees the mschmd_header and all + * mschmd_file structures associated with it (if any). This works on + * both helpfiles opened with open() and helpfiles opened with + * fast_open(). + * + * The CHM header pointer is now invalid and cannot be used again. All + * mschmd_file pointers referencing that CHM are also now invalid, and + * cannot be used again. + * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @param chm the CHM helpfile to close + * @see open(), fast_open() + */ + void (*close)(struct mschm_decompressor *this, + struct mschmd_header *chm); + + /** + * Extracts a file from a CHM helpfile. + * + * This extracts a file from a CHM helpfile and writes it to the given + * filename. The filename of the file, mscabd_file::filename, is not + * used by extract(), but can be used by the caller as a guide for + * constructing an appropriate filename. + * + * This method works both with files found in the mschmd_header::files + * and mschmd_header::sysfiles list and mschmd_file structures generated + * on the fly by fast_find(). + * + * @param this a self-referential pointer to the mscab_decompressor + * instance being called + * @param file the file to be decompressed + * @param filename the filename of the file being written to + * @return an error code, or MSPACK_ERR_OK if successful + */ + int (*extract)(struct mschm_decompressor *this, + struct mschmd_file *file, + char *filename); + + /** + * Returns the error code set by the most recently called method. + * + * This is useful for open() and fast_open(), which do not return an + * error code directly. + * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @return the most recent error code + * @see open(), search() + */ + int (*last_error)(struct mschm_decompressor *this); + + /** + * Opens a CHM helpfile quickly. 
+ * + * If the file opened is a valid CHM helpfile, only essential headers + * will be read. A mschmd_header structure will be still be returned, as + * with open(), but the mschmd_header::files field will be NULL. No + * files details will be automatically read. The fast_find() method + * must be used to obtain file details. + * + * In the case of an error occuring, NULL is returned and the error code + * is available from last_error(). + * + * The filename pointer should be considered "in use" until close() is + * called on the CHM helpfile. + * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @param filename the filename of the CHM helpfile. This is passed + * directly to mspack_system::open(). + * @return a pointer to a mschmd_header structure, or NULL on failure + * @see open(), close(), fast_find(), extract() + */ + struct mschmd_header *(*fast_open)(struct mschm_decompressor *this, + char *filename); + + /** + * Finds file details quickly. + * + * Instead of reading all CHM helpfile headers and building a list of + * files, fast_open() and fast_find() are intended for finding file + * details only when they are needed. The CHM file format includes an + * on-disk file index to allow this. + * + * Given a case-sensitive filename, fast_find() will search the on-disk + * index for that file. 
+ * + * If the file was found, the caller-provided mschmd_file structure will + * be filled out like so: + * - section: the correct value for the found file + * - offset: the correct value for the found file + * - length: the correct value for the found file + * - all other structure elements: NULL or 0 + * + * If the file was not found, MSPACK_ERR_OK will still be returned as the + * result, but the caller-provided structure will be filled out like so: + * - section: NULL + * - offset: 0 + * - length: 0 + * - all other structure elements: NULL or 0 + * + * This method is intended to be used in conjunction with CHM helpfiles + * opened with fast_open(), but it also works with helpfiles opened + * using the regular open(). + * + * @param this a self-referential pointer to the mschm_decompressor + * instance being called + * @param chm the CHM helpfile to search for the file + * @param filename the filename of the file to search for + * @param f_ptr a pointer to a caller-provded mschmd_file structure + * @param f_size sizeof(struct mschmd_file) + * @return MSPACK_ERR_OK, or an error code + * @see open(), close(), fast_find(), extract() + */ + int (*fast_find)(struct mschm_decompressor *this, + struct mschmd_header *chm, + char *filename, + struct mschmd_file *f_ptr, + int f_size); +}; + +/* --- support for .LIT (EBook) file format -------------------------------- */ + +/** TODO */ +struct mslit_compressor { + int dummy; +}; + +/** TODO */ +struct mslit_decompressor { + int dummy; +}; + + +/* --- support for .HLP (MS Help) file format ------------------------------ */ + +/** TODO */ +struct mshlp_compressor { + int dummy; +}; + +/** TODO */ +struct mshlp_decompressor { + int dummy; +}; + + +/* --- support for SZDD file format ---------------------------------------- */ + +/** TODO */ +struct msszdd_compressor { + int dummy; +}; + +/** TODO */ +struct msszdd_decompressor { + int dummy; +}; + +/* --- support for KWAJ file format ---------------------------------------- 
*/ + +/** TODO */ +struct mskwaj_compressor { + int dummy; +}; + +/** TODO */ +struct mskwaj_decompressor { + int dummy; +}; + +#ifdef __cplusplus +}; +#endif + +#endif diff --git a/src/calibre/utils/lzx/system.h b/src/calibre/utils/lzx/system.h new file mode 100644 index 0000000000..acc7d23f56 --- /dev/null +++ b/src/calibre/utils/lzx/system.h @@ -0,0 +1,66 @@ +/* This file is part of libmspack. + * (C) 2003-2004 Stuart Caie. + * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +#ifndef MSPACK_SYSTEM_H +#define MSPACK_SYSTEM_H 1 + +#ifdef _MSC_VER +#define inline +#endif + +#ifdef DEBUG +# include +# define D(x) do { printf("%s:%d (%s) ",__FILE__, __LINE__, __FUNCTION__); \ + printf x ; fputc('\n', stdout); fflush(stdout);} while (0); +#else +# define D(x) +#endif + +/* endian-neutral reading of little-endian data */ +#define __egi32(a,n) ( (((a)[n+3]) << 24) | (((a)[n+2]) << 16) | \ + (((a)[n+1]) << 8) | ((a)[n+0]) ) +#define EndGetI64(a) ((((unsigned long long int) __egi32(a,4)) << 32) | \ + ((unsigned int) __egi32(a,0))) +#define EndGetI32(a) __egi32(a,0) +#define EndGetI16(a) ((((a)[1])<<8)|((a)[0])) + +/* endian-neutral reading of big-endian data */ +#define EndGetM32(a) ((((a)[0])<<24)|(((a)[1])<<16)|(((a)[2])<<8)|((a)[3])) +#define EndGetM16(a) ((((a)[0])<<8)|((a)[1])) + +extern struct mspack_system *mspack_default_system; + +/* returns the length of a file opened for reading */ +extern int mspack_sys_filelen(struct mspack_system *system, + struct mspack_file *file, off_t *length); + +/* validates a system structure */ +extern int mspack_valid_system(struct mspack_system *sys); + +/* Can't redfine intrinsics in Microsoft Visual C */ +#ifndef _MSC_VER + +/* inline memcmp() */ +static inline int memcmp(const void *s1, const void *s2, size_t n) { + unsigned char *c1 = 
(unsigned char *) s1; + unsigned char *c2 = (unsigned char *) s2; + if (n == 0) return 0; + while (--n && (*c1 == *c2)) c1++, c2++; + return *c1 - *c2; +} + +/* inline strlen() */ +static inline size_t strlen(const char *s) { + const char *e = s; + while (*e) e++; + return e - s; +} +#endif + +#endif From 1367ba58f3dba20a1221888af2e3912320db6a0f Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Fri, 18 Jul 2008 18:03:28 -0400 Subject: [PATCH 09/44] Section decompression working --- src/calibre/ebooks/lit/reader.py | 102 ++++++++++++++---- src/calibre/utils/lzx/lzxglue.c | 172 ------------------------------ src/calibre/utils/lzx/lzxmodule.c | 7 +- 3 files changed, 90 insertions(+), 191 deletions(-) delete mode 100644 src/calibre/utils/lzx/lzxglue.c diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 2608d63399..9963e14bf2 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -15,13 +15,14 @@ from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.msdes as msdes +import calibre.utils.lzx as lzx OPF_DECL = """" """ -XHTML_DECL = """ +HTML_DECL = """ @@ -30,6 +31,14 @@ XHTML_DECL = """ DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" +LZXC_TAG = 0x43585a4c +CONTROL_TAG = 4 +CONTROL_WINDOW_SIZE = 12 +RESET_NENTRIES = 4 +RESET_HDRLEN = 12 +RESET_UCLENGTH = 16 +RESET_INTERVAL = 32 + def u32(bytes): return struct.unpack('= 16: - ndwords = int32(control[idx_control:]) + 1 - if (idx_control + (ndwords * 4)) > len(control) or ndwords <= 0: + while len(transform) >= 16: + csize = (int32(control) + 1) * 4 + if csize > len(control) or csize <= 0: raise LitError("ControlData is too short") - guid = msguid(transform[idx_transform:]) + guid = msguid(transform) if guid == DESENCRYPT_GUID: content = self._decrypt(content) - 
idx_control += ndwords * 4 + control = control[csize:] elif guid == LZXCOMPRESS_GUID: - raise LitError("LZX decompression not implemented") + content = self._decompress_section(name, control, content) + control = control[csize:] else: raise LitError("Unrecognized transform: %s." % repr(guid)) - idx_transform += 16 + transform = transform[16:] return content def _decrypt(self, content): @@ -685,6 +698,59 @@ class LitFile(object): raise LitError('Cannot extract content from a DRM protected ebook') return msdes.new(self.bookkey).decrypt(content) + def _decompress_section(self, name, control, content): + if len(control) < 32 or u32(control[CONTROL_TAG:]) != LZXC_TAG: + raise LitError("Invalid ControlData tag value") + result = [] + + window_size = 14 + u = u32(control[CONTROL_WINDOW_SIZE:]) + while u > 0: + u >>= 1 + window_size += 1 + if window_size < 15 or window_size > 21: + raise LitError("Invalid window in ControlData") + lzx.init(window_size) + + reset_table = self.get_file('/'.join( + ['::DataSpace/Storage', name, 'Transform', + LZXCOMPRESS_GUID, 'InstanceData/ResetTable'])) + if len(reset_table) < (RESET_INTERVAL + 8): + raise LitError("Reset table is too short") + if u32(reset_table[RESET_UCLENGTH + 4:]) != 0: + raise LitError("Reset table has 64bit value for UCLENGTH") + ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8 + uclength = int32(reset_table[RESET_UCLENGTH:]) + accum = int32(reset_table[RESET_INTERVAL:]) + bytes_remaining = uclength + window_bytes = (1 << window_size) + base = 0 + + while ofs_entry < len(reset_table): + if accum >= window_bytes: + accum = 0 + size = int32(reset_table[ofs_entry:]) + u = int32(reset_table[ofs_entry + 4:]) + if u != 0: + raise LitError("Reset table entry greater than 32 bits") + if size >= (len(content) + base): + raise("Reset table entry out of bounds") + if bytes_remaining >= window_bytes: + lzx.reset() + result.append(lzx.decompress(content, window_bytes)) + bytes_remaining -= window_bytes + content = content[size - 
base:] + base = size + accum += int32(reset_table[RESET_INTERVAL:]) + ofs_entry += 8 + if bytes_remaining < window_bytes and bytes_remaining > 0: + lzx.reset() + result.append(lzx.decompress(content, bytes_remaining)) + bytes_remaining = 0 + if bytes_remaining > 0: + raise LitError("Failed to completely decompress section") + return ''.join(result) + def get_metadata(stream): try: litfile = LitFile(stream) @@ -693,7 +759,7 @@ def get_metadata(stream): cover_url, cover_item = mi.cover, None if cover_url: cover_url = relpath(cover_url, os.getcwd()) - for item in litfile.manifest: + for item in litfile.manifest.values(): if item.path == cover_url: cover_item = item.internal if cover_item is not None: diff --git a/src/calibre/utils/lzx/lzxglue.c b/src/calibre/utils/lzx/lzxglue.c deleted file mode 100644 index 7820c68cbf..0000000000 --- a/src/calibre/utils/lzx/lzxglue.c +++ /dev/null @@ -1,172 +0,0 @@ -/*--[lzxglue.c]---------------------------------------------------------------- - | Copyright (C) 2004 DRS - | - | This file is part of the "openclit" library for processing .LIT files. - | - | "Openclit" is free software; you can redistribute it and/or modify - | it under the terms of the GNU General Public License as published by - | the Free Software Foundation; either version 2 of the License, or - | (at your option) any later version. - | - | This program is distributed in the hope that it will be useful, - | but WITHOUT ANY WARRANTY; without even the implied warranty of - | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - | GNU General Public License for more details. - | - | You should have received a copy of the GNU General Public License - | along with this program; if not, write to the Free Software - | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- | - | The GNU General Public License may also be available at the following - | URL: http://www.gnu.org/licenses/gpl.html -*/ - -/* This provides a "glue" between Stuart Caie's libmspack library and the - * Openclit calls to the earlier LZX library. - * - * This way, I should be able to use the files unmodified. - */ -#include -#include -#include "litlib.h" -#include "mspack.h" -#include "lzx.h" - -typedef struct memory_file -{ - unsigned int magic; /* 0xB5 */ - void * buffer; - int total_bytes; - int current_bytes; -} memory_file; - - -void * glue_alloc(struct mspack_system *this, size_t bytes) -{ - void * p; - p = (void *)malloc(bytes); - if (p == NULL) { - lit_error(ERR_R|ERR_LIBC,"Malloc(%d) failed!", bytes); - } - return p; -} - -void glue_free(void * p) -{ - free(p); -} - -void glue_copy(void *src, void *dest, size_t bytes) -{ - memcpy(dest, src, bytes); -} - -struct mspack_file * glue_open(struct mspack_system *this, char *filename, - int mode) -{ - lit_error(0,"MSPACK_OPEN unsupported!"); - return NULL; -} - -void glue_close(struct mspack_file * file) { - return; -} - - -int glue_read(struct mspack_file * file, void * buffer, int bytes) -{ - memory_file * mem; - int remaining; - - mem = (memory_file *)file; - if (mem->magic != 0xB5) return -1; - - remaining = mem->total_bytes - mem->current_bytes; - if (!remaining) return 0; - if (bytes > remaining) bytes = remaining; - memcpy(buffer, (unsigned char *)mem->buffer+mem->current_bytes, bytes); - mem->current_bytes += bytes; - return bytes; -} - -int glue_write(struct mspack_file * file, void * buffer, int bytes) -{ - memory_file * mem; - int remaining; - - mem = (memory_file *)file; - if (mem->magic != 0xB5) return -1; - - remaining = mem->total_bytes - mem->current_bytes; - if (!remaining) return 0; - if (bytes > remaining) { - lit_error(0,"MSPACK_READ tried to write %d bytes, only %d left.", - bytes, remaining); - bytes = remaining; - } - memcpy((unsigned char *)mem->buffer+mem->current_bytes, buffer, 
bytes); - mem->current_bytes += bytes; - return bytes; -} - -struct mspack_system lzxglue_system = -{ - glue_open, - glue_close, - glue_read, /* Read */ - glue_write, /* Write */ - NULL, /* Seek */ - NULL, /* Tell */ - NULL, /* Message */ - glue_alloc, - glue_free, - glue_copy, - NULL /* Termination */ -}; - -int LZXwindow; -struct lzxd_stream * lzx_stream = NULL; - - -/* Can't really init here,don't know enough */ -int LZXinit(int window) -{ - LZXwindow = window; - lzx_stream = NULL; - - return 0; -} - -/* Doesn't exist. Oh well, reinitialize state every time anyway */ -void LZXreset(void) -{ - return; -} - -int LZXdecompress(unsigned char *inbuf, unsigned char *outbuf, - unsigned int inlen, unsigned int outlen) -{ - int err; - memory_file source; - memory_file dest; - - source.magic = 0xB5; - source.buffer = inbuf; - source.current_bytes = 0; - source.total_bytes = inlen; - - dest.magic = 0xB5; - dest.buffer = outbuf; - dest.current_bytes = 0; - dest.total_bytes = outlen; - - lzx_stream = lzxd_init(&lzxglue_system, (struct mspack_file *)&source, - (struct mspack_file *)&dest, LZXwindow, - 0x7fff /* Never reset, I do it */, 4096, outlen); - err = -1; - if (lzx_stream) err = lzxd_decompress(lzx_stream, outlen); - - lzxd_free(lzx_stream); - lzx_stream = NULL; - return err; -} diff --git a/src/calibre/utils/lzx/lzxmodule.c b/src/calibre/utils/lzx/lzxmodule.c index 44cc91c11d..bf8a48a056 100644 --- a/src/calibre/utils/lzx/lzxmodule.c +++ b/src/calibre/utils/lzx/lzxmodule.c @@ -199,8 +199,13 @@ initlzx(void) PyObject *m; m = Py_InitModule3("lzx", lzx_methods, lzx_doc); - if (m == NULL) return; + if (m == NULL) { + return; + } + LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL); Py_INCREF(LzxError); PyModule_AddObject(m, "LzxError", LzxError); + + return; } From c33df41eb2eff184268f864fda0f51c40fff1e5f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 18 Jul 2008 21:03:19 -0700 Subject: [PATCH 10/44] IGN:... 
--- src/calibre/trac/bzr_commit_plugin.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/calibre/trac/bzr_commit_plugin.py b/src/calibre/trac/bzr_commit_plugin.py index 0174a685a9..01e3bc7ab0 100644 --- a/src/calibre/trac/bzr_commit_plugin.py +++ b/src/calibre/trac/bzr_commit_plugin.py @@ -86,9 +86,13 @@ class cmd_commit(_cmd_commit): def run(self, message=None, file=None, verbose=False, selected_list=None, unchanged=False, strict=False, local=False, fixes=None, author=None, show_diff=False): + nick = config = bug = action = None if message: - message, bug, url, action, nick, config = \ - self.expand_message(message, tree_files(selected_list)[0]) + try: + message, bug, url, action, nick, config = \ + self.expand_message(message, tree_files(selected_list)[0]) + except ValueError: + pass if nick and bug and not fixes: fixes = [nick+':'+bug] From d2c5ba9de4caf9729b11ecda5bcf393e560b6e88 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jul 2008 02:09:34 -0700 Subject: [PATCH 11/44] Make parsing of archives for ebooks more robust --- src/calibre/ebooks/lrf/any/convert_from.py | 32 +++++++++++---------- src/calibre/ebooks/lrf/html/convert_from.py | 5 ++-- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/calibre/ebooks/lrf/any/convert_from.py b/src/calibre/ebooks/lrf/any/convert_from.py index c0a1a6500c..246473c2b2 100644 --- a/src/calibre/ebooks/lrf/any/convert_from.py +++ b/src/calibre/ebooks/lrf/any/convert_from.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' '''Convert any ebook file into a LRF file.''' -import sys, os, logging, shutil, tempfile, glob +import sys, os, logging, shutil, tempfile, glob, re from calibre.ebooks import UnknownFormatError from calibre.ebooks.lrf import option_parser as _option_parser @@ -26,17 +26,16 @@ def largest_file(files): return file def find_htmlfile(dir): - cwd = os.getcwd() - try: - os.chdir(dir) - for pair in (('*toc*.htm*', '*toc*.xhtm*'), 
('*.htm*', '*.xhtm*')): - files = glob.glob(pair[0]) - files += glob.glob(pair[1]) - file = largest_file(files) - if file: - return os.path.join(dir, file) - finally: - os.chdir(cwd) + ext_pat = re.compile(r'\.(x){0,1}htm(l){0,1}', re.IGNORECASE) + toc_pat = re.compile(r'toc', re.IGNORECASE) + toc_files, files = [], [] + for f in map(lambda x:os.path.join(dir, x), os.listdir(dir)): + name, ext = os.path.splitext(f) + if ext and ext_pat.match(ext): + toc_files.append(f) if toc_pat.search(f) else files.append(f) + a = toc_files if toc_files else files + if a: + return largest_file(a) def number_of_unhidden_files(base, listing): ans = 0 @@ -71,9 +70,12 @@ def handle_archive(path): files = [] cdir = traverse_subdirs(tdir) file = None - for ext in ('lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc'): - pat = os.path.join(cdir, '*.'+ext) - files.extend(glob.glob(pat)) + exts = ['lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc'] + candidates = map(lambda x:os.path.join(cdir, x), os.listdir(cdir)) + for ext in exts: + for f in candidates: + if f.lower().endswith(ext): + files.append(f) file = largest_file(files) if not file: file = find_htmlfile(cdir) diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index 8f46f1daef..e602a61156 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -1975,8 +1975,9 @@ def try_opf(path, options, logger): except: continue if not getattr(options, 'cover', None) and orig_cover is not None: - options.cover = orig_cover - options.spine = [i.path for i in opf.spine if i.path] + options.cover = orig_cover + if getattr(opf, 'spine', False): + options.spine = [i.path for i in opf.spine if i.path] if not getattr(options, 'toc', None): options.toc = opf.toc except Exception: From 1a45fc3d58e7ee05c191ffde6218312381acbd2f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jul 2008 02:10:49 -0700 Subject: [PATCH 12/44] Fix various 
bugs in the worker process control code and switch to using local sockets to communicate on Unix --- src/calibre/gui2/main.py | 5 +- src/calibre/parallel.py | 197 +++++++++++++++++++++----------- src/calibre/utils/fontconfig.py | 54 +++++---- 3 files changed, 164 insertions(+), 92 deletions(-) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index 5dafe3c683..e73ada9cb8 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -1241,9 +1241,10 @@ path_to_ebook to the database. if single_instance is not None and single_instance.is_running() and \ single_instance.send_message('launched:'+repr(args)): return 0 - + extra = '' if iswindows else \ + ('If you\'re sure it is not running, delete the file %s.'%os.path.expanduser('~/.calibre_calibre GUI.lock')) QMessageBox.critical(None, 'Cannot Start '+__appname__, - '

%s is already running.

'%__appname__) + '

%s is already running. %s

'%(__appname__, extra)) return 1 initialize_file_icon_provider() try: diff --git a/src/calibre/parallel.py b/src/calibre/parallel.py index d3531c5525..d33728042b 100644 --- a/src/calibre/parallel.py +++ b/src/calibre/parallel.py @@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en' ''' Used to run jobs in parallel in separate processes. Features output streaming, support for progress notification as well as job killing. The worker processes -are controlled via a simple protocol run over TCP/IP sockets. The control happens +are controlled via a simple protocol run over sockets. The control happens mainly in two class, :class:`Server` and :class:`Overseer`. The worker is encapsulated in the function :function:`worker`. Every worker process has the environment variable :envvar:`CALIBRE_WORKER` defined. @@ -25,7 +25,7 @@ the worker interrupts the job and dies. The sending of progress and console outp is buffered and asynchronous to prevent the job from being IO bound. ''' import sys, os, gc, cPickle, traceback, atexit, cStringIO, time, signal, \ - subprocess, socket, collections, binascii, re, tempfile, thread + subprocess, socket, collections, binascii, re, tempfile, thread, tempfile from select import select from functools import partial from threading import RLock, Thread, Event @@ -33,6 +33,7 @@ from threading import RLock, Thread, Event from calibre.ptempfile import PersistentTemporaryFile from calibre import iswindows, detect_ncpus, isosx +DEBUG = False #: A mapping from job names to functions that perform the jobs PARALLEL_FUNCS = { @@ -51,11 +52,14 @@ PARALLEL_FUNCS = { isfrozen = hasattr(sys, 'frozen') +isworker = False win32event = __import__('win32event') if iswindows else None win32process = __import__('win32process') if iswindows else None msvcrt = __import__('msvcrt') if iswindows else None +SOCKET_TYPE = socket.AF_UNIX if not iswindows else socket.AF_INET + class WorkerStatus(object): ''' A platform independent class to control child processes. 
Provides the @@ -223,6 +227,7 @@ class WorkerMother(object): mother = WorkerMother() +_comm_lock = RLock() def write(socket, msg, timeout=5): ''' Write a message on socket. If `msg` is unicode, it is encoded in utf-8. @@ -230,22 +235,29 @@ def write(socket, msg, timeout=5): `msg` is broken into chunks of size 4096 and sent. The :function:`read` function automatically re-assembles the chunks into whole message. ''' - if isinstance(msg, unicode): - msg = msg.encode('utf-8') - length = None - while len(msg) > 0: - if length is None: - length = len(msg) - chunk = ('%-12d'%length) + msg[:4096-12] - msg = msg[4096-12:] - else: - chunk, msg = msg[:4096], msg[4096:] - w = select([], [socket], [], timeout)[1] - if not w: - raise RuntimeError('Write to socket timed out') - if socket.sendall(chunk) is not None: - raise RuntimeError('Failed to write chunk to socket') - + if isworker: + _comm_lock.acquire() + try: + if isinstance(msg, unicode): + msg = msg.encode('utf-8') + if DEBUG: + print >>sys.__stdout__, 'write(%s):'%('worker' if isworker else 'overseer'), repr(msg) + length = None + while len(msg) > 0: + if length is None: + length = len(msg) + chunk = ('%-12d'%length) + msg[:4096-12] + msg = msg[4096-12:] + else: + chunk, msg = msg[:4096], msg[4096:] + w = select([], [socket], [], timeout)[1] + if not w: + raise RuntimeError('Write to socket timed out') + if socket.sendall(chunk) is not None: + raise RuntimeError('Failed to write chunk to socket') + finally: + if isworker: + _comm_lock.release() def read(socket, timeout=5): ''' @@ -253,24 +265,33 @@ def read(socket, timeout=5): function. Raises a `RuntimeError` if the message is corrpted. Can return an empty string. 
''' - buf = cStringIO.StringIO() - length = None - while select([socket],[],[],timeout)[0]: - msg = socket.recv(4096) - if not msg: - break - if length is None: - length, msg = int(msg[:12]), msg[12:] - buf.write(msg) - if buf.tell() >= length: - break - if not length: - return '' - msg = buf.getvalue()[:length] - if len(msg) < length: - raise RuntimeError('Corrupted packet received') - - return msg + if isworker: + _comm_lock.acquire() + try: + buf = cStringIO.StringIO() + length = None + while select([socket],[],[],timeout)[0]: + msg = socket.recv(4096) + if not msg: + break + if length is None: + length, msg = int(msg[:12]), msg[12:] + buf.write(msg) + if buf.tell() >= length: + break + if not length: + if DEBUG: + print >>sys.__stdout__, 'read(%s):'%('worker' if isworker else 'overseer'), 'nothing' + return '' + msg = buf.getvalue()[:length] + if len(msg) < length: + raise RuntimeError('Corrupted packet received') + if DEBUG: + print >>sys.__stdout__, 'read(%s):'%('worker' if isworker else 'overseer'), repr(msg) + return msg + finally: + if isworker: + _comm_lock.release() class RepeatingTimer(Thread): ''' @@ -306,11 +327,13 @@ class Overseer(object): INTERVAL = 0.1 def __init__(self, server, port, timeout=5): - self.worker_status = mother.spawn_worker('127.0.0.1:%d'%port) + self.worker_status = mother.spawn_worker('127.0.0.1:'+str(port)) self.socket = server.accept()[0] # Needed if terminate called hwen interpreter is shutting down self.os = os self.signal = signal + self.on_probation = False + self.terminated = False self.working = False self.timeout = timeout @@ -329,6 +352,7 @@ class Overseer(object): def terminate(self): 'Kill worker process.' 
+ self.terminated = True try: if self.socket: self.write('STOP:') @@ -363,7 +387,9 @@ class Overseer(object): def __eq__(self, other): return hasattr(other, 'process') and hasattr(other, 'worker_pid') and self.worker_pid == other.worker_pid - def __bool__(self): + def is_viable(self): + if self.terminated: + return False return self.worker_status.is_alive() def select(self, timeout=0): @@ -386,6 +412,7 @@ class Overseer(object): self.output = job.output if callable(job.output) else sys.stdout.write self.progress = job.progress if callable(job.progress) else None self.job = job + self.last_report = time.time() def control(self): ''' @@ -397,8 +424,21 @@ class Overseer(object): ''' if select([self.socket],[],[],0)[0]: msg = self.read() + if msg: + self.on_probation = False + self.last_report = time.time() + else: + if self.on_probation: + self.terminate() + return Result(None, ControlError('Worker process died unexpectedly'), '') + else: + self.on_probation = True + return word, msg = msg.partition(':')[0], msg.partition(':')[-1] - if word == 'RESULT': + if word == 'PING': + self.write('OK') + return + elif word == 'RESULT': self.write('OK') return Result(cPickle.loads(msg), None, None) elif word == 'OUTPUT': @@ -421,11 +461,11 @@ class Overseer(object): return Result(None, *cPickle.loads(msg)) else: self.terminate() - return Result(None, ControlError('Worker sent invalid msg: %s', repr(msg)), '') - if not self.worker_status.is_alive(): - return Result(None, ControlError('Worker process died unexpectedly with returncode: %d'%self.process.returncode), '') + return Result(None, ControlError('Worker sent invalid msg: %s'%repr(msg)), '') + if not self.worker_status.is_alive() or time.time() - self.last_report > 180: + self.terminate() + return Result(None, ControlError('Worker process died unexpectedly with returncode: %s'%str(self.process.returncode)), '') - class Job(object): @@ -458,18 +498,23 @@ class Server(Thread): KILL_RESULT = Overseer.KILL_RESULT START_PORT = 
10013 + PID = os.getpid() + def __init__(self, number_of_workers=detect_ncpus()): Thread.__init__(self) self.setDaemon(True) - self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.port = self.START_PORT + self.server_socket = socket.socket(SOCKET_TYPE, socket.SOCK_STREAM) + self.port = tempfile.mktemp(prefix='calibre_server')+'_%d_'%self.PID if not iswindows else self.START_PORT while True: try: - self.server_socket.bind(('localhost', self.port)) + address = ('localhost', self.port) if iswindows else self.port + self.server_socket.bind(address) break - except: - self.port += 1 + except socket.error: + self.port += (1 if iswindows else '1') + if not iswindows: + atexit.register(os.unlink, self.port) self.server_socket.listen(5) self.number_of_workers = number_of_workers self.pool, self.jobs, self.working, self.results = [], collections.deque(), [], {} @@ -525,7 +570,7 @@ class Server(Thread): res = Result(None, unicode(err), traceback.format_exc()) job.done(res) o = None - if o: + if o and o.is_viable(): with self.working_lock: self.working.append(o) @@ -542,7 +587,7 @@ class Server(Thread): done.append(o) for o in done: self.working.remove(o) - if o: + if o and o.is_viable(): with self.pool_lock: self.pool.append(o) @@ -601,9 +646,11 @@ class BufferedSender(object): self.socket = socket self.wbuf, self.pbuf = [], [] self.wlock, self.plock = RLock(), RLock() + self.last_report = None self.timer = RepeatingTimer(0.5, self.send, 'BufferedSender') self.timer.start() + def write(self, msg): if not isinstance(msg, basestring): msg = unicode(msg) @@ -623,20 +670,31 @@ class BufferedSender(object): if not select([], [self.socket], [], 30)[1]: print >>sys.__stderr__, 'Cannot pipe to overseer' return - + + reported = False with self.wlock: if self.wbuf: msg = cPickle.dumps(self.wbuf, -1) self.wbuf = [] write(self.socket, 'OUTPUT:'+msg) read(self.socket, 10) + reported = True with self.plock: if self.pbuf: msg = cPickle.dumps(self.pbuf, -1) self.pbuf 
= [] write(self.socket, 'PROGRESS:'+msg) - read(self.socket, 10) + read(self.socket, 10) + reported = True + + if self.last_report is not None: + if reported: + self.last_report = time.time() + elif time.time() - self.last_report > 60: + write(self.socket, 'PING:') + read(self.socket, 10) + self.last_report = time.time() def notify(self, percent, msg=''): with self.plock: @@ -652,19 +710,25 @@ def get_func(name): return func, kwdargs, notification def work(client_socket, func, args, kwdargs): - func, kargs, notification = get_func(func) - if notification is not None and hasattr(sys.stdout, 'notify'): - kargs[notification] = sys.stdout.notify - kargs.update(kwdargs) - res = func(*args, **kargs) - if hasattr(sys.stdout, 'send'): - sys.stdout.send() - return res + sys.stdout.last_report = time.time() + try: + func, kargs, notification = get_func(func) + if notification is not None and hasattr(sys.stdout, 'notify'): + kargs[notification] = sys.stdout.notify + kargs.update(kwdargs) + res = func(*args, **kargs) + if hasattr(sys.stdout, 'send'): + sys.stdout.send() + return res + finally: + sys.stdout.last_report = None + time.sleep(5) # Give any in progress BufferedSend time to complete def worker(host, port): - client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - client_socket.connect((host, port)) + client_socket = socket.socket(SOCKET_TYPE, socket.SOCK_STREAM) + address = (host, port) if iswindows else port + client_socket.connect(address) write(client_socket, 'CALIBRE_WORKER:%d'%os.getpid()) msg = read(client_socket, timeout=10) if msg != 'OK': @@ -685,10 +749,11 @@ def worker(host, port): try: result = work(client_socket, func, args, kwdargs) write(client_socket, 'RESULT:'+ cPickle.dumps(result)) - except (Exception, SystemExit), err: + except BaseException, err: exception = (err.__class__.__name__, unicode(str(err), 'utf-8', 'replace')) tb = traceback.format_exc() - write(client_socket, 'ERROR:'+cPickle.dumps((exception, tb),-1)) + msg = 
'ERROR:'+cPickle.dumps((exception, tb),-1) + write(client_socket, msg) if read(client_socket, 10) != 'OK': break gc.collect() @@ -714,11 +779,13 @@ def free_spirit(path): func(*args, **kargs) def main(args=sys.argv): + global isworker + isworker = True args = args[1].split(':') if len(args) == 1: free_spirit(binascii.unhexlify(re.sub(r'[^a-f0-9A-F]', '', args[0]))) else: - worker(args[0].replace("'", ''), int(args[1])) + worker(args[0].replace("'", ''), int(args[1]) if iswindows else args[1]) return 0 if __name__ == '__main__': diff --git a/src/calibre/utils/fontconfig.py b/src/calibre/utils/fontconfig.py index 3e74362720..4275d03479 100644 --- a/src/calibre/utils/fontconfig.py +++ b/src/calibre/utils/fontconfig.py @@ -130,36 +130,40 @@ lib.FcConfigBuildFonts.restype = c_int _init_error = None _initialized = False -from threading import Timer -def _do_init(): - # Initialize the fontconfig library. This has to be done manually - # for the OS X bundle as it may have its own private fontconfig. - if hasattr(sys, 'frameworks_dir'): - config_dir = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')), 'Resources', 'fonts') - if isinstance(config_dir, unicode): - config_dir = config_dir.encode(sys.getfilesystemencoding()) - config = lib.FcConfigCreate() - if not lib.FcConfigParseAndLoad(config, os.path.join(config_dir, 'fonts.conf'), 1): - _init_error = 'Could not parse the fontconfig configuration' +from threading import Thread + +class FontScanner(Thread): + def run(self): + # Initialize the fontconfig library. This has to be done manually + # for the OS X bundle as it may have its own private fontconfig. 
+ if getattr(sys, 'frameworks_dir', False): + config_dir = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')), 'Resources', 'fonts') + if isinstance(config_dir, unicode): + config_dir = config_dir.encode(sys.getfilesystemencoding()) + config = lib.FcConfigCreate() + if not lib.FcConfigParseAndLoad(config, os.path.join(config_dir, 'fonts.conf'), 1): + _init_error = 'Could not parse the fontconfig configuration' + return + if not lib.FcConfigBuildFonts(config): + _init_error = 'Could not build fonts' + return + if not lib.FcConfigSetCurrent(config): + _init_error = 'Could not set font config' + return + elif not lib.FcInit(): + _init_error = _('Could not initialize the fontconfig library') return - if not lib.FcConfigBuildFonts(config): - _init_error = 'Could not build fonts' - return - if not lib.FcConfigSetCurrent(config): - _init_error = 'Could not set font config' - return - elif not lib.FcInit(): - _init_error = _('Could not initialize the fontconfig library') - return - global _initialized - _initialized = True + global _initialized + _initialized = True -_init_timer = Timer(0.1, _do_init) -_init_timer.start() +_scanner = FontScanner() +_scanner.start() def join(): - _init_timer.join() + _scanner.join(120) + if _scanner.isAlive(): + raise RuntimeError('Scanning for system fonts seems to have hung. Try again in a little while.') if _init_error is not None: raise RuntimeError(_init_error) From 897c75dbf8425acf5f47f26e3f05f89f1699fb87 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jul 2008 08:39:02 -0700 Subject: [PATCH 13/44] IGN:... --- src/calibre/parallel.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/calibre/parallel.py b/src/calibre/parallel.py index d33728042b..f794626441 100644 --- a/src/calibre/parallel.py +++ b/src/calibre/parallel.py @@ -25,7 +25,7 @@ the worker interrupts the job and dies. 
The sending of progress and console outp is buffered and asynchronous to prevent the job from being IO bound. ''' import sys, os, gc, cPickle, traceback, atexit, cStringIO, time, signal, \ - subprocess, socket, collections, binascii, re, tempfile, thread, tempfile + subprocess, socket, collections, binascii, re, thread, tempfile from select import select from functools import partial from threading import RLock, Thread, Event @@ -494,6 +494,11 @@ class Result(object): def __iter__(self): return iter((self.result, self.exception, self.traceback)) +def remove_ipc_socket(path): + os = __import__('os') + if os.path.exists(path): + os.path.unlink(path) + class Server(Thread): KILL_RESULT = Overseer.KILL_RESULT @@ -508,13 +513,13 @@ class Server(Thread): self.port = tempfile.mktemp(prefix='calibre_server')+'_%d_'%self.PID if not iswindows else self.START_PORT while True: try: - address = ('localhost', self.port) if iswindows else self.port - self.server_socket.bind(address) + address = ('localhost', self.port) if iswindows else self.port + self.server_socket.bind(address) break except socket.error: self.port += (1 if iswindows else '1') if not iswindows: - atexit.register(os.unlink, self.port) + atexit.register(remove_ipc_socket, self.port) self.server_socket.listen(5) self.number_of_workers = number_of_workers self.pool, self.jobs, self.working, self.results = [], collections.deque(), [], {} From 3737fd3e13c380bcfda7b9d54d7ee012547d401e Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sat, 19 Jul 2008 14:51:31 -0400 Subject: [PATCH 14/44] Added path clean-up and basic extraction method. --- src/calibre/ebooks/lit/reader.py | 200 ++++++++++++++++++------------- 1 file changed, 114 insertions(+), 86 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 9963e14bf2..afe5d96297 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -1,8 +1,10 @@ +''' +Support for reading LIT files. 
+''' +from __future__ import with_statement + __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -''' -Support for reading the metadata from a lit file. -''' import sys, struct, cStringIO, os import functools @@ -39,6 +41,13 @@ RESET_HDRLEN = 12 RESET_UCLENGTH = 16 RESET_INTERVAL = 32 +FLAG_OPENING = 1 +FLAG_CLOSING = 2 +FLAG_BLOCK = 4 +FLAG_HEAD = 8 +FLAG_ATOM = 16 +XML_ENTITIES = ['&', ''', '<', '>', '"'] + def u32(bytes): return struct.unpack('= len(raw): - raise LitError('Truncated manifest.') + while raw: + slen, raw = ord(raw[0]), raw[1:] + if slen == 0: break + root, raw = raw[:slen].decode('utf8'), raw[slen:] + if not raw: + raise LitError('Truncated manifest') for state in ['spine', 'not spine', 'css', 'images']: - num_files = int32(raw[pos:pos+4]) - pos += 4 + num_files, raw = int32(raw), raw[4:] if num_files == 0: continue - - i = 0 - while i < num_files: - if pos+5 >= len(raw): - raise LitError('Truncated manifest.') - offset = u32(raw[pos:pos+4]) - pos += 4 - - slen = ord(raw[pos]) - pos += 1 - internal = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - original = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - mime_type = raw[pos:pos+slen].decode('utf8') - pos += slen + 1 - - self.manifest[internal] = \ - ManifestItem(original, internal, mime_type, - offset, root, state) - i += 1 + for i in xrange(num_files): + if len(raw) < 5: + raise LitError('Truncated manifest') + offset, raw = u32(raw), raw[4:] + slen, raw = ord(raw[0]), raw[1:] + internal, raw = raw[:slen].decode('utf8'), raw[slen:] + slen, raw = ord(raw[0]), raw[1:] + original, raw = raw[:slen].decode('utf8'), raw[slen:] + slen, raw = ord(raw[0]), raw[1:] + mime_type, raw = raw[:slen].decode('utf8'), raw[slen+1:] + self.manifest[internal] = ManifestItem( + original, internal, mime_type, offset, root, state) + mlist = self.manifest.values() + shared = mlist[0].path + for item in mlist[1:]: + path = item.path + 
while not path.startswith(shared): + shared = shared[:-1] + if shared == '': + break + else: + slen = len(shared) + for item in mlist: + item.path = item.path[slen:] def read_meta(self, entry): raw = self._read_content(entry.offset, entry.size) @@ -610,16 +606,12 @@ class LitFile(object): self.meta = xml def read_drm(self): - def exists_file(name): - try: self.get_file(name) - except KeyError: return False - return True self.drmlevel = 0 - if exists_file('/DRMStorage/Licenses/EUL'): + if '/DRMStorage/Licenses/EUL' in self.entries: self.drmlevel = 5 - elif exists_file('/DRMStorage/DRMBookplate'): + elif '/DRMStorage/DRMBookplate' in self.entries: self.drmlevel = 3 - elif exists_file('/DRMStorage/DRMSealed'): + elif '/DRMStorage/DRMSealed' in self.entries: self.drmlevel = 1 else: return @@ -686,7 +678,10 @@ class LitFile(object): content = self._decrypt(content) control = control[csize:] elif guid == LZXCOMPRESS_GUID: - content = self._decompress_section(name, control, content) + reset_table = self.get_file( + '/'.join(['::DataSpace/Storage', name, 'Transform', + LZXCOMPRESS_GUID, 'InstanceData/ResetTable'])) + content = self._decompress(content, control, reset_table) control = control[csize:] else: raise LitError("Unrecognized transform: %s." 
% repr(guid)) @@ -698,9 +693,14 @@ class LitFile(object): raise LitError('Cannot extract content from a DRM protected ebook') return msdes.new(self.bookkey).decrypt(content) - def _decompress_section(self, name, control, content): + def _decompress(self, content, control, reset_table): if len(control) < 32 or u32(control[CONTROL_TAG:]) != LZXC_TAG: raise LitError("Invalid ControlData tag value") + if len(reset_table) < (RESET_INTERVAL + 8): + raise LitError("Reset table is too short") + if u32(reset_table[RESET_UCLENGTH + 4:]) != 0: + raise LitError("Reset table has 64bit value for UCLENGTH") + result = [] window_size = 14 @@ -712,13 +712,6 @@ class LitFile(object): raise LitError("Invalid window in ControlData") lzx.init(window_size) - reset_table = self.get_file('/'.join( - ['::DataSpace/Storage', name, 'Transform', - LZXCOMPRESS_GUID, 'InstanceData/ResetTable'])) - if len(reset_table) < (RESET_INTERVAL + 8): - raise LitError("Reset table is too short") - if u32(reset_table[RESET_UCLENGTH + 4:]) != 0: - raise LitError("Reset table has 64bit value for UCLENGTH") ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8 uclength = int32(reset_table[RESET_UCLENGTH:]) accum = int32(reset_table[RESET_INTERVAL:]) @@ -749,11 +742,38 @@ class LitFile(object): bytes_remaining = 0 if bytes_remaining > 0: raise LitError("Failed to completely decompress section") - return ''.join(result) - + return ''.join(result) + + def extract_content(self, output_dir=os.getcwdu()): + output_dir = os.path.abspath(output_dir) + try: + opf_path = os.path.splitext( + os.path.basename(self._stream.name))[0] + '.opf' + except AttributeError: + opf_path = 'content.opf' + opf_path = os.path.join(output_dir, opf_path) + self._ensure_dir(opf_path) + with open(opf_path, 'w') as f: + f.write(self.get_markup_file('/meta').encode('utf-8')) + for entry in self.manifest.values(): + path = os.path.join(output_dir, entry.path) + self._ensure_dir(path) + with open(path, 'w') as f: + if 'spine' in entry.state: + 
name = '/'.join(['/data', entry.internal, 'content']) + f.write(self.get_markup_file(name).encode('utf-8')) + else: + name = '/'.join(['/data', entry.internal]) + f.write(self.get_file(name)) + + def _ensure_dir(self, path): + dir = os.path.dirname(path) + if not os.path.isdir(dir): + os.makedirs(dir) + def get_metadata(stream): try: - litfile = LitFile(stream) + litfile = LitReader(stream) src = litfile.meta.encode('utf-8') mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd()) cover_url, cover_item = mi.cover, None @@ -775,16 +795,24 @@ def get_metadata(stream): mi = MetaInformation(title, ['Unknown']) return mi +def option_parser(): + from calibre import OptionParser + parser = OptionParser(usage=_('%prog [options] EBOOK')) + parser.add_option('-o', '--output-dir', default='.', + help=_('Output directory. Defaults to current directory.')) + parser.add_option('--verbose', default=False, action='store_true', + help='Useful for debugging.') + return parser + def main(args=sys.argv): + parser = option_parser() + opts, args = parser.parse_args(args) if len(args) != 2: - print >>sys.stderr, _('Usage: %s file.lit')%(args[0],) + parser.print_help() return 1 - mi = get_metadata(open(args[1], 'rb')) - print unicode(mi) - if mi.cover_data[1]: - cover = os.path.abspath(os.path.splitext(os.path.basename(args[1]))[0] + '.' + mi.cover_data[0]) - open(cover, 'wb').write(mi.cover_data[1]) - print _('Cover saved to'), cover + lr = LitReader(args[1]) + lr.extract_content(opts.output_dir) + print _('OEB ebook created in'), opts.output_dir return 0 if __name__ == '__main__': From 731631a7d7dcc4662100caea66f841b32fae6fa6 Mon Sep 17 00:00:00 2001 From: "Marshall T. 
Vandegrift" Date: Sat, 19 Jul 2008 16:45:41 -0400 Subject: [PATCH 15/44] Added a few speed-ups to the DES code --- src/calibre/ebooks/lit/msdes.py | 38 ++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/lit/msdes.py b/src/calibre/ebooks/lit/msdes.py index 5bc67b09bb..de980f8c3d 100644 --- a/src/calibre/ebooks/lit/msdes.py +++ b/src/calibre/ebooks/lit/msdes.py @@ -1,6 +1,7 @@ # Re-modified for use in MS LIT decryption. Un-reversed the bytebit[] array. -# Substituted Microsoft's absurd modified S-boxes. Modified the encrypt/decrypt -# methods to handle more than one block at a time. +# Substituted Microsoft's absurd modified S-boxes. Modified the +# encrypt/decrypt methods to handle more than one block at a time. Added a few +# speed-ups supported by modern versions of Python. Added option 'psyco' use. # # And lo, all the previous notices follow: @@ -125,30 +126,30 @@ class DesCipher: pcr = [0]*56 #new int[56]; kn = [0]*32 #new int[32]; - for j in range(56): + for j in xrange(56): l = pc1[j] m = l & 07 pc1m[j] = ((keyBlock[l >> 3] & bytebit[m]) != 0) - for i in range(16): + for i in xrange(16): if encrypting: m = i << 1 else: m = (15-i) << 1 n = m + 1 kn[m] = kn[n] = 0 - for j in range(28): + for j in xrange(28): l = j + totrot[i] if l < 28: pcr[j] = pc1m[l] else: pcr[j] = pc1m[l - 28] - for j in range(28, 56): + for j in xrange(28, 56): l = j + totrot[i] if l < 56: pcr[j] = pc1m[l] else: pcr[j] = pc1m[l - 28] - for j in range(24): + for j in xrange(24): if pcr[pc2[j]] != 0: kn[m] |= bigbyte[j] if pcr[pc2[j+24]] != 0: @@ -163,7 +164,7 @@ class DesCipher: rawi = 0 KnLi = 0 - for i in range(16): + for i in xrange(16): raw0 = raw[rawi] rawi += 1 raw1 = raw[rawi] @@ -187,11 +188,10 @@ class DesCipher: if len(clearText) % 8 != 0: raise TypeError, "length must be multiple of block size" result = [] - while clearText: + for base in xrange(0, len(clearText), 8): result.append(struct.pack( - ">LL", 
*self.des(struct.unpack(">LL", clearText[:8]), + ">LL", *self.des(struct.unpack(">LL", clearText[base:base+8]), self.encryptKeys))) - clearText = clearText[8:] return ''.join(result) #/ Decrypt a block of eight bytes. @@ -199,11 +199,10 @@ class DesCipher: if len(cipherText) % 8 != 0: raise TypeError, "length must be multiple of block size" result = [] - while cipherText: + for base in xrange(0, len(cipherText), 8): result.append(struct.pack( - ">LL", *self.des(struct.unpack(">LL", cipherText[:8]), + ">LL", *self.des(struct.unpack(">LL", cipherText[base:base+8]), self.decryptKeys))) - cipherText = cipherText[8:] return ''.join(result) # The DES function. @@ -234,7 +233,7 @@ class DesCipher: right ^= work leftt = ((leftt << 1) | ((leftt >> 31) & 1)) & 0xffffffffL - for round in range(8): + for round in xrange(8): work = ((right << 28) | (right >> 4)) & 0xffffffffL work ^= keys[keysi] keysi += 1 @@ -322,6 +321,7 @@ pc2 = [ 45, 41, 49, 35, 28, 31, ] +# Microsoft's modified S-boxes for LIT file encryption SP1 = [ 0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, 0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L, @@ -473,6 +473,14 @@ def new(key): block_size = 8 key_size = 8 +try: + import psyco + psyco.bind(DesCipher.deskey) + psyco.bind(DesCipher.cookey) + psyco.bind(DesCipher.des) +except ImportError: + pass + #test only: if __name__ == '__main__': des = DesCipher("\x01\x23\x45\x67\x89\xab\xcd\xef") From fb4f2f3a81c3e27837cfb47697f00cbafeac07ee Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sat, 19 Jul 2008 16:47:14 -0400 Subject: [PATCH 16/44] Added comments for LIT-specific SHA-1 changes. 
--- src/calibre/ebooks/lit/mssha1.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py index d61bd39094..1708c8dd8b 100644 --- a/src/calibre/ebooks/lit/mssha1.py +++ b/src/calibre/ebooks/lit/mssha1.py @@ -1,21 +1,11 @@ -#!/usr/bin/env python -# -*- coding: iso-8859-1 +""" +Modified version of SHA-1 used in Microsoft LIT files. -"""A sample implementation of SHA-1 in pure Python. - - Framework adapted from Dinu Gherman's MD5 implementation by - J. Hallén and L. Creighton. SHA-1 implementation based directly on - the text of the NIST standard FIPS PUB 180-1. +Adapted from the PyPy pure-Python SHA-1 implementation. """ - -__date__ = '2004-11-17' -__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy - - import struct, copy - # ====================================================================== # Bit-Manipulation helpers # @@ -100,10 +90,13 @@ def f40_59(B, C, D): def f60_79(B, C, D): return B ^ C ^ D +# Microsoft's lovely addition... def f6_42(B, C, D): return (B + C) ^ C f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20 + +# ...and delightful changes f[3] = f20_39 f[6] = f6_42 f[10] = f20_39 @@ -148,6 +141,7 @@ class mssha1(object): self.input = [] # Initial 160 bit message digest (5 times 32 bit). + # Also changed by Microsoft from standard. self.H0 = 0x32107654L self.H1 = 0x23016745L self.H2 = 0xC4E680A2L From 6b18c8b745cf6be6dbc463d7032942a375a2d61a Mon Sep 17 00:00:00 2001 From: "Marshall T. 
Vandegrift" Date: Sat, 19 Jul 2008 16:47:46 -0400 Subject: [PATCH 17/44] Added "lit2oeb" to set of command-line tools --- src/calibre/linux.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 5d3ead778e..4d7ff9c8aa 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -46,6 +46,7 @@ entry_points = { 'librarything = calibre.ebooks.metadata.library_thing:main', 'mobi2oeb = calibre.ebooks.mobi.reader:main', 'lrf2html = calibre.ebooks.lrf.html.convert_to:main', + 'lit2oeb = calibre.ebooks.lit.reader:main', 'calibre-debug = calibre.debug:main', 'calibredb = calibre.library.cli:main', 'calibre-fontconfig = calibre.utils.fontconfig:main', From 006182e5f46ea8f5da43607ab530dff752a12d94 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sat, 19 Jul 2008 16:50:14 -0400 Subject: [PATCH 18/44] Fixed bug in directory processing and re-named methods to reflect public/private status. --- src/calibre/ebooks/lit/reader.py | 113 ++++++++++++++----------------- 1 file changed, 51 insertions(+), 62 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index afe5d96297..0fed4aacbc 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -19,13 +19,13 @@ import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.msdes as msdes import calibre.utils.lzx as lzx -OPF_DECL = """" +OPF_DECL = """ """ HTML_DECL = """ - """ @@ -421,8 +421,13 @@ class LitReader(object): raise LitError('Not a valid LIT file') if self.version != 1: raise LitError('Unknown LIT version %d'%(self.version,)) - self.read_secondary_header() - self.read_header_pieces() + self.entries = {} + self._read_secondary_header() + self._read_header_pieces() + self._read_section_names() + self._read_manifest() + self._read_meta() + self._read_drm() @preserve def __len__(self): @@ -437,10 +442,9 @@ class LitReader(object): def _read_content(self, offset, size): return 
self._read_raw(self.content_offset + offset, size) - @preserve - def read_secondary_header(self): - self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) - bytes = self._stream.read(self.sec_hdr_len) + def _read_secondary_header(self): + offset = self.hdr_len + (self.num_pieces * self.PIECE_SIZE) + bytes = self._read_raw(offset, self.sec_hdr_len) offset = int32(bytes[4:]) while offset < len(bytes): blocktype = bytes[offset:offset+4] @@ -468,23 +472,21 @@ class LitReader(object): if not hasattr(self, 'content_offset'): raise LitError('Could not figure out the content offset') - @preserve - def read_header_pieces(self): + def _read_header_pieces(self): src = self.header[self.hdr_len:] for i in range(self.num_pieces): piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE] if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: raise LitError('Piece %s has 64bit value' % repr(piece)) offset, size = u32(piece), int32(piece[8:]) - self._stream.seek(offset) - piece = self._stream.read(size) + piece = self._read_raw(offset, size) if i == 0: continue # Dont need this piece elif i == 1: if u32(piece[8:]) != self.entry_chunklen or \ u32(piece[12:]) != self.entry_unknown: raise LitError('Secondary header does not match piece') - self.read_directory(piece) + self._read_directory(piece) elif i == 2: if u32(piece[8:]) != self.count_chunklen or \ u32(piece[12:]) != self.count_unknown: @@ -495,58 +497,44 @@ class LitReader(object): elif i == 4: self.piece4_guid = piece - def read_directory(self, piece): - self.entries = {} + def _read_directory(self, piece): if not piece.startswith('IFCM'): raise LitError('Header piece #1 is not main directory.') chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28]) - if (32 + chunk_size * num_chunks) != len(piece): + if (32 + (num_chunks * chunk_size)) != len(piece): raise LitError('IFCM HEADER has incorrect length') - for chunk in range(num_chunks): - p = 32 + chunk * chunk_size - if piece[p:p+4] != 'AOLL': - continue - 
remaining = chunk_size - int32(piece[p+4:p+8]) - 48 - if remaining < 0: + for i in xrange(num_chunks): + offset = 32 + (i * chunk_size) + chunk = piece[offset:offset + chunk_size] + tag, chunk = chunk[:4], chunk[4:] + if tag != 'AOLL': continue + remaining, chunk = int32(chunk[:4]), chunk[4:] + if remaining >= chunk_size: raise LitError('AOLL remaining count is negative') - entries = u16(piece[p+chunk_size-2:]) - if entries <= 0: - # Hopefully everything will work even without a correct entries - # count + remaining = chunk_size - (remaining + 48) + entries = u16(chunk[-2:]) + if entries == 0: + # Hopefully will work even without a correct entries count entries = (2 ** 16) - 1 - piece = piece[p+48:] - i = 0 - while i < entries: + chunk = chunk[40:] + for j in xrange(entries): if remaining <= 0: break - namelen, piece, remaining = encint(piece, remaining) + namelen, chunk, remaining = encint(chunk, remaining) if namelen != (namelen & 0x7fffffff): raise LitError('Directory entry had 64bit name length.') if namelen > remaining - 3: raise LitError('Read past end of directory chunk') - name = piece[:namelen] - piece = piece[namelen:] - section, piece, remaining = encint(piece, remaining) - offset, piece, remaining = encint(piece, remaining) - size, piece, remaining = encint(piece, remaining) - + name, chunk = chunk[:namelen], chunk[namelen:] + section, chunk, remaining = encint(chunk, remaining) + offset, chunk, remaining = encint(chunk, remaining) + size, chunk, remaining = encint(chunk, remaining) entry = DirectoryEntry(name, section, offset, size) - - if name == '::DataSpace/NameList': - self.read_section_names(entry) - elif name == '/manifest': - self.read_manifest(entry) - elif name == '/meta': - self.read_meta(entry) self.entries[name] = entry - i += 1 - if not hasattr(self, 'section_names'): - raise LitError('Lit file does not have a valid NameList') - if not hasattr(self, 'manifest'): - raise LitError('Lit file does not have a valid manifest') - self.read_drm() 
- def read_section_names(self, entry): - raw = self._read_content(entry.offset, entry.size) + def _read_section_names(self): + if '::DataSpace/NameList' not in self.entries: + raise LitError('Lit file does not have a valid NameList') + raw = self.get_file('::DataSpace/NameList') if len(raw) < 4: raise LitError('Invalid Namelist section') pos = 4 @@ -563,9 +551,11 @@ class LitReader(object): raw[pos:pos+size].decode('utf-16-le').rstrip('\000') pos += size - def read_manifest(self, entry): + def _read_manifest(self): + if '/manifest' not in self.entries: + raise LitError('Lit file does not have a valid manifest') + raw = self.get_file('/manifest') self.manifest = {} - raw = self._read_content(entry.offset, entry.size) while raw: slen, raw = ord(raw[0]), raw[1:] if slen == 0: break @@ -600,12 +590,12 @@ class LitReader(object): for item in mlist: item.path = item.path[slen:] - def read_meta(self, entry): - raw = self._read_content(entry.offset, entry.size) + def _read_meta(self): + raw = self.get_file('/meta') xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP)) self.meta = xml - def read_drm(self): + def _read_drm(self): self.drmlevel = 0 if '/DRMStorage/Licenses/EUL' in self.entries: self.drmlevel = 5 @@ -615,13 +605,13 @@ class LitReader(object): self.drmlevel = 1 else: return - des = msdes.new(self.calculate_deskey()) + des = msdes.new(self._calculate_deskey()) bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed')) if bookkey[0] != '\000': raise LitError('Unable to decrypt title key!') self.bookkey = bookkey[1:9] - def calculate_deskey(self): + def _calculate_deskey(self): hashfiles = ['/meta', '/DRMStorage/DRMSource'] if self.drmlevel == 3: hashfiles.append('/DRMStorage/DRMBookplate') @@ -726,19 +716,18 @@ class LitReader(object): u = int32(reset_table[ofs_entry + 4:]) if u != 0: raise LitError("Reset table entry greater than 32 bits") - if size >= (len(content) + base): + if size >= len(content): raise("Reset table entry out of bounds") if 
bytes_remaining >= window_bytes: lzx.reset() - result.append(lzx.decompress(content, window_bytes)) + result.append(lzx.decompress(content[base:size], window_bytes)) bytes_remaining -= window_bytes - content = content[size - base:] base = size accum += int32(reset_table[RESET_INTERVAL:]) ofs_entry += 8 if bytes_remaining < window_bytes and bytes_remaining > 0: lzx.reset() - result.append(lzx.decompress(content, bytes_remaining)) + result.append(lzx.decompress(content[base:], bytes_remaining)) bytes_remaining = 0 if bytes_remaining > 0: raise LitError("Failed to completely decompress section") From a349d763791c48d47cea1f32778b244aef794b5c Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sat, 19 Jul 2008 18:24:59 -0400 Subject: [PATCH 19/44] Various encoding fix-ups. Fix for broken file(s?) from Penguin. --- src/calibre/ebooks/lit/reader.py | 40 ++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 0fed4aacbc..66d6fe9385 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -8,6 +8,7 @@ __copyright__ = '2008, Kovid Goyal ' import sys, struct, cStringIO, os import functools +import codecs from itertools import repeat from calibre import relpath @@ -33,7 +34,6 @@ HTML_DECL = """ DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" -LZXC_TAG = 0x43585a4c CONTROL_TAG = 4 CONTROL_WINDOW_SIZE = 12 RESET_NENTRIES = 4 @@ -41,11 +41,11 @@ RESET_HDRLEN = 12 RESET_UCLENGTH = 16 RESET_INTERVAL = 32 -FLAG_OPENING = 1 -FLAG_CLOSING = 2 -FLAG_BLOCK = 4 -FLAG_HEAD = 8 -FLAG_ATOM = 16 +FLAG_OPENING = (1 << 0) +FLAG_CLOSING = (1 << 1) +FLAG_BLOCK = (1 << 2) +FLAG_HEAD = (1 << 3) +FLAG_ATOM = (1 << 4) XML_ENTITIES = ['&', ''', '<', '>', '"'] def u32(bytes): @@ -202,7 +202,7 @@ class UnBinary(object): is_goingdown = False if not tag_name: raise LitError('Tag 
ends before it begins.') - self.buf.write('') + self.buf.write(u''.join(('')).encode('utf-8')) dynamic_tag = 0 tag_name = None state = 'text' @@ -252,7 +252,7 @@ class UnBinary(object): state = 'get attr' elif count > 0: if not in_censorship: - self.buf.write(c) + self.buf.write(unicode(c).encode('utf-8')) count -= 1 if count == 0: if not in_censorship: @@ -272,7 +272,7 @@ class UnBinary(object): tag_name += c count -= 1 if count == 0: - self.buf.write(tag_name) + self.buf.write(unicode(tag_name).encode('utf-8')) state = 'get attr' elif state == 'get attr length': @@ -283,7 +283,7 @@ class UnBinary(object): state = 'get custom attr' elif state == 'get custom attr': - self.buf.write(c) + self.buf.write(unicode(c).encode('utf-8')) count -= 1 if count == 0: self.buf.write('=') @@ -592,7 +592,13 @@ class LitReader(object): def _read_meta(self): raw = self.get_file('/meta') - xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP)) + try: + xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP)) + except LitError: + if 'PENGUIN group' not in raw: raise + print "WARNING: attempting PENGUIN malformed OPF fix" + raw = raw.replace('PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1) + xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP)) self.meta = xml def _read_drm(self): @@ -669,8 +675,8 @@ class LitReader(object): control = control[csize:] elif guid == LZXCOMPRESS_GUID: reset_table = self.get_file( - '/'.join(['::DataSpace/Storage', name, 'Transform', - LZXCOMPRESS_GUID, 'InstanceData/ResetTable'])) + '/'.join(('::DataSpace/Storage', name, 'Transform', + LZXCOMPRESS_GUID, 'InstanceData/ResetTable'))) content = self._decompress(content, control, reset_table) control = control[csize:] else: @@ -684,7 +690,7 @@ class LitReader(object): return msdes.new(self.bookkey).decrypt(content) def _decompress(self, content, control, reset_table): - if len(control) < 32 or u32(control[CONTROL_TAG:]) != LZXC_TAG: + if len(control) < 32 or 
control[CONTROL_TAG:CONTROL_TAG+4] != "LZXC": raise LitError("Invalid ControlData tag value") if len(reset_table) < (RESET_INTERVAL + 8): raise LitError("Reset table is too short") @@ -743,16 +749,16 @@ class LitReader(object): opf_path = os.path.join(output_dir, opf_path) self._ensure_dir(opf_path) with open(opf_path, 'w') as f: - f.write(self.get_markup_file('/meta').encode('utf-8')) + f.write(self.meta.encode('utf-8')) for entry in self.manifest.values(): path = os.path.join(output_dir, entry.path) self._ensure_dir(path) with open(path, 'w') as f: if 'spine' in entry.state: - name = '/'.join(['/data', entry.internal, 'content']) + name = '/'.join(('/data', entry.internal, 'content')) f.write(self.get_markup_file(name).encode('utf-8')) else: - name = '/'.join(['/data', entry.internal]) + name = '/'.join(('/data', entry.internal)) f.write(self.get_file(name)) def _ensure_dir(self, path): From d77e8ba4a2cdb5a0ce225a308beeacf9400b7049 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jul 2008 18:26:15 -0700 Subject: [PATCH 20/44] version 0.4.78 --- src/calibre/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 5c55632da9..2d058fa221 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -1,7 +1,7 @@ ''' E-book management software''' __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -__version__ = '0.4.77' +__version__ = '0.4.78' __docformat__ = "epytext" __author__ = "Kovid Goyal " __appname__ = 'calibre' From 279a8e60675ace1381e948b6548e7693e0dc01f1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jul 2008 18:29:46 -0700 Subject: [PATCH 21/44] IGN:Tag release From 5a5e63499ef1e64b919401d6ea12dc7f16ab1d0b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jul 2008 20:23:46 -0700 Subject: [PATCH 22/44] Fix bug in html2lrf that was causing problems with paragraghs inside tags. 
--- src/calibre/ebooks/lrf/html/convert_from.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index e602a61156..e1014d69dd 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -92,8 +92,8 @@ class HTMLConverter(object, LoggingInterface): # sometimes there are unterminated comments (re.compile(r"<\s*style.*?>(.*?)<\/\s*style\s*>", re.DOTALL|re.IGNORECASE), lambda match: match.group().replace('', '')), - # remove

tags from within tags - (re.compile(r'%(open)s(.*?)%(close)s'%tag_regex('a'), re.DOTALL|re.IGNORECASE), + # remove

tags from within tags + (re.compile(r'<\s*a\s+[^<>]*href\s*=[^<>]*>(.*?)<\s*/\s*a\s*>', re.DOTALL|re.IGNORECASE), lambda match: re.compile(r'%(open)s|%(close)s'%tag_regex('p'), re.IGNORECASE).sub('', match.group())), # Replace common line break patterns with line breaks From 015ca663506ee5fa930adbbcaa294e847a4ae2d8 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sun, 20 Jul 2008 00:20:18 -0400 Subject: [PATCH 23/44] Added various copyright headers and doc strings --- src/calibre/ebooks/lit/maps/__init__.py | 7 +++++++ src/calibre/ebooks/lit/maps/html.py | 7 +++++++ src/calibre/ebooks/lit/maps/opf.py | 7 +++++++ src/calibre/ebooks/lit/reader.py | 3 ++- src/calibre/utils/lzx/lzxmodule.c | 7 +++++++ 5 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/lit/maps/__init__.py b/src/calibre/ebooks/lit/maps/__init__.py index 2abab3efe9..2235c384ff 100644 --- a/src/calibre/ebooks/lit/maps/__init__.py +++ b/src/calibre/ebooks/lit/maps/__init__.py @@ -1,2 +1,9 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +""" +Microsoft LIT tag and attribute tables. +""" + from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP from calibre.ebooks.lit.maps.html import MAP as HTML_MAP diff --git a/src/calibre/ebooks/lit/maps/html.py b/src/calibre/ebooks/lit/maps/html.py index de0286c764..c0b9987f32 100644 --- a/src/calibre/ebooks/lit/maps/html.py +++ b/src/calibre/ebooks/lit/maps/html.py @@ -1,3 +1,10 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +""" +Microsoft LIT HTML tag and attribute tables, copied from ConvertLIT. +""" + TAGS = [ None, None, diff --git a/src/calibre/ebooks/lit/maps/opf.py b/src/calibre/ebooks/lit/maps/opf.py index cc1acc4dfa..f3bb7dcb89 100644 --- a/src/calibre/ebooks/lit/maps/opf.py +++ b/src/calibre/ebooks/lit/maps/opf.py @@ -1,3 +1,10 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. 
Vandegrift ' + +""" +Microsoft LIT OPF tag and attribute tables, copied from ConvertLIT. +""" + TAGS = [ None, "package", diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 66d6fe9385..c53f266e79 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -4,7 +4,8 @@ Support for reading LIT files. from __future__ import with_statement __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' +__copyright__ = '2008, Kovid Goyal ' \ + 'and Marshall T. Vandegrift ' import sys, struct, cStringIO, os import functools diff --git a/src/calibre/utils/lzx/lzxmodule.c b/src/calibre/utils/lzx/lzxmodule.c index bf8a48a056..a1917b5749 100644 --- a/src/calibre/utils/lzx/lzxmodule.c +++ b/src/calibre/utils/lzx/lzxmodule.c @@ -1,3 +1,10 @@ +/* __license__ = 'GPL v3' + * __copyright__ = '2008, Marshall T. Vandegrift ' + * + * Python module C glue code. + */ + + #include #include From 87ae95cc7a1caeb2f20236db2df4b124fb99cc18 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sun, 20 Jul 2008 00:40:41 -0400 Subject: [PATCH 24/44] Removed duplicate LIT-parsing code. 
--- src/calibre/ebooks/lit/reader.py | 26 +- src/calibre/ebooks/metadata/lit.py | 737 +---------------------------- 2 files changed, 16 insertions(+), 747 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index c53f266e79..65fce4f3e9 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -767,33 +767,9 @@ class LitReader(object): if not os.path.isdir(dir): os.makedirs(dir) -def get_metadata(stream): - try: - litfile = LitReader(stream) - src = litfile.meta.encode('utf-8') - mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd()) - cover_url, cover_item = mi.cover, None - if cover_url: - cover_url = relpath(cover_url, os.getcwd()) - for item in litfile.manifest.values(): - if item.path == cover_url: - cover_item = item.internal - if cover_item is not None: - ext = cover_url.rpartition('.')[-1] - if not ext: - ext = 'jpg' - else: - ext = ext.lower() - cd = litfile.get_file(cover_item) - mi.cover_data = (ext, cd) if cd else (None, None) - except: - title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown' - mi = MetaInformation(title, ['Unknown']) - return mi - def option_parser(): from calibre import OptionParser - parser = OptionParser(usage=_('%prog [options] EBOOK')) + parser = OptionParser(usage=_('%prog [options] LITFILE')) parser.add_option('-o', '--output-dir', default='.', help=_('Output directory. Defaults to current directory.')) parser.add_option('--verbose', default=False, action='store_true', diff --git a/src/calibre/ebooks/metadata/lit.py b/src/calibre/ebooks/metadata/lit.py index 2b8c3a4b9f..825fe45cf4 100644 --- a/src/calibre/ebooks/metadata/lit.py +++ b/src/calibre/ebooks/metadata/lit.py @@ -1,734 +1,25 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' -Support for reading the metadata from a lit file. +Support for reading the metadata from a LIT file. 
''' -import sys, struct, cStringIO, os -from itertools import repeat +import sys, cStringIO, os from calibre import relpath from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf import OPFReader - -OPF_ATTR_MAP = [ - None, - "href", - "%never-used", - "%guid", - "%minimum_level", - "%attr5", - "id", - "href", - "media-type", - "fallback", - "idref", - "xmlns:dc", - "xmlns:oebpackage", - "role", - "file-as", - "event", - "scheme", - "title", - "type", - "unique-identifier", - "name", - "content", - "xml:lang", - ] - -OPF_TAG_MAP = [ - None, - "package", - "dc:Title", - "dc:Creator", - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - None, - "manifest", - "item", - "spine", - "itemref", - "metadata", - "dc-metadata", - "dc:Subject", - "dc:Description", - "dc:Publisher", - "dc:Contributor", - "dc:Date", - "dc:Type", - "dc:Format", - "dc:Identifier", - "dc:Source", - "dc:Language", - "dc:Relation", - "dc:Coverage", - "dc:Rights", - "x-metadata", - "meta", - "tours", - "tour", - "site", - "guide", - "reference", - None, - ] - -class DirectoryEntry(object): - def __init__(self, name, section, offset, size): - self.name = name - self.section = section - self.offset = offset - self.size = size - - def __repr__(self): - return '%s\n\tSection: %d\n\tOffset: %d\n\tSize: %d\n'%(self.name, - self.section, self.offset, self.size) - - def __str__(self): - return repr(self) - -class LitReadError(Exception): - pass - -def u32(bytes): - b = struct.unpack('BBBB', bytes[:4]) - return b[0] + (b[1] << 8) + (b[2] << 16) + (b[3] << 32) - -def u16(bytes): - b = struct.unpack('BB', bytes[:2]) - return b[0] + (b[1] << 8) - -def int32(bytes): - return u32(bytes)&0x7FFFFFFF - -def encint(bytes, remaining): - pos, val = 0, 0 - while remaining > 0: - b = ord(bytes[pos]) - pos += 1 - remaining -= 1 - val <<= 7 - val |= (b & 0x7f) - if b & 0x80 == 0: break - return val, bytes[pos:], remaining - -def read_utf8_char(bytes, pos): - 
c = ord(bytes[pos]) - mask = 0x80 - if (c & mask): - elsize = 0 - while c & mask: - mask >>= 1 - elsize += 1 - if (mask <= 1) or (mask == 0x40): - raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos]))) - else: - elsize = 1 - - - if elsize > 1: - if elsize + pos > len(bytes): - raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos]))) - c &= (mask - 1) - for i in range(1, elsize): - b = ord(bytes[pos+i]) - if (b & 0xC0) != 0x80: - raise LitReadError('Invalid UTF8 character: %s'%(repr(bytes[pos:pos+i]))) - c = (c << 6) | (b & 0x3F) - return unichr(c), pos+elsize - -FLAG_OPENING = 1 -FLAG_CLOSING = 2 -FLAG_BLOCK = 4 -FLAG_HEAD = 8 -FLAG_ATOM = 16 -XML_ENTITIES = ['&', ''', '<', '>', '"'] - -class UnBinary(object): - def __init__(self, bin, manifest, attr_map=OPF_ATTR_MAP, tag_map=OPF_TAG_MAP, - tag_to_attr_map=[[] for i in range(43)]): - self.manifest = manifest - self.pending_indent = 0 - self.lingering_space = 0 - self.was_in_text = 0 - self.attr_map = attr_map - self.tag_map = tag_map - self.tag_to_attr_map = tag_to_attr_map - self.opf = self.attr_map is OPF_ATTR_MAP - self.bin = bin - self.buf = cStringIO.StringIO() - self.ampersands = [] - self.binary_to_text() - self.raw = self.buf.getvalue().lstrip().decode('utf-8') - self.escape_ampersands() - - def escape_ampersands(self): - offset = 0 - for pos in self.ampersands: - test = self.raw[pos+offset:pos+offset+6] - if test.startswith('&#') and ';' in test: - continue - escape = True - for ent in XML_ENTITIES: - if test.startswith(ent): - escape = False - break - if not escape: - continue - self.raw = self.raw[:pos+offset] + '&' + self.raw[pos+offset+1:] - offset += 4 - - - def write_spaces(self, depth): - self.buf.write(u''.join(repeat(' ', depth))) - - def item_path(self, internal_id): - for i in self.manifest: - if i == internal_id: - return i.path - raise LitReadError('Could not find item %s'%(internal_id,)) - - def __unicode__(self): - return self.raw - - def binary_to_text(self, base=0, 
depth=0): - space_enabled, saved_space_enabled = 1, 0 - was_indented, is_goingdown = 0, 0 - tag_name = current_map = None - dynamic_tag = errors = in_censorship = 0 - - state = 'text' - index = base - flags = 0 - - while index < len(self.bin): - c, index = read_utf8_char(self.bin, index) - if state == 'text': - if ord(c) == 0: - state = 'get flags' - continue - if (not self.was_in_text) or space_enabled: - space_enabled = 0; - if c in (' ', '\t', '\n', '\r'): - space_enabled += 1 - else: - self.was_in_text = 1 - if c == '\v': - c = '\n' - pending_indent = 0 - if c == '&': - self.ampersands.append(self.buf.tell()-1) - self.buf.write(c.encode('utf-8') if isinstance(c, unicode) else c) - elif state == 'get flags': - if ord(c) == 0: - state = 'text' - continue - flags = ord(c) - state = 'get tag' - elif state == 'get tag': - state = 'text' if ord(c) == 0 else 'get attr' - if flags & FLAG_OPENING: - if space_enabled and ((not self.was_in_text) or (flags &(FLAG_BLOCK|FLAG_HEAD))): - self.pending_indent += 1 - if self.pending_indent or self.opf: - was_indented += 1 - self.buf.write(u'\n') - self.write_spaces(depth) - pending_indent = 0 - if (flags & FLAG_HEAD) or (flags & FLAG_BLOCK) or \ - self.opf or depth == 0: - pending_indent = 1 - tag = ord(c) - self.buf.write('<') - if not (flags & FLAG_CLOSING): - is_goingdown = 1 - if tag == 0x8000: - state = 'get custom length' - continue - if flags & FLAG_ATOM: - raise LitReadError('TODO: Atoms not yet implemented') - elif tag < len(self.tag_map): - tag_name = self.tag_map[tag] - current_map = self.tag_to_attr_map[tag] - else: - dynamic_tag += 1 - errors += 1 - tag_name = '?'+unichr(tag)+'?' 
- current_map = self.tag_to_attr_map[tag] - print 'WARNING: tag %s unknown'%(unichr(tag),) - - self.buf.write(unicode(tag_name).encode('utf-8')) - elif flags & FLAG_CLOSING: - #if depth == 0: - # raise LitReadError('Extra closing tag') - self.lingering_space = space_enabled - return index - elif state == 'get attr': - in_censorship = 0 - if ord(c) == 0: - if not is_goingdown: - tag_name = None - dynamic_tag = 0 - self.buf.write(' />') - else: - self.buf.write('>') - if not self.opf and (flags & (FLAG_BLOCK|FLAG_HEAD)): - pending_indent += 1 - index = self.binary_to_text(base=index, depth=depth+1) - is_goingdown = 0 - if not tag_name: - raise LitReadError('Tag ends before it begins.') - saved_space_enabled = space_enabled - space_enabled = self.lingering_space - if space_enabled and was_indented and not self.was_in_text: - self.buf.write('\n') - self.write_spaces(depth) - self.buf.write('') - if (space_enabled and self.opf) or (flags & (FLAG_BLOCK|FLAG_HEAD)): - self.pending_indent += 1 - dynamic_tag = 0 - tag_name = None - space_enabled = saved_space_enabled - - self.was_in_text = 0 - state = 'text' - else: - if ord(c) == 0x8000: - state = 'get attr length' - continue - attr = None - if ord(c) < len(current_map) and current_map[ord(c)]: - attr = current_map[ord(c)] - elif ord(c) < len(self.attr_map): - attr = self.attr_map[ord(c)] - - if not attr or not isinstance(attr, basestring): - raise LitReadError('Unknown attribute %d in tag %s'%(ord(c), tag_name)) - - if attr.startswith('%'): - in_censorship = 1 - state = 'get value length' - continue - - self.buf.write(' ' + unicode(attr).encode('utf-8') + '=') - if attr in ['href', 'src']: - state = 'get href' - else: - state = 'get value length' - elif state == 'get value length': - if not in_censorship: - self.buf.write('"') - char_count = ord(c) - 1 - if not char_count: - if not in_censorship: - self.buf.write('"') - in_censorship = 0 - state = 'get attr' - state = 'get value' - if ord(c) == 0xffff: - continue - if 
char_count < 0 or char_count > len(self.bin)-index: - raise LitReadError('Invalid character count %d'%(char_count,)) - elif state == 'get value': - if char_count == 0xfffe: - if not in_censorship: - self.buf.write(str(ord(c)-1)) - in_censorship = 0 - state = 'get attr' - elif char_count: - if not in_censorship: - self.buf.write(c) - char_count -= 1 - if not char_count: - if not in_censorship: - self.buf.write('"') - in_censorship = 0 - state = 'get attr' - elif state == 'get custom length': - char_count = ord(c) - 1 - if char_count <= 0 or char_count > len(self.bin)-index: - raise LitReadError('Invalid character count %d'%(char_count,)) - dynamic_tag += 1 - state = 'get custom' - tag_name = '' - elif state == 'get custom': - tag += c - char_count -= 1 - if not char_count: - self.buf.write(tag_name) - state = 'get attr' - elif state == 'get attr length': - char_count = ord(c) - 1 - if char_count <= 0 or char_count > len(self.bin)-index: - raise LitReadError('Invalid character count %d'%(char_count,)) - self.buf.write(' ') - state = 'get custom attr' - elif state == 'get custom attr': - self.buf.write(c) - char_count -= 1 - if not char_count: - self.buf.write('=') - state = 'get value length' - elif state == 'get href': - char_count = ord(c) - 1 - if char_count <= 0: - raise LitReadError('Invalid character count %d'%(char_count,)) - href = self.bin[index+1:index+char_count].decode('ascii') - index += char_count - doc, m, frag = href.partition('#') - path = self.item_path(doc) - if m and frag: - path += m+frag - self.buf.write((u'"%s"'%(path,)).encode('utf-8')) - state = 'get attr' - - self.lingering_space = space_enabled - return index - -class ManifestItem(object): - - def __init__(self, original, internal, mime_type, offset, root, state): - self.original = original - self.internal = internal - self.mime_type = mime_type - self.offset = offset - self.root = root - self.state = state - self.prefix = 'images' if state == 'images' else 'css' if state == 'css' else '' - 
self.prefix = self.prefix + os.sep if self.prefix else '' - self.path = self.prefix + self.original - - def __eq__(self, other): - if hasattr(other, 'internal'): - return self.internal == other.internal - return self.internal == other - - def __repr__(self): - return self.internal + u'->' + self.path - -class LitFile(object): - - PIECE_SIZE = 16 - - @apply - def magic(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(0) - val = self._stream.read(8) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def version(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(8) - val = u32(self._stream.read(4)) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def hdr_len(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(12) - val = int32(self._stream.read(4)) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def num_pieces(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(16) - val = int32(self._stream.read(4)) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def sec_hdr_len(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(20) - val = int32(self._stream.read(4)) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def guid(): - def fget(self): - opos = self._stream.tell() - self._stream.seek(24) - val = self._stream.read(16) - self._stream.seek(opos) - return val - return property(fget=fget) - - @apply - def header(): - def fget(self): - opos = self._stream.tell() - size = self.hdr_len + self.num_pieces*self.PIECE_SIZE + self.sec_hdr_len - self._stream.seek(0) - val = self._stream.read(size) - self._stream.seek(opos) - return val - return property(fget=fget) - - def __init__(self, stream): - self._stream = stream - if self.magic != 'ITOLITLS': - raise LitReadError('Not a valid LIT file') - if self.version != 1: - raise 
LitReadError('Unknown LIT version %d'%(self.version,)) - self.read_secondary_header() - self.read_header_pieces() - - - def read_secondary_header(self): - opos = self._stream.tell() - try: - self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) - bytes = self._stream.read(self.sec_hdr_len) - offset = int32(bytes[4:]) - - while offset < len(bytes): - blocktype = bytes[offset:offset+4] - blockver = u32(bytes[offset+4:]) - - if blocktype == 'CAOL': - if blockver != 2: - raise LitReadError('Unknown CAOL block format %d'%(blockver,)) - self.creator_id = u32(bytes[offset+12:]) - self.entry_chunklen = u32(bytes[offset+20:]) - self.count_chunklen = u32(bytes[offset+24:]) - self.entry_unknown = u32(bytes[offset+28:]) - self.count_unknown = u32(bytes[offset+32:]) - offset += 48 - elif blocktype == 'ITSF': - if blockver != 4: - raise LitReadError('Unknown ITSF block format %d'%(blockver,)) - if u32(bytes[offset+4+16:]): - raise LitReadError('This file has a 64bit content offset') - self.content_offset = u32(bytes[offset+16:]) - self.timestamp = u32(bytes[offset+24:]) - self.language_id = u32(bytes[offset+28:]) - offset += 48 - - if not hasattr(self, 'content_offset'): - raise LitReadError('Could not figure out the content offset') - finally: - self._stream.seek(opos) - - def read_header_pieces(self): - opos = self._stream.tell() - try: - src = self.header[self.hdr_len:] - for i in range(self.num_pieces): - piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE] - if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: - raise LitReadError('Piece %s has 64bit value'%(repr(piece),)) - offset, size = u32(piece), int32(piece[8:]) - self._stream.seek(offset) - piece = self._stream.read(size) - if i == 0: - continue # Dont need this piece - elif i == 1: - if u32(piece[8:]) != self.entry_chunklen or \ - u32(piece[12:]) != self.entry_unknown: - raise LitReadError('Secondary header does not match piece') - self.read_directory(piece) - elif i == 2: - if u32(piece[8:]) != 
self.count_chunklen or \ - u32(piece[12:]) != self.count_unknown: - raise LitReadError('Secondary header does not match piece') - continue # No data needed from this piece - elif i == 3: - self.piece3_guid = piece - elif i == 4: - self.piece4_guid = piece - finally: - self._stream.seek(opos) - - def read_directory(self, piece): - self.entries = [] - if not piece.startswith('IFCM'): - raise LitReadError('Header piece #1 is not main directory.') - chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28]) - - if 32 + chunk_size*num_chunks != len(piece): - raise LitReadError('IFCM HEADER has incorrect length') - - for chunk in range(num_chunks): - p = 32 + chunk*chunk_size - if piece[p:p+4] != 'AOLL': - continue - remaining = chunk_size - int32(piece[p+4:p+8]) - 48 - if remaining < 0: - raise LitReadError('AOLL remaining count is negative') - - entries = u16(piece[p+chunk_size-2:]) - - if entries <= 0: # Hopefully everything will work even without a correct entries count - entries = (2**16)-1 - - piece = piece[p+48:] - i = 0 - while i < entries: - if remaining <= 0: break - namelen, piece, remaining = encint(piece, remaining) - if namelen != (namelen & 0x7fffffff): - raise LitReadError('Directory entry had 64bit name length.') - if namelen > remaining - 3: - raise LitReadError('Read past end of directory chunk') - name = piece[:namelen] - piece = piece[namelen:] - section, piece, remaining = encint(piece, remaining) - offset, piece, remaining = encint(piece, remaining) - size, piece, remaining = encint(piece, remaining) - - entry = DirectoryEntry(name, section, offset, size) - - if name == '::DataSpace/NameList': - self.read_section_names(entry) - elif name == '/manifest': - self.read_manifest(entry) - elif name == '/meta': - self.read_meta(entry) - self.entries.append(entry) - i += 1 - - if not hasattr(self, 'sections'): - raise LitReadError('Lit file does not have a valid NameList') - - if not hasattr(self, 'manifest'): - raise LitReadError('Lit file does not 
have a valid manifest') - - def read_section_names(self, entry): - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - if len(raw) < 4: - raise LitReadError('Invalid Namelist section') - pos = 4 - self.num_sections = u16(raw[2:pos]) - - self.sections = {} - for section in range(self.num_sections): - size = u16(raw[pos:pos+2]) - pos += 2 - size = size*2 + 2 - if pos + size > len(raw): - raise LitReadError('Invalid Namelist section') - self.sections[section] = raw[pos:pos+size].decode('utf-16-le') - pos += size - finally: - self._stream.seek(opos) - - def read_manifest(self, entry): - opos = self._stream.tell() - try: - self.manifest = [] - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - pos = 0 - while pos < len(raw): - size = ord(raw[pos]) - if size == 0: break - pos += 1 - root = raw[pos:pos+size].decode('utf8') - pos += size - if pos >= len(raw): - raise LitReadError('Truncated manifest.') - for state in ['spine', 'not spine', 'css', 'images']: - num_files = int32(raw[pos:pos+4]) - pos += 4 - if num_files == 0: continue - - i = 0 - while i < num_files: - if pos+5 >= len(raw): - raise LitReadError('Truncated manifest.') - offset = u32(raw[pos:pos+4]) - pos += 4 - - slen = ord(raw[pos]) - pos += 1 - internal = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - original = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - mime_type = raw[pos:pos+slen].decode('utf8') - pos += slen +1 - - self.manifest.append(ManifestItem(original, internal, mime_type, offset, root, state)) - i += 1 - finally: - self._stream.seek(opos) - - def read_meta(self, entry): - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - - xml = \ -'''\ - - -'''+\ - unicode(UnBinary(raw, self.manifest)) - self.meta = xml - finally: - 
self._stream.seek(opos) - - def read_image(self, internal_name): - cover_entry = None - for entry in self.entries: - if internal_name in entry.name: - cover_entry = entry - break - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + cover_entry.offset) - return self._stream.read(cover_entry.size) - finally: - self._stream.seek(opos) +from calibre.ebooks.lit.reader import LitReader def get_metadata(stream): try: - litfile = LitFile(stream) + litfile = LitReader(stream) src = litfile.meta.encode('utf-8') mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd()) cover_url, cover_item = mi.cover, None if cover_url: cover_url = relpath(cover_url, os.getcwd()) - for item in litfile.manifest: + for item in litfile.manifest.values(): if item.path == cover_url: cover_item = item.internal if cover_item is not None: @@ -737,26 +28,28 @@ def get_metadata(stream): ext = 'jpg' else: ext = ext.lower() - cd = litfile.read_image(cover_item) - mi.cover_data = (ext, cd) if cd else (None, None) + cd = litfile.get_file('/data/' + cover_item) + mi.cover_data = (ext, cd) if cd else (None, None) except: title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown' mi = MetaInformation(title, ['Unknown']) return mi - - def main(args=sys.argv): if len(args) != 2: - print >>sys.stderr, _('Usage: %s file.lit')%(args[0],) + print >>sys.stderr, _('Usage: %s file.lit') % args[0] return 1 - mi = get_metadata(open(args[1], 'rb')) + fname = args[1] + mi = get_metadata(open(fname, 'rb')) print unicode(mi) if mi.cover_data[1]: - cover = os.path.abspath(os.path.splitext(os.path.basename(args[1]))[0] + '.' 
+ mi.cover_data[0]) + cover = os.path.abspath( + '.'.join((os.path.splitext(os.path.basename(fname))[0], + mi.cover_data[0]))) open(cover, 'wb').write(mi.cover_data[1]) print _('Cover saved to'), cover return 0 if __name__ == '__main__': - sys.exit(main()) \ No newline at end of file + sys.exit(main()) + From 63b6550e21ca0e3b2d3ff5afeba479c0c3bec147 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sun, 20 Jul 2008 01:02:14 -0400 Subject: [PATCH 25/44] Fix for LIT files with '..' in filename paths --- src/calibre/ebooks/lit/reader.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 65fce4f3e9..2a862141d1 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -331,7 +331,15 @@ class ManifestItem(object): self.offset = offset self.root = root self.state = state - self.path = self.original + # Some paths in Fictionwise "multiformat" LIT files contain '..' (!?) + nodes = original.split('/') + path = [] + for node in nodes: + if node == '..': + if path: path.pop() + continue + path.append(node) + self.path = os.path.join(*path) def __eq__(self, other): if hasattr(other, 'internal'): From 56b5b0e26c1505e16bccabeb513dc3d7f9c69241 Mon Sep 17 00:00:00 2001 From: "Marshall T. 
Vandegrift" Date: Sun, 20 Jul 2008 01:08:36 -0400 Subject: [PATCH 26/44] Fix a few lines which flow beyond 80 columns --- src/calibre/ebooks/lit/reader.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 2a862141d1..8cef0fdd18 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -122,7 +122,8 @@ class UnBinary(object): break if not escape: continue - self.raw = self.raw[:pos+offset] + '&' + self.raw[pos+offset+1:] + self.raw = '&'.join( + (self.raw[:pos+offset], self.raw[pos+offset+1:])) offset += 4 def item_path(self, internal_id): @@ -203,7 +204,8 @@ class UnBinary(object): is_goingdown = False if not tag_name: raise LitError('Tag ends before it begins.') - self.buf.write(u''.join(('')).encode('utf-8')) + self.buf.write(u''.join( + ('')).encode('utf-8')) dynamic_tag = 0 tag_name = None state = 'text' @@ -606,7 +608,8 @@ class LitReader(object): except LitError: if 'PENGUIN group' not in raw: raise print "WARNING: attempting PENGUIN malformed OPF fix" - raw = raw.replace('PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1) + raw = raw.replace( + 'PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1) xml = OPF_DECL + unicode(UnBinary(raw, self.manifest, OPF_MAP)) self.meta = xml @@ -735,7 +738,8 @@ class LitReader(object): raise("Reset table entry out of bounds") if bytes_remaining >= window_bytes: lzx.reset() - result.append(lzx.decompress(content[base:size], window_bytes)) + result.append( + lzx.decompress(content[base:size], window_bytes)) bytes_remaining -= window_bytes base = size accum += int32(reset_table[RESET_INTERVAL:]) @@ -778,10 +782,12 @@ class LitReader(object): def option_parser(): from calibre import OptionParser parser = OptionParser(usage=_('%prog [options] LITFILE')) - parser.add_option('-o', '--output-dir', default='.', - help=_('Output directory. 
Defaults to current directory.')) - parser.add_option('--verbose', default=False, action='store_true', - help='Useful for debugging.') + parser.add_option( + '-o', '--output-dir', default='.', + help=_('Output directory. Defaults to current directory.')) + parser.add_option( + '--verbose', default=False, action='store_true', + help='Useful for debugging.') return parser def main(args=sys.argv): From 0d2c447846bb87ea5cbaf9fe06329cc4a7b4ff5e Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sun, 20 Jul 2008 08:51:38 -0400 Subject: [PATCH 27/44] Switch '..'-removal logic to code more likely to play nice on Windows --- src/calibre/ebooks/lit/reader.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 8cef0fdd18..583c621a55 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -334,14 +334,9 @@ class ManifestItem(object): self.root = root self.state = state # Some paths in Fictionwise "multiformat" LIT files contain '..' (!?) - nodes = original.split('/') - path = [] - for node in nodes: - if node == '..': - if path: path.pop() - continue - path.append(node) - self.path = os.path.join(*path) + path = os.path.normpath(original).replace('\\', '/') + while path.startswith('../'): path = path[3:] + self.path = path def __eq__(self, other): if hasattr(other, 'internal'): From 6ece3eb10bf2f37db6ff288b817ce86786ae5591 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jul 2008 11:12:20 -0700 Subject: [PATCH 28/44] Fix typo in IPC socket cleanup code. 
--- src/calibre/parallel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/parallel.py b/src/calibre/parallel.py index f794626441..d46f61c8fd 100644 --- a/src/calibre/parallel.py +++ b/src/calibre/parallel.py @@ -18,7 +18,7 @@ Once a job is handed off to the worker, the protocol enters the second mode, whe the controller listens for messages from the worker. The worker can send progress updates as well as console output (i.e. text that would normally have been written to stdout or stderr by the job). Once the job completes (or raises an exception) the worker -returns the result (or exception) to the controller adnt he protocol reverts to the first mode. +returns the result (or exception) to the controller and the protocol reverts to the first mode. In the second mode, the controller can also send the worker STOP messages, in which case the worker interrupts the job and dies. The sending of progress and console output messages @@ -497,7 +497,7 @@ class Result(object): def remove_ipc_socket(path): os = __import__('os') if os.path.exists(path): - os.path.unlink(path) + os.unlink(path) class Server(Thread): From d78296e8bd45ef6bad0ec88535a395adca2d4034 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jul 2008 15:17:11 -0700 Subject: [PATCH 29/44] Disable forking of GUI in linux. 
--- src/calibre/gui2/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index e73ada9cb8..d85acb9dcb 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -1223,7 +1223,7 @@ class Main(MainWindow, Ui_MainWindow): def main(args=sys.argv): from calibre import singleinstance - pid = os.fork() if islinux else -1 + pid = os.fork() if False and islinux else -1 if pid <= 0: parser = option_parser('''\ %prog [opts] [path_to_ebook] From 55f5523699c985ccaf522a1bfb9eaf27cbddf7c3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jul 2008 15:18:32 -0700 Subject: [PATCH 30/44] Implement #890 (Improvement to cover view) --- src/calibre/gui2/cover_flow.py | 5 +++-- src/calibre/gui2/dialogs/config.py | 3 ++- src/calibre/gui2/dialogs/config.ui | 17 +++++++++++++++++ .../gui2/pictureflow/PyQt/pictureflow.sip | 2 +- src/calibre/gui2/pictureflow/pictureflow.cpp | 14 ++++++++------ src/calibre/gui2/pictureflow/pictureflow.h | 2 +- 6 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/calibre/gui2/cover_flow.py b/src/calibre/gui2/cover_flow.py index 7a6fe0a891..365d41ec50 100644 --- a/src/calibre/gui2/cover_flow.py +++ b/src/calibre/gui2/cover_flow.py @@ -12,7 +12,7 @@ import sys, os from PyQt4.QtGui import QImage, QSizePolicy from PyQt4.QtCore import Qt, QSize, SIGNAL, QObject -from calibre import pictureflow +from calibre import pictureflow, Settings if pictureflow is not None: class EmptyImageList(pictureflow.FlowImages): @@ -68,7 +68,8 @@ if pictureflow is not None: class CoverFlow(pictureflow.PictureFlow): def __init__(self, height=300, parent=None): - pictureflow.PictureFlow.__init__(self, parent) + pictureflow.PictureFlow.__init__(self, parent, + Settings().get('cover flow queue length', 6)+1) self.setSlideSize(QSize(int(2/3. 
* height), height)) self.setMinimumSize(QSize(int(2.35*0.67*height), (5/3.)*height+25)) self.setFocusPolicy(Qt.WheelFocus) diff --git a/src/calibre/gui2/dialogs/config.py b/src/calibre/gui2/dialogs/config.py index 89a90f04f0..6c6caddda9 100644 --- a/src/calibre/gui2/dialogs/config.py +++ b/src/calibre/gui2/dialogs/config.py @@ -66,6 +66,7 @@ class ConfigDialog(QDialog, Ui_Dialog): single_format = settings.get('save to disk single format', 'lrf') self.single_format.setCurrentIndex(BOOK_EXTENSIONS.index(single_format)) + self.cover_browse.setValue(settings.get('cover flow queue length', 6)) def compact(self, toggled): d = Vacuum(self, self.db) @@ -98,7 +99,7 @@ class ConfigDialog(QDialog, Ui_Dialog): pattern = self.filename_pattern.commit() settings.set('filename pattern', pattern) settings.set('save to disk single format', BOOK_EXTENSIONS[self.single_format.currentIndex()]) - + settings.set('cover flow queue length', self.cover_browse.value()) if not path or not os.path.exists(path) or not os.path.isdir(path): d = error_dialog(self, _('Invalid database location'), _('Invalid database location ')+path+_('
Must be a directory.')) diff --git a/src/calibre/gui2/dialogs/config.ui b/src/calibre/gui2/dialogs/config.ui index 1599520aca..70d223446f 100644 --- a/src/calibre/gui2/dialogs/config.ui +++ b/src/calibre/gui2/dialogs/config.ui @@ -132,6 +132,23 @@ + + + + + + &Number of covers to show in browse mode (after restart): + + + cover_browse + + + + + + + + diff --git a/src/calibre/gui2/pictureflow/PyQt/pictureflow.sip b/src/calibre/gui2/pictureflow/PyQt/pictureflow.sip index d0d3af27ea..4dc7e059d5 100644 --- a/src/calibre/gui2/pictureflow/PyQt/pictureflow.sip +++ b/src/calibre/gui2/pictureflow/PyQt/pictureflow.sip @@ -28,7 +28,7 @@ class PictureFlow : QWidget { public : - PictureFlow(QWidget *parent /TransferThis/ = 0); + PictureFlow(QWidget *parent /TransferThis/ = 0, int queueLength = 3); void setImages(FlowImages *images); diff --git a/src/calibre/gui2/pictureflow/pictureflow.cpp b/src/calibre/gui2/pictureflow/pictureflow.cpp index 9a7490d0d6..a6b28146e9 100644 --- a/src/calibre/gui2/pictureflow/pictureflow.cpp +++ b/src/calibre/gui2/pictureflow/pictureflow.cpp @@ -316,7 +316,7 @@ struct SlideInfo class PictureFlowPrivate { public: - PictureFlowPrivate(PictureFlow* widget); + PictureFlowPrivate(PictureFlow* widget, int queueLength); int slideCount() const; void setSlideCount(int count); @@ -368,6 +368,7 @@ private: int slideWidth; int slideHeight; int zoom; + int queueLength; int centerIndex; SlideInfo centerSlide; @@ -396,7 +397,7 @@ private: void resetSlides(); }; -PictureFlowPrivate::PictureFlowPrivate(PictureFlow* w) +PictureFlowPrivate::PictureFlowPrivate(PictureFlow* w, int queueLength_) { widget = w; slideImages = new FlowImages(); @@ -406,6 +407,7 @@ PictureFlowPrivate::PictureFlowPrivate(PictureFlow* w) zoom = 100; centerIndex = 0; + queueLength = queueLength_; slideFrame = 0; step = 0; @@ -553,7 +555,7 @@ void PictureFlowPrivate::resetSlides() centerSlide.slideIndex = centerIndex; leftSlides.clear(); - leftSlides.resize(3); + leftSlides.resize(queueLength); 
for(int i = 0; i < leftSlides.count(); i++) { SlideInfo& si = leftSlides[i]; @@ -565,7 +567,7 @@ void PictureFlowPrivate::resetSlides() } rightSlides.clear(); - rightSlides.resize(3); + rightSlides.resize(queueLength); for(int i = 0; i < rightSlides.count(); i++) { SlideInfo& si = rightSlides[i]; @@ -1104,9 +1106,9 @@ void PictureFlowPrivate::clearSurfaceCache() // ----------------------------------------- -PictureFlow::PictureFlow(QWidget* parent): QWidget(parent) +PictureFlow::PictureFlow(QWidget* parent, int queueLength): QWidget(parent) { - d = new PictureFlowPrivate(this); + d = new PictureFlowPrivate(this, queueLength); setAttribute(Qt::WA_StaticContents, true); setAttribute(Qt::WA_OpaquePaintEvent, true); diff --git a/src/calibre/gui2/pictureflow/pictureflow.h b/src/calibre/gui2/pictureflow/pictureflow.h index c1042e4adb..7431dee634 100644 --- a/src/calibre/gui2/pictureflow/pictureflow.h +++ b/src/calibre/gui2/pictureflow/pictureflow.h @@ -98,7 +98,7 @@ public: /*! Creates a new PictureFlow widget. */ - PictureFlow(QWidget* parent = 0); + PictureFlow(QWidget* parent = 0, int queueLength = 3); /*! Destroys the widget. 
From b06d484a099bfb8d0a6449c4bd95f14abde0200c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jul 2008 15:42:38 -0700 Subject: [PATCH 31/44] IGN:Remove old windows build script --- windows_installer.py | 586 ------------------------------------------- 1 file changed, 586 deletions(-) delete mode 100644 windows_installer.py diff --git a/windows_installer.py b/windows_installer.py deleted file mode 100644 index f21a842fbf..0000000000 --- a/windows_installer.py +++ /dev/null @@ -1,586 +0,0 @@ -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' -''' Create a windows installer ''' -import sys, re, os, shutil, subprocess, zipfile -from setup import VERSION, APPNAME, entry_points, scripts, basenames -from distutils.core import setup -from distutils.filelist import FileList -import py2exe, glob -from py2exe.build_exe import py2exe as build_exe -from calibre import __version__ as VERSION -from calibre import __appname__ as APPNAME - -PY2EXE_DIR = os.path.join('build','py2exe') -if os.path.exists(PY2EXE_DIR): - shutil.rmtree(PY2EXE_DIR) - - -class NSISInstaller(object): - TEMPLATE = r''' -; Do a Cyclic Redundancy Check to make sure the installer -; was not corrupted by the download. 
-CRCCheck on - -SetCompressor lzma -ShowInstDetails show -ShowUnInstDetails show - -;------------------------------------------------------------------------------------------------------ -;Include Modern UI - !include "MUI2.nsh" - !include "WinMessages.nsh" - -;------------------------------------------------------------------------------------------------------ -;Variables -Var STARTMENU_FOLDER -Var MUI_TEMP - -!define PRODUCT_NAME "%(name)s" -BrandingText "${PRODUCT_NAME} created by Kovid Goyal" -!define PRODUCT_VERSION "%(version)s" -!define WEBSITE "https://calibre.kovidgoyal.net" -!define DEVCON "C:\devcon\i386\devcon.exe" -!define PY2EXE_DIR "%(py2exe_dir)s" -!define LIBUSB_DIR "C:\libusb" -!define LIBUNRAR_DIR "C:\Program Files\UnrarDLL" -!define CLIT "C:\clit\clit.exe" -!define PDFTOHTML "C:\pdftohtml\pdftohtml.exe" -!define IMAGEMAGICK "C:\ImageMagick" -!DEFINE FONTCONFIG "C:\fontconfig" - - -; ---------------PATH manipulation ----------------------------------------------------------------- -; Registry key for changing the environment variables for all users on both XP and Vista -!define WriteEnvStr_RegKey 'HKLM "SYSTEM\CurrentControlSet\Control\Session Manager\Environment"' - -Function Trim ; Added by Pelaca - Exch $R1 - Push $R2 -Loop: - StrCpy $R2 "$R1" 1 -1 - StrCmp "$R2" " " RTrim - StrCmp "$R2" "$\n" RTrim - StrCmp "$R2" "$\r" RTrim - StrCmp "$R2" ";" RTrim - GoTo Done -RTrim: - StrCpy $R1 "$R1" -1 - Goto Loop -Done: - Pop $R2 - Exch $R1 -FunctionEnd - -; input, top of stack = string to search for -; top of stack-1 = string to search in -; output, top of stack (replaces with the portion of the string remaining) -; modifies no other variables. 
-; -; Usage: -; Push "this is a long ass string" -; Push "ass" -; Call StrStr -; Pop $R0 -; ($R0 at this point is "ass string") - -!macro StrStr un -Function ${un}StrStr -Exch $R1 ; st=haystack,old$R1, $R1=needle - Exch ; st=old$R1,haystack - Exch $R2 ; st=old$R1,old$R2, $R2=haystack - Push $R3 - Push $R4 - Push $R5 - StrLen $R3 $R1 - StrCpy $R4 0 - ; $R1=needle - ; $R2=haystack - ; $R3=len(needle) - ; $R4=cnt - ; $R5=tmp - loop: - StrCpy $R5 $R2 $R3 $R4 - StrCmp $R5 $R1 done - StrCmp $R5 "" done - IntOp $R4 $R4 + 1 - Goto loop -done: - StrCpy $R1 $R2 "" $R4 - Pop $R5 - Pop $R4 - Pop $R3 - Pop $R2 - Exch $R1 -FunctionEnd -!macroend -!insertmacro StrStr "" -!insertmacro StrStr "un." - -Function AddToPath - Exch $0 - Push $1 - Push $2 - Push $3 - ; don't add if the path doesn't exist - IfFileExists "$0\*.*" "" AddToPath_done - - ReadEnvStr $1 PATH - Push "$1;" - Push "$0;" - Call StrStr - Pop $2 - StrCmp $2 "" "" AddToPath_done - Push "$1;" - Push "$0\;" - Call StrStr - Pop $2 - StrCmp $2 "" "" AddToPath_done - GetFullPathName /SHORT $3 $0 - Push "$1;" - Push "$3;" - Call StrStr - Pop $2 - StrCmp $2 "" "" AddToPath_done - Push "$1;" - Push "$3\;" - Call StrStr - Pop $2 - StrCmp $2 "" "" AddToPath_done - - ReadRegStr $1 ${WriteEnvStr_RegKey} "PATH" - StrCmp $1 "" AddToPath_NTdoIt - Push $1 - Call Trim - Pop $1 - StrCpy $0 "$1;$0" - AddToPath_NTdoIt: - WriteRegExpandStr ${WriteEnvStr_RegKey} "PATH" $0 - SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 - - AddToPath_done: - Pop $3 - Pop $2 - Pop $1 - Pop $0 -FunctionEnd - -Function un.RemoveFromPath - Exch $0 - Push $1 - Push $2 - Push $3 - Push $4 - Push $5 - Push $6 - - IntFmt $6 "%%c" 26 # DOS EOF - - ReadRegStr $1 ${WriteEnvStr_RegKey} "PATH" - StrCpy $5 $1 1 -1 # copy last char - StrCmp $5 ";" +2 # if last char != ; - StrCpy $1 "$1;" # append ; - Push $1 - Push "$0;" - Call un.StrStr ; Find `$0;` in $1 - Pop $2 ; pos of our dir - StrCmp $2 "" unRemoveFromPath_done - ; else, it is 
in path - # $0 - path to add - # $1 - path var - StrLen $3 "$0;" - StrLen $4 $2 - StrCpy $5 $1 -$4 # $5 is now the part before the path to remove - StrCpy $6 $2 "" $3 # $6 is now the part after the path to remove - StrCpy $3 $5$6 - - StrCpy $5 $3 1 -1 # copy last char - StrCmp $5 ";" 0 +2 # if last char == ; - StrCpy $3 $3 -1 # remove last char - - WriteRegExpandStr ${WriteEnvStr_RegKey} "PATH" $3 - SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 - - unRemoveFromPath_done: - Pop $6 - Pop $5 - Pop $4 - Pop $3 - Pop $2 - Pop $1 - Pop $0 -FunctionEnd - -;------------------------------------------------------------------------------------------------------ -;General - - ;Name and file - Name "${PRODUCT_NAME}" - OutFile "%(outpath)s\${PRODUCT_NAME}-${PRODUCT_VERSION}.exe" - - ;Default installation folder - InstallDir "$PROGRAMFILES\${PRODUCT_NAME}" - - ;Get installation folder from registry if available - InstallDirRegKey HKCU "Software\${PRODUCT_NAME}" "" - - ;Vista redirects $SMPROGRAMS to all users without this - RequestExecutionLevel admin - -;------------------------------------------------------------------------------------------------------ -;Interface Settings - - !define MUI_HEADERIMAGE - !define MUI_HEADERIMAGE_BITMAP "icons\library.ico" - !define MUI_ABORTWARNING - -;------------------------------------------------------------------------------------------------------ -;Pages - - !insertmacro MUI_PAGE_WELCOME - !insertmacro MUI_PAGE_LICENSE "${PY2EXE_DIR}\LICENSE" - !insertmacro MUI_PAGE_COMPONENTS - !insertmacro MUI_PAGE_DIRECTORY - ;Start Menu Folder Page Configuration - !define MUI_STARTMENUPAGE_REGISTRY_ROOT "HKCU" - !define MUI_STARTMENUPAGE_REGISTRY_KEY "Software\${PRODUCT_NAME}" - !define MUI_STARTMENUPAGE_REGISTRY_VALUENAME "Start Menu Folder" - - !insertmacro MUI_PAGE_STARTMENU Application $STARTMENU_FOLDER - !insertmacro MUI_PAGE_INSTFILES - - ; Finish page with option to run program - ; Disabled as GUI requires 
PATH and working directory to be set correctly - ;!define MUI_FINISHPAGE_RUN "$INSTDIR\${PRODUCT_NAME}.exe" - ;!define MUI_FINISHPAGE_NOAUTOCLOSE - ;!insertmacro MUI_PAGE_FINISH - - !insertmacro MUI_UNPAGE_CONFIRM - !insertmacro MUI_UNPAGE_INSTFILES - !insertmacro MUI_UNPAGE_FINISH -;------------------------------------------------------------------------------------------------------ -;Languages - - !insertmacro MUI_LANGUAGE "English" -;------------------------------------------------------------------------------------------------------ -;Installer Sections - -Function .onInit - ; Prevent multiple instances of the installer from running - System::Call 'kernel32::CreateMutexA(i 0, i 0, t "${PRODUCT_NAME}-setup") i .r1 ?e' - Pop $R0 - - StrCmp $R0 0 +3 - MessageBox MB_OK|MB_ICONEXCLAMATION "The installer is already running." - Abort - -FunctionEnd - - -Section "Main" "secmain" - - SetOutPath "$INSTDIR" - - ;ADD YOUR OWN FILES HERE... - File /r "${PY2EXE_DIR}\*" - File "${CLIT}" - File "${PDFTOHTML}" - File /r "${FONTCONFIG}\*" - - SetOutPath "$INSTDIR\ImageMagick" - File /r "${IMAGEMAGICK}\*" - - - SetOutPath "$SYSDIR" - File "${LIBUNRAR_DIR}\unrar.dll" - DetailPrint " " - - ;Store installation folder - WriteRegStr HKCU "Software\${PRODUCT_NAME}" "" $INSTDIR - - ;Create uninstaller - WriteUninstaller "$INSTDIR\Uninstall.exe" - WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" \ - "DisplayName" "${PRODUCT_NAME} -- E-book management software" - WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" \ - "UninstallString" "$INSTDIR\Uninstall.exe" - - SetOutPath "$INSTDIR" - !insertmacro MUI_STARTMENU_WRITE_BEGIN Application - - ;Create shortcuts - WriteIniStr "$INSTDIR\${PRODUCT_NAME}.url" "InternetShortcut" "URL" "${WEBSITE}" - CreateDirectory "$SMPROGRAMS\$STARTMENU_FOLDER" - CreateShortCut "$SMPROGRAMS\$STARTMENU_FOLDER\calibre.lnk" "$INSTDIR\${PRODUCT_NAME}.exe" - CreateShortCut 
"$SMPROGRAMS\$STARTMENU_FOLDER\lrfviewer.lnk" "$INSTDIR\lrfviewer.exe" - CreateShortCut "$SMPROGRAMS\$STARTMENU_FOLDER\Website.lnk" "$INSTDIR\${PRODUCT_NAME}.url" - CreateShortCut "$SMPROGRAMS\$STARTMENU_FOLDER\Uninstall.lnk" "$INSTDIR\Uninstall.exe" - CreateShortCut "$DESKTOP\${PRODUCT_NAME}.lnk" "$INSTDIR\calibre.exe" - - !insertmacro MUI_STARTMENU_WRITE_END - - ;Add the installation directory to PATH for the commandline tools - Push "$INSTDIR" - Call AddToPath - -SectionEnd - -Section /o "Device Drivers (only needed for PRS500)" "secdd" - SetOutPath "$INSTDIR\driver" - File "${LIBUSB_DIR}\*.dll" - File "${LIBUSB_DIR}\*.sys" - File "${LIBUSB_DIR}\*.cat" - File "${LIBUSB_DIR}\*.inf" - File "${LIBUSB_DIR}\testlibusb-win.exe" - File "${DEVCON}" - - SetOutPath "$SYSDIR" - File "${LIBUSB_DIR}\libusb0.dll" - File "${LIBUSB_DIR}\libusb0.sys" - ;File "${LIBUSB_DIR}\libusb0_x64.dll" - ;File "${LIBUSB_DIR}\libusb0_x64.sys" - - ; Uninstall USB drivers - DetailPrint "Uninstalling any existing device drivers" - ExecWait '"$INSTDIR\driver\devcon.exe" remove "USB\VID_054C&PID_029B"' $0 - DetailPrint "devcon returned exit code $0" - - - DetailPrint "Installing USB driver for prs500..." - ExecWait '"$INSTDIR\driver\devcon.exe" install "$INSTDIR\driver\prs500.inf" "USB\VID_054C&PID_029B"' $0 - DetailPrint "devcon returned exit code $0" - IfErrors 0 +3 - MessageBox MB_OK|MB_ICONINFORMATION|MB_TOPMOST "Failed to install USB driver. devcon exit code: $0" - Goto +2 - MessageBox MB_OK '1. If you have the SONY Connect Reader software installed: $\nGoto Add Remove Programs and uninstall the entry "Windows Driver Package - Sony Corporation (PRSUSB)". $\n$\n2. If your reader is connected to the computer, disconnect and reconnect it now.' 
- DetailPrint " " - - - - -SectionEnd - -;------------------------------------------------------------------------------------------------------ -;Descriptions - - ;Language strings - LangString DESC_secmain ${LANG_ENGLISH} "The GUI and command-line tools for working with ebooks." - LangString DESC_secdd ${LANG_ENGLISH} "The device drivers to talk to the Sony PRS500. You only need this if you plan to transfer books to the Sony PRS500 with ${PRODUCT_NAME}. It is not required for the PRS 505." - - ;Assign language strings to sections - !insertmacro MUI_FUNCTION_DESCRIPTION_BEGIN - !insertmacro MUI_DESCRIPTION_TEXT ${secmain} $(DESC_secmain) - !insertmacro MUI_DESCRIPTION_TEXT ${secdd} $(DESC_secdd) - !insertmacro MUI_FUNCTION_DESCRIPTION_END -;------------------------------------------------------------------------------------------------------ -;Uninstaller Section - -Section "un.DeviceDrivers" - ; Uninstall USB drivers - ExecWait '"$INSTDIR\driver\devcon.exe" remove "USB\VID_054C&PID_029B"' $0 - DetailPrint "devcon returned exit code $0" -SectionEnd - -Section "Uninstall" - - ;ADD YOUR OWN FILES HERE... - RMDir /r "$INSTDIR" - !insertmacro MUI_STARTMENU_GETFOLDER Application $MUI_TEMP - RMDir /r "$SMPROGRAMS\$MUI_TEMP" - ;Delete empty start menu parent diretories - StrCpy $MUI_TEMP "$SMPROGRAMS\$MUI_TEMP" - - startMenuDeleteLoop: - ClearErrors - RMDir $MUI_TEMP - GetFullPathName $MUI_TEMP "$MUI_TEMP\.." 
- - IfErrors startMenuDeleteLoopDone - - StrCmp $MUI_TEMP $SMPROGRAMS startMenuDeleteLoopDone startMenuDeleteLoop - startMenuDeleteLoopDone: - Delete "$DESKTOP\${PRODUCT_NAME}.lnk" - - DeleteRegKey /ifempty HKCU "Software\${PRODUCT_NAME}" - DeleteRegKey HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" - ; Remove installation directory from PATH - Push "$INSTDIR" - Call un.RemoveFromPath -SectionEnd - ''' - def __init__(self, name, py2exe_dir, output_dir): - self.installer = self.__class__.TEMPLATE % dict(name=name, py2exe_dir=py2exe_dir, - version=VERSION, - outpath=os.path.abspath(output_dir)) - - def build(self): - f = open('installer.nsi', 'w') - path = f.name - f.write(self.installer) - f.close() - try: - subprocess.check_call('"C:\Program Files\NSIS\makensis.exe" /V2 ' + path, shell=True) - except: - print path - else: - os.remove(path) - - -class BuildEXE(build_exe): - manifest_resource_id = 0 - QT_PREFIX = r'C:\\Qt\\4.4.0' - MANIFEST_TEMPLATE = ''' - - - - Ebook management application - - - - - - - - - -''' - def build_plugins(self): - cwd = os.getcwd() - dd = os.path.join(cwd, self.dist_dir) - try: - os.chdir(os.path.join('src', 'calibre', 'gui2', 'pictureflow')) - if os.path.exists('.build'): - shutil.rmtree('.build') - os.mkdir('.build') - os.chdir('.build') - subprocess.check_call(['qmake', '../pictureflow.pro']) - subprocess.check_call(['mingw32-make', '-f', 'Makefile.Release']) - shutil.copyfile('release\\pictureflow0.dll', os.path.join(dd, 'pictureflow0.dll')) - os.chdir('..\\PyQt') - if not os.path.exists('.build'): - os.mkdir('.build') - os.chdir('.build') - subprocess.check_call(['python', '..\\configure.py']) - subprocess.check_call(['mingw32-make', '-f', 'Makefile']) - shutil.copyfile('pictureflow.pyd', os.path.join(dd, 'pictureflow.pyd')) - os.chdir('..') - shutil.rmtree('.build', True) - os.chdir('..') - shutil.rmtree('.build', True) - finally: - os.chdir(cwd) - - def run(self): - if not os.path.exists(self.dist_dir): 
- os.makedirs(self.dist_dir) - print 'Building custom plugins...' - self.build_plugins() - build_exe.run(self) - qtsvgdll = None - for other in self.other_depends: - if 'qtsvg4.dll' in other.lower(): - qtsvgdll = other - break - shutil.copyfile('LICENSE', os.path.join(self.dist_dir, 'LICENSE')) - print - if qtsvgdll: - print 'Adding', qtsvgdll - shutil.copyfile(qtsvgdll, os.path.join(self.dist_dir, os.path.basename(qtsvgdll))) - qtxmldll = os.path.join(os.path.dirname(qtsvgdll), 'QtXml4.dll') - print 'Adding', qtxmldll - shutil.copyfile(qtxmldll, - os.path.join(self.dist_dir, os.path.basename(qtxmldll))) - print 'Adding plugins...', - qt_prefix = self.QT_PREFIX - if qtsvgdll: - qt_prefix = os.path.dirname(os.path.dirname(qtsvgdll)) - plugdir = os.path.join(qt_prefix, 'plugins') - for d in ('imageformats', 'codecs', 'iconengines'): - print d, - imfd = os.path.join(plugdir, d) - tg = os.path.join(self.dist_dir, d) - if os.path.exists(tg): - shutil.rmtree(tg) - shutil.copytree(imfd, tg) - - print - print 'Adding main scripts' - f = zipfile.ZipFile(os.path.join('build', 'py2exe', 'library.zip'), 'a', zipfile.ZIP_DEFLATED) - for i in scripts['console'] + scripts['gui']: - f.write(i, i.partition('\\')[-1]) - f.close() - - print - print 'Doing DLL redirection' # See http://msdn.microsoft.com/en-us/library/ms682600(VS.85).aspx - for f in glob.glob(os.path.join('build', 'py2exe', '*.exe')): - open(f + '.local', 'wb').write('\n') - - - print - print - print 'Building Installer' - installer = NSISInstaller(APPNAME, self.dist_dir, 'dist') - installer.build() - - @classmethod - def manifest(cls, prog): - cls.manifest_resource_id += 1 - return (24, cls.manifest_resource_id, - cls.MANIFEST_TEMPLATE % dict(prog=prog, version=VERSION+'.0')) - - - -def main(): - sys.argv[1:2] = ['py2exe'] - - console = [dict(dest_base=basenames['console'][i], script=scripts['console'][i]) - for i in range(len(scripts['console']))] - sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) 
- setup( - cmdclass = {'py2exe': BuildEXE}, - windows = [ - {'script' : scripts['gui'][0], - 'dest_base' : APPNAME, - 'icon_resources' : [(1, 'icons/library.ico')], - 'other_resources' : [BuildEXE.manifest(APPNAME)], - }, - {'script' : scripts['gui'][1], - 'dest_base' : 'lrfviewer', - 'icon_resources' : [(1, 'icons/viewer.ico')], - 'other_resources' : [BuildEXE.manifest('lrfviewer')], - }, - ], - console = console, - options = { 'py2exe' : {'compressed': 1, - 'optimize' : 2, - 'dist_dir' : PY2EXE_DIR, - 'includes' : [ - 'sip', 'pkg_resources', 'PyQt4.QtSvg', - 'mechanize', 'ClientForm', 'wmi', - 'win32file', 'pythoncom', 'rtf2xml', - 'win32process', 'win32api', 'msvcrt', - 'win32event', 'calibre.ebooks.lrf.any.*', - 'calibre.ebooks.lrf.feeds.*', - 'lxml', 'lxml._elementpath', 'genshi', - 'path', 'pydoc', 'IPython.Extensions.*', - 'calibre.web.feeds.recipes.*', - 'PyQt4.QtWebKit', 'PyQt4.QtNetwork', - ], - 'packages' : ['PIL'], - 'excludes' : ["Tkconstants", "Tkinter", "tcl", - "_imagingtk", "ImageTk", "FixTk" - ], - 'dll_excludes' : ['mswsock.dll'], - }, - }, - - ) - return 0 - -if __name__ == '__main__': - sys.exit(main()) From 0957cca3b3a22a9896c21796860d915d6784f877 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jul 2008 15:51:03 -0700 Subject: [PATCH 32/44] Fix --- src/calibre/ebooks/metadata/opf.py | 2 +- src/calibre/ebooks/metadata/toc.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/opf.py b/src/calibre/ebooks/metadata/opf.py index e8075465a7..f5eb54bb6f 100644 --- a/src/calibre/ebooks/metadata/opf.py +++ b/src/calibre/ebooks/metadata/opf.py @@ -278,7 +278,7 @@ class OPF(MetaInformation): def get_comments(self): comments = self.soup.find('dc:description') - if comments: + if comments and comments.string: return self.ENTITY_PATTERN.sub(entity_to_unicode, comments.string).strip() return None diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py index 
dc039a7f80..0069505f79 100644 --- a/src/calibre/ebooks/metadata/toc.py +++ b/src/calibre/ebooks/metadata/toc.py @@ -89,6 +89,7 @@ class TOC(list): print 'Continuing anyway' else: path = opfreader.manifest.item(toc.lower()) + path = getattr(path, 'path', path) if path and os.access(path, os.R_OK): self.read_ncx_toc(path) return From dbc614118a632fab5114b96514ce7edfccddc000 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jul 2008 15:52:23 -0700 Subject: [PATCH 33/44] version 0.4.79 --- src/calibre/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 2d058fa221..b52a2bcbc8 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -1,7 +1,7 @@ ''' E-book management software''' __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' -__version__ = '0.4.78' +__version__ = '0.4.79' __docformat__ = "epytext" __author__ = "Kovid Goyal " __appname__ = 'calibre' From d4cedac9727bb7ad0b8b47be0bc7d8c0887dcaea Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jul 2008 15:59:19 -0700 Subject: [PATCH 34/44] IGN:Tag release From 8af938af1347677ca0916b1276f5306082f1f53b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Jul 2008 01:52:42 -0700 Subject: [PATCH 35/44] Use the system fontconfig on OSX Leopard --- src/calibre/utils/fontconfig.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/calibre/utils/fontconfig.py b/src/calibre/utils/fontconfig.py index 4275d03479..e55a4aab53 100644 --- a/src/calibre/utils/fontconfig.py +++ b/src/calibre/utils/fontconfig.py @@ -37,8 +37,11 @@ isosx = 'darwin' in sys.platform def load_library(): if isosx: - lib = os.path.join(getattr(sys, 'frameworks_dir'), 'libfontconfig.1.dylib') \ - if hasattr(sys, 'frameworks_dir') else util.find_library('fontconfig') + if os.path.exists('/usr/X11/lib/libfontconfig.1.dylib'): # The fontconfig shipped with calibre doesn't work on Leopard + lib = 
'/usr/X11/lib/libfontconfig.1.dylib' + else: + lib = os.path.join(getattr(sys, 'frameworks_dir'), 'libfontconfig.1.dylib') \ + if hasattr(sys, 'frameworks_dir') else util.find_library('fontconfig') return cdll.LoadLibrary(lib) elif iswindows: return cdll.LoadLibrary('libfontconfig-1') @@ -136,7 +139,7 @@ class FontScanner(Thread): def run(self): # Initialize the fontconfig library. This has to be done manually # for the OS X bundle as it may have its own private fontconfig. - if getattr(sys, 'frameworks_dir', False): + if getattr(sys, 'frameworks_dir', False) and not os.path.exists('/usr/X11/lib/libfontconfig.1.dylib'): config_dir = os.path.join(os.path.dirname(getattr(sys, 'frameworks_dir')), 'Resources', 'fonts') if isinstance(config_dir, unicode): config_dir = config_dir.encode(sys.getfilesystemencoding()) @@ -163,7 +166,7 @@ _scanner.start() def join(): _scanner.join(120) if _scanner.isAlive(): - raise RuntimeError('Scanning for system fonts seems to have hung. Try again in a little while.') + raise RuntimeError('Scanning for system fonts seems to have hung. Try again in a little while.') if _init_error is not None: raise RuntimeError(_init_error) From 961aa7c15bbf59b05e7b603b19d54b5672047f5a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Jul 2008 08:58:04 -0700 Subject: [PATCH 36/44] IGN:... 
--- src/calibre/linux_installer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/linux_installer.py b/src/calibre/linux_installer.py index fdfae15d07..d2266720f9 100644 --- a/src/calibre/linux_installer.py +++ b/src/calibre/linux_installer.py @@ -266,7 +266,7 @@ def download_tarball(): def main(args=sys.argv): defdir = '/opt/calibre' - destdir = raw_input('Enter the installation directory for calibre [%s]: '%defdir) + destdir = raw_input('Enter the installation directory for calibre [%s]: '%defdir).strip() if not destdir: destdir = defdir if os.path.exists(destdir): From 2d512fa7f72510e1b850e7537d3c623b573de8b5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Jul 2008 10:22:22 -0700 Subject: [PATCH 37/44] Fix the -c option to calibre-debug --- src/calibre/debug.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/debug.py b/src/calibre/debug.py index ab990b38a8..aecff15351 100644 --- a/src/calibre/debug.py +++ b/src/calibre/debug.py @@ -19,7 +19,7 @@ Run an embedded python interpreter. parser.add_option('--update-module', help='Update the specified module in the frozen library. 
'+ 'Module specifications are of the form full.name.of.module,path_to_module.py', default=None ) - parser.add_option('-c', help='Run python code.', default=None, dest='command') + parser.add_option('-c', '--command', help='Run python code.', default=None) return parser def update_zipfile(zipfile, mod, path): @@ -43,6 +43,7 @@ def main(args=sys.argv): mod, path = opts.update_module.partition(',')[0], opts.update_module.partition(',')[-1] update_module(mod, os.path.expanduser(path)) elif opts.command: + sys.argv = args[:1] exec opts.command else: from IPython.Shell import IPShellEmbed From 8d779e91567133e8b35e856949d12e0f196dd296 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Jul 2008 13:26:35 -0700 Subject: [PATCH 38/44] Fix #898 (TOC text block error) --- Makefile | 8 +++- src/calibre/__init__.py | 15 ++++--- src/calibre/ebooks/lrf/html/convert_from.py | 2 + src/calibre/gui2/main.py | 8 +--- src/calibre/library/database.py | 46 --------------------- src/calibre/manual/faq.rst | 5 +-- 6 files changed, 21 insertions(+), 63 deletions(-) diff --git a/Makefile b/Makefile index 1abffcec66..399c086d95 100644 --- a/Makefile +++ b/Makefile @@ -33,11 +33,17 @@ pictureflow : cd ../PyQt && \ mkdir -p .build && \ cd .build && rm -f * && \ - python ../configure.py && make && \ + ${PYTHON} ../configure.py && make && \ cd ../../../../../.. 
&& \ cp src/calibre/gui2/pictureflow/PyQt/.build/pictureflow.so src/calibre/plugins/ && \ rm -rf src/calibre/gui2/pictureflow/.build rm -rf src/calibre/gui2/pictureflow/PyQt/.build +lzx : + mkdir -p src/calibre/plugins && rm -f src/calibre/plugins/*pictureflow* && \ + cd src/calibre/utils/lzx && mkdir .build && cd .build && \ + ${PYTHON} ../configure.py && make && cd - && \ + cp src/calibre/utils/lzx/.build/lzx.so src/calibre/plugins/ && \ + rm -rf src/calibre/utils/lzx/.build/ pot : cd src/calibre/translations && ${PYTHON} __init__.py pot diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index b52a2bcbc8..2237024963 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -477,13 +477,16 @@ class Settings(QSettings): 'kovidgoyal.net', name) def get(self, key, default=None): - key = str(key) - if not self.contains(key): + try: + key = str(key) + if not self.contains(key): + return default + val = str(self.value(key, QVariant()).toByteArray()) + if not val: + return None + return cPickle.loads(val) + except: return default - val = str(self.value(key, QVariant()).toByteArray()) - if not val: - return None - return cPickle.loads(val) def set(self, key, val): val = cPickle.dumps(val, -1) diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py index e1014d69dd..b118520b82 100644 --- a/src/calibre/ebooks/lrf/html/convert_from.py +++ b/src/calibre/ebooks/lrf/html/convert_from.py @@ -1453,6 +1453,7 @@ class HTMLConverter(object, LoggingInterface): self.page_break_found = True if self.options.add_chapters_to_toc: + self.current_block.must_append = True self.extra_toc_entries.append((self.get_text(tag, limit=1000), self.current_block)) @@ -1666,6 +1667,7 @@ class HTMLConverter(object, LoggingInterface): self.page_break_found = True if self.options.add_chapters_to_toc: + self.current_block.must_append = True self.extra_toc_entries.append((self.get_text(tag, limit=1000), self.current_block)) diff --git 
a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index d85acb9dcb..fa3896a4ca 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -40,7 +40,6 @@ from calibre.gui2.dialogs.search import SearchDialog from calibre.gui2.dialogs.user_profiles import UserProfiles from calibre.gui2.dialogs.choose_format import ChooseFormatDialog from calibre.gui2.dialogs.book_info import BookInfo -from calibre.library.database import DatabaseLocked from calibre.ebooks.metadata.meta import set_metadata from calibre.ebooks.metadata import MetaInformation from calibre.ebooks import BOOK_EXTENSIONS @@ -1247,12 +1246,7 @@ path_to_ebook to the database. '

%s is already running. %s

'%(__appname__, extra)) return 1 initialize_file_icon_provider() - try: - main = Main(single_instance, opts) - except DatabaseLocked, err: - QMessageBox.critical(None, 'Cannot Start '+__appname__, - '

Another program is using the database.
Perhaps %s is already running?
If not try deleting the file %s'%(__appname__, err.lock_file_path)) - return 1 + main = Main(single_instance, opts) sys.excepthook = main.unhandled_exception if len(args) > 1: main.add_filesystem_book(args[1]) diff --git a/src/calibre/library/database.py b/src/calibre/library/database.py index d3f4654969..006438746b 100644 --- a/src/calibre/library/database.py +++ b/src/calibre/library/database.py @@ -31,47 +31,9 @@ class Concatenate(object): return self.ans[:-len(self.sep)] return self.ans -_lock_file = None -class DatabaseLocked(Exception): - - def __init__(self, msg, lock_file_path): - Exception.__init__(self, msg) - self.lock_file_path = lock_file_path - -def _lock(path): - path = os.path.join(os.path.dirname(path), '.'+os.path.basename(path)+'.lock') - global _lock_file - if _lock_file is not None: - raise DatabaseLocked('Database already locked in this instance.', _lock_file.name) - try: - _lock_file = open(path, 'wb') - except IOError: - raise DatabaseLocked('Database in use by another instance', path) - try: - import fcntl, errno - try: - fcntl.lockf(_lock_file.fileno(), fcntl.LOCK_EX|fcntl.LOCK_NB) - except IOError, err: - path = _lock_file.name - _lock_file = None - if err.errno in (errno.EACCES, errno.EAGAIN): - raise DatabaseLocked('Database in use by another instance', path) - except ImportError: - try: - import msvcrt - try: - msvcrt.locking(_lock_file.fileno(), msvcrt.LK_NBLCK, 1) - except IOError: - path = _lock_file.name - _lock_file = None - raise DatabaseLocked('Database in use by another instance', path) - except ImportError: - pass - def _connect(path): if isinstance(path, unicode): path = path.encode('utf-8') - #_lock(path) conn = sqlite.connect(path, detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES) conn.row_factory = lambda cursor, row : list(row) conn.create_aggregate('concat', 1, Concatenate) @@ -794,14 +756,6 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE; conn.commit() - def __del__(self): - global 
_lock_file - import os - if _lock_file is not None: - _lock_file.close() - if os.path.exists(_lock_file.name): - os.unlink(_lock_file.name) - def __init__(self, dbpath, row_factory=False): self.dbpath = dbpath self.conn = _connect(dbpath) diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index cb381cb8aa..d130bd9a1b 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -133,10 +133,9 @@ The graphical user interface of |app| is not starting on Windows? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you've never used the graphical user interface before, try deleting the file library1.db (it will be somewhere under :file:`C:\\Documents and Settings` on Windows XP and :file:`C:\\Users` on Windows Vista. If that doesn't fix the problem, locate the file calibre.log (in the same places as library1.db) and post its contents in a help message on the `Forums `_. If you can't find either file, try using the windows find feature to search for them. If the files dont exist on your system, try the following: -Start a command prompt (press the windows key and R and type cmd.exe in the run dialog). At the command prompt type the command `calibre-debug` and press enter. You will se a new, green prompt. At theis prompt, type the following two lines:: +Start a command prompt (press the windows key and R and type cmd.exe in the run dialog). At the command prompt type the following command and press Enter:: - from calibre.gui2.main import main - main() + calibre-debug -c "from calibre.gui2.main import main; main()" Post any output you see when asking for help. 
From 555c41c48eda633c8aefbaf5876b45db679b46d3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Jul 2008 13:29:01 -0700 Subject: [PATCH 39/44] Miscellaneous bug fixes --- src/calibre/trac/bzr_commit_plugin.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/calibre/trac/bzr_commit_plugin.py b/src/calibre/trac/bzr_commit_plugin.py index 01e3bc7ab0..61d4fa1dd9 100644 --- a/src/calibre/trac/bzr_commit_plugin.py +++ b/src/calibre/trac/bzr_commit_plugin.py @@ -32,14 +32,11 @@ class cmd_commit(_cmd_commit): return url.replace('//', '//%s:%s@'%(username, password))+'/login/xmlrpc' def get_trac_summary(self, bug, url): - print 'Getting bug summary for bug #%s'%bug + print 'Getting bug summary for bug #%s'%bug, server = xmlrpclib.ServerProxy(url) - try: - attributes = server.ticket.get(int(bug))[-1] - return attributes['summary'] - except: - raise - pass + attributes = server.ticket.get(int(bug))[-1] + print attributes['summary'] + return attributes['summary'] def expand_bug(self, msg, nick, config, bug_tracker, type='trac'): From aa6cba1e8e44870330f98afaf87d3353a827cb0e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Jul 2008 13:38:44 -0700 Subject: [PATCH 40/44] IGN:fixes to lit2oeb --- src/calibre/ebooks/lit/reader.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 583c621a55..c83342dc40 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -587,11 +587,12 @@ class LitReader(object): shared = mlist[0].path for item in mlist[1:]: path = item.path - while not path.startswith(shared): - shared = shared[:-1] - if shared == '': + while shared and not path.startswith(shared): + try: shared = shared[:shared.rindex("/", 0, -2) + 1] + except ValueError: shared = None + if not shared: break - else: + if shared: slen = len(shared) for item in mlist: item.path = item.path[slen:] From 
318de321f133b5547cbdee54ded6c50890155c04 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Jul 2008 13:40:25 -0700 Subject: [PATCH 41/44] IGN:fixes to lit2oeb --- src/calibre/ebooks/lit/reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index c83342dc40..2850b05a2b 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -757,12 +757,12 @@ class LitReader(object): opf_path = 'content.opf' opf_path = os.path.join(output_dir, opf_path) self._ensure_dir(opf_path) - with open(opf_path, 'w') as f: + with open(opf_path, 'wb') as f: f.write(self.meta.encode('utf-8')) for entry in self.manifest.values(): path = os.path.join(output_dir, entry.path) self._ensure_dir(path) - with open(path, 'w') as f: + with open(path, 'wb') as f: if 'spine' in entry.state: name = '/'.join(('/data', entry.internal, 'content')) f.write(self.get_markup_file(name).encode('utf-8')) From 349edbe472ea28d25516f4a503ef1b10110f5cda Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Jul 2008 16:55:15 -0700 Subject: [PATCH 42/44] IGN:Integrate lit2oeb --- Makefile | 10 ++++----- installer/windows/freeze.py | 22 ++++++++++++++++++- osx_installer.py | 25 +++++++++++++++++++++- src/calibre/__init__.py | 36 ++++++++++++++++++-------------- src/calibre/ebooks/lit/reader.py | 11 +++------- src/calibre/gui2/cover_flow.py | 3 ++- src/calibre/gui2/main.py | 5 ++--- src/calibre/linux.py | 2 ++ src/calibre/translations/nl.po | 10 +++++---- src/calibre/utils/lzx/setup.py | 15 +++++++++++++ 10 files changed, 100 insertions(+), 39 deletions(-) create mode 100644 src/calibre/utils/lzx/setup.py diff --git a/Makefile b/Makefile index 399c086d95..e2dc7770df 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ PYTHON = python all : plugins gui2 translations resources -plugins : src/calibre/plugins pictureflow +plugins : src/calibre/plugins pictureflow lzx src/calibre/plugins: mkdir 
-p src/calibre/plugins @@ -39,10 +39,10 @@ pictureflow : rm -rf src/calibre/gui2/pictureflow/.build rm -rf src/calibre/gui2/pictureflow/PyQt/.build lzx : - mkdir -p src/calibre/plugins && rm -f src/calibre/plugins/*pictureflow* && \ - cd src/calibre/utils/lzx && mkdir .build && cd .build && \ - ${PYTHON} ../configure.py && make && cd - && \ - cp src/calibre/utils/lzx/.build/lzx.so src/calibre/plugins/ && \ + mkdir -p src/calibre/plugins && rm -f src/calibre/plugins/lzx.so && \ + cd src/calibre/utils/lzx && \ + ${PYTHON} setup.py build --build-base=.build && cd - && \ + cp src/calibre/utils/lzx/.build/lib*/lzx.so src/calibre/plugins/ && \ rm -rf src/calibre/utils/lzx/.build/ pot : diff --git a/installer/windows/freeze.py b/installer/windows/freeze.py index 7dc5a8bbf3..576ea5c5a9 100644 --- a/installer/windows/freeze.py +++ b/installer/windows/freeze.py @@ -50,6 +50,25 @@ class BuildEXE(py2exe.build_exe.py2exe): ''' + def build_distutil_plugins(self): + plugins = [ + ('lzx', os.path.join('utils', 'lzx')), + ] + for name, path in plugins: + print 'Building plugin', name + path = os.path.abspath(os.path.join('src', 'calibre', path)) + cwd = os.getcwd() + dd = os.path.join(cwd, self.dist_dir) + os.chdir(path) + try: + if os.path.exists('.build'): + shutil.rmtree('.build') + subprocess.check_call(('python', 'setup.py', 'build', '--build-base', '.build')) + plugin = os.path.abspath(glob.glob('.build\\lib*\\%s.pyd'%name)[0]) + shutil.copyfile(plugin, os.path.join(dd, os.path.basename(plugin))) + finally: + os.chdir(cwd) + def build_plugins(self): cwd = os.getcwd() dd = os.path.join(cwd, self.dist_dir) @@ -80,6 +99,7 @@ class BuildEXE(py2exe.build_exe.py2exe): if not os.path.exists(self.dist_dir): os.makedirs(self.dist_dir) print 'Building custom plugins...' 
+ self.build_distutil_plugins() self.build_plugins() py2exe.build_exe.py2exe.run(self) qtsvgdll = None @@ -189,7 +209,7 @@ def main(args=sys.argv): 'calibre.ebooks.lrf.feeds.*', 'lxml', 'lxml._elementpath', 'genshi', 'path', 'pydoc', 'IPython.Extensions.*', - 'calibre.web.feeds.recipes.*', + 'calibre.web.feeds.recipes.*', 'PyQt4.QtWebKit', 'PyQt4.QtNetwork', ], 'packages' : ['PIL'], diff --git a/osx_installer.py b/osx_installer.py index c092650de6..c07702727d 100644 --- a/osx_installer.py +++ b/osx_installer.py @@ -171,6 +171,28 @@ _check_symlinks_prescript() subprocess.check_call(['/usr/bin/install_name_tool', '-change', '/Library/Frameworks/Python.framework/Versions/2.5/Python', '@executable_path/../Frameworks/Python.framework/Versions/2.5/Python', f]) + def build_distutils_plugins(self): + plugins = [ + ('lzx', os.path.join('utils', 'lzx')), + ] + files = [] + env = {'PATH':os.environ['PATH']} + for name, path in plugins: + print 'Building plugin', name + path = os.path.abspath(os.path.join('src', 'calibre', path)) + cwd = os.getcwd() + os.chdir(path) + try: + if os.path.exists('.build'): + shutil.rmtree('.build') + subprocess.check_call((sys.executable, 'setup.py', 'build', '--build-base', '.build'), + env=env) + plugin = os.path.abspath(glob.glob('.build/lib*/%s.so'%name)[0]) + files.append([plugin, os.path.basename(plugin)]) + finally: + os.chdir(cwd) + return files + def build_plugins(self): cwd = os.getcwd() qmake = '/Users/kovid/qt/bin/qmake' @@ -205,6 +227,7 @@ _check_symlinks_prescript() def run(self): + plugin_files = self.build_distutils_plugins() py2app.run(self) resource_dir = os.path.join(self.dist_dir, APPNAME + '.app', 'Contents', 'Resources') @@ -227,7 +250,7 @@ _check_symlinks_prescript() os.chmod(path, stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH|stat.S_IREAD\ |stat.S_IWUSR|stat.S_IROTH|stat.S_IRGRP) self.add_qt_plugins() - plugin_files = self.build_plugins() + plugin_files += self.build_plugins() print print 'Adding clit' diff --git 
a/src/calibre/__init__.py b/src/calibre/__init__.py index 2237024963..0a11a02705 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -563,22 +563,6 @@ def strftime(fmt, t=time.localtime()): except: return unicode(result, 'utf-8', 'replace') -if islinux and not getattr(sys, 'frozen', False): - import pkg_resources - plugins = pkg_resources.resource_filename(__appname__, 'plugins') - sys.path.insert(1, plugins) - -if iswindows and hasattr(sys, 'frozen'): - sys.path.insert(1, os.path.dirname(sys.executable)) - -try: - import pictureflow - pictureflowerror = '' -except Exception, err: - pictureflow = None - pictureflowerror = str(err) - - def entity_to_unicode(match, exceptions=[], encoding='cp1252'): ''' @param match: A match object such that '&'+match.group(1)';' is the entity. @@ -621,3 +605,23 @@ if isosx: for font in fonts: exec 'from calibre.ebooks.lrf.fonts.liberation.'+font+' import font_data' open(os.path.join(fdir, font+'.ttf'), 'wb').write(font_data) + +if islinux and not getattr(sys, 'frozen', False): + import pkg_resources + plugins = pkg_resources.resource_filename(__appname__, 'plugins') + sys.path.insert(1, plugins) + +if iswindows and getattr(sys, 'frozen', False): + sys.path.insert(1, os.path.dirname(sys.executable)) + + +plugins = {} +for plugin in ['pictureflow', 'lzx']: + try: + p, err = __import__(plugin), '' + except Exception, err: + p = None + err = str(err) + plugins[plugin] = (p, err) + + diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 2850b05a2b..4a97571bfb 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -9,17 +9,12 @@ __copyright__ = '2008, Kovid Goyal ' \ import sys, struct, cStringIO, os import functools -import codecs -from itertools import repeat - -from calibre import relpath -from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.metadata.opf import OPFReader from calibre.ebooks.lit import LitError from 
calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP import calibre.ebooks.lit.mssha1 as mssha1 import calibre.ebooks.lit.msdes as msdes -import calibre.utils.lzx as lzx +from calibre import plugins +lzx, lxzerror = plugins['lzx'] OPF_DECL = """ Date: Mon, 21 Jul 2008 20:14:15 -0400 Subject: [PATCH 43/44] Add a basic inline stylesheet to Mobipocket books --- src/calibre/ebooks/mobi/reader.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index dd44393672..c9045b4a8f 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -167,9 +167,15 @@ class MobiReader(object): self.replace_page_breaks() self.cleanup_html() - self.processed_html = re.compile('', re.IGNORECASE).sub( - '\n\n', - self.processed_html) + self.processed_html = \ + re.compile('', re.IGNORECASE).sub( + '\n' + '\n' + '\n', + self.processed_html) soup = BeautifulSoup(self.processed_html.replace('> <', '>\n<')) self.cleanup_soup(soup) @@ -214,6 +220,11 @@ class MobiReader(object): del tag['width'] except KeyError: pass + try: + styles.append('text-align: %s' % tag['align']) + del tag['align'] + except KeyError: + pass if styles: tag['style'] = '; '.join(styles) From 574831e69a4d19b076dc71be3b8d10494d0b13c7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Jul 2008 18:55:15 -0700 Subject: [PATCH 44/44] Add ISBN field to calibredb list output --- src/calibre/library/cli.py | 2 +- src/calibre/library/database.py | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/calibre/library/cli.py b/src/calibre/library/cli.py index 0e972cb20b..2a3e58f99b 100644 --- a/src/calibre/library/cli.py +++ b/src/calibre/library/cli.py @@ -19,7 +19,7 @@ from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.opf import OPFCreator, OPFReader from calibre.library.database import LibraryDatabase, text_to_tokens 
-FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats']) +FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn']) def get_parser(usage): parser = OptionParser(usage) diff --git a/src/calibre/library/database.py b/src/calibre/library/database.py index 006438746b..d1f759e0f8 100644 --- a/src/calibre/library/database.py +++ b/src/calibre/library/database.py @@ -754,7 +754,32 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE; conn.execute('UPDATE books SET author_sort=? WHERE id=?', (aus, id)) conn.execute('pragma user_version=11') conn.commit() - + + @staticmethod + def upgrade_version11(conn): + conn.executescript( +''' +/***** Add isbn column to meta view ******/ + DROP VIEW meta; + CREATE VIEW meta AS + SELECT id, title, + (SELECT concat(name) FROM authors WHERE authors.id IN (SELECT author from books_authors_link WHERE book=books.id)) authors, + (SELECT name FROM publishers WHERE publishers.id IN (SELECT publisher from books_publishers_link WHERE book=books.id)) publisher, + (SELECT rating FROM ratings WHERE ratings.id IN (SELECT rating from books_ratings_link WHERE book=books.id)) rating, + timestamp, + (SELECT MAX(uncompressed_size) FROM data WHERE book=books.id) size, + (SELECT concat(name) FROM tags WHERE tags.id IN (SELECT tag from books_tags_link WHERE book=books.id)) tags, + (SELECT text FROM comments WHERE book=books.id) comments, + (SELECT name FROM series WHERE series.id IN (SELECT series FROM books_series_link WHERE book=books.id)) series, + series_index, + sort, + author_sort, + (SELECT concat(format) FROM data WHERE data.book=books.id) formats, + isbn + FROM books; +''') + conn.execute('pragma user_version=5') + conn.commit() def __init__(self, dbpath, row_factory=False): self.dbpath = dbpath