diff --git a/src/calibre/ebooks/lit/__init__.py b/src/calibre/ebooks/lit/__init__.py new file mode 100644 index 0000000000..412a52ab05 --- /dev/null +++ b/src/calibre/ebooks/lit/__init__.py @@ -0,0 +1,5 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +class LitError(Exception): + pass diff --git a/src/calibre/ebooks/lit/lzxd.py b/src/calibre/ebooks/lit/lzxd.py new file mode 100644 index 0000000000..a09daf012b --- /dev/null +++ b/src/calibre/ebooks/lit/lzxd.py @@ -0,0 +1,138 @@ +import copy + +# some constants defined by the LZX specification +MIN_MATCH = 2 +MAX_MATCH = 257 +NUM_CHARS = 256 +BLOCKTYPE_INVALID = 0 # also blocktypes 4-7 invalid +BLOCKTYPE_VERBATIM = 1 +BLOCKTYPE_ALIGNED = 2 +BLOCKTYPE_UNCOMPRESSED = 3 +PRETREE_NUM_ELEMENTS = 20 +ALIGNED_NUM_ELEMENTS = 8 # aligned offset tree #elements +NUM_PRIMARY_LENGTHS = 7 # this one missing from spec! +NUM_SECONDARY_LENGTHS = 249 # length tree #elements + +# LZX huffman defines: tweak tablebits as desired +PRETREE_MAXSYMBOLS = LZX_PRETREE_NUM_ELEMENTS +PRETREE_TABLEBITS = 6 +MAINTREE_MAXSYMBOLS = LZX_NUM_CHARS + 50*8 +MAINTREE_TABLEBITS = 12 +LENGTH_MAXSYMBOLS = LZX_NUM_SECONDARY_LENGTHS+1 +LENGTH_TABLEBITS = 12 +ALIGNED_MAXSYMBOLS = LZX_ALIGNED_NUM_ELEMENTS +ALIGNED_TABLEBITS = 7 +LENTABLE_SAFETY = 64 # table decoding overruns are allowed + +FRAME_SIZE = 32768 # the size of a frame in LZX + + +class BitReader(object): + def __init__(self, data): + self.data, self.pos, self.nbits = \ + data + "\x00\x00\x00\x00", 0, len(data) * 8 + + def peek(self, n): + r, g = 0, 0 + while g < n: + r = (r << 8) | ord(self.data[(self.pos + g) >> 3]) + g = g + 8 - ((self.pos + g) & 7) + return (r >> (g - n)) & ((1 << n) - 1) + + def remove(self, n): + self.pos += n + return self.pos <= self.nbits + + def left(self): + return self.nbits - self.pos + + def read(self, n): + val = self.peek(n) + self.remove(n) + return val + +class LzxError(Exception): + pass + +POSITION_BASE = [0]*51 +EXTRA_BITS = [0]*51 + +def _static_init(): + j = 0 + for i in xrange(0, 51, 2): + EXTRA_BITS[i] = j + EXTRA_BITS[i + 1] = j + if i != 0 or j < 17): j += 1 + j = 0 + for i in xrange(0, 51, 1): + POSITION_BASE[i] = j + j += 1 << extra_bits[i] +_static_init() + +class LzxDecompressor(object): + def __init__(self, window_bits, reset_interval=0x7fff): + # LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) + if window_bits < 15 or window_bits > 21: + raise LzxError("Invalid window size") + + self.window_size = 1 << window_bits + self.window_posn = 0 + self.frame_posn = 0 + self.frame = 0 + self.reset_interval = reset_interval + self.intel_filesize = 0 + self.intel_curpos = 0 + + # window bits: 15 16 17 18 19 20 21 + # position slots: 30 32 34 36 38 42 50 + self.posn_solts = 50 if window_bits == 21 \ + else 42 if window_bits == 20 else window_bits << 1 + self.intel_started = 0 + self.input_end = 0 + + # huffman code lengths + self.PRETREE_len = [0] * (PRETREE_MAXSYMBOLS + LENTABLE_SAFETY) + self.MAINTREE_len = [0] * (MAINTREE_MAXSYMBOLS + LENTABLE_SAFETY) + self.LENGTH_len = [0] * (LENGTH_MAXSYMBOLS + LENTABLE_SAFETY) + self.ALIGNED_len = [0] * (ALIGNED_MAXSYMBOLS + LENTABLE_SAFETY) + + # huffman decoding tables + self.PRETREE_table = \ + [0] * ((1 << PRETREE_TABLEBITS) + (PRETREE_MAXSYMBOLS * 2)) + self.MAINTREE_table = \ + [0] * ((1 << MAINTREE_TABLEBITS) + (MAINTREE_MAXSYMBOLS * 2)) + self.LENGTH_table = \ + [0] * ((1 << LENGTH_TABLEBITS) + (LENGTH_MAXSYMBOLS * 2)) + self.ALIGNED_table = \ + [0] * ((1 << ALIGNED_TABLEBITS) + (ALIGNED_MAXSYMBOLS * 2)) + + self.o_buf = self.i_buf = '' + + self._reset_state() + + def _reset_state(self): + self.R0 = 1 + self.R1 = 1 + self.R2 = 1 + self.header_read = 0 + self.block_remaining = 0 + self.block_type = BLOCKTYPE_INVALID + + # initialise tables to 0 (because deltas will be applied to them) + for i in xrange(MAINTREE_MAXSYMBOLS): self.MAINTREE_len[i] = 0 + for i in xrange(LENGTH_MAXSYMBOLS): self.LENGTH_len[i] = 0 + + def decompress(self, data, out_bytes): + return ''.join(self._decompress(data, out_bytes)) + + def _decompress(self, data, out_bytes): + # easy answers + if out_bytes < 0: + raise LzxError('Negative desired output bytes') + + # Initialize input and output + input = BitReader(data) + output = [] + + + diff --git a/src/calibre/ebooks/lit/maps/__init__.py b/src/calibre/ebooks/lit/maps/__init__.py new file mode 100644 index 0000000000..2abab3efe9 --- /dev/null +++ b/src/calibre/ebooks/lit/maps/__init__.py @@ -0,0 +1,2 @@ +from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP +from calibre.ebooks.lit.maps.html import MAP as HTML_MAP diff --git a/src/calibre/ebooks/lit/maps/html.py b/src/calibre/ebooks/lit/maps/html.py new file mode 100644 index 0000000000..de0286c764 --- /dev/null +++ b/src/calibre/ebooks/lit/maps/html.py @@ -0,0 +1,1008 @@ +TAGS = [ + None, + None, + None, + "a", + "acronym", + "address", + "applet", + "area", + "b", + "base", + "basefont", + "bdo", + "bgsound", + "big", + "blink", + "blockquote", + "body", + "br", + "button", + "caption", + "center", + "cite", + "code", + "col", + "colgroup", + None, + None, + "dd", + "del", + "dfn", + "dir", + "div", + "dl", + "dt", + "em", + "embed", + "fieldset", + "font", + "form", + "frame", + "frameset", + None, + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "hr", + "html", + "i", + "iframe", + "img", + "input", + "ins", + "kbd", + "label", + "legend", + "li", + "link", + "tag61", + "map", + "tag63", + "tag64", + "meta", + "nextid", + "nobr", + "noembed", + "noframes", + "noscript", + "object", + "ol", + "option", + "p", + "param", + "plaintext", + "pre", + "q", + "rp", + "rt", + "ruby", + "s", + "samp", + "script", + "select", + "small", + "span", + "strike", + "strong", + "style", + "sub", + "sup", + "table", + "tbody", + "tc", + "td", + "textarea", + "tfoot", + "th", + "thead", + "title", + "tr", + "tt", + "u", + "ul", + "var", + "wbr", + None, + ] + +ATTRS0 = { + 0x8010: "tabindex", + 0x8046: "title", + 0x804b: "style", + 0x804d: "disabled", + 0x83ea: "class", + 0x83eb: "id", + 0x83fe: "datafld", + 0x83ff: "datasrc", + 0x8400: "dataformatas", + 0x87d6: "accesskey", + 0x9392: "lang", + 0x93ed: "language", + 0x93fe: "dir", + 0x9771: "onmouseover", + 0x9772: "onmouseout", + 0x9773: "onmousedown", + 0x9774: "onmouseup", + 0x9775: "onmousemove", + 0x9776: "onkeydown", + 0x9777: "onkeyup", + 0x9778: "onkeypress", + 0x9779: "onclick", + 0x977a: "ondblclick", + 0x977e: "onhelp", + 0x977f: "onfocus", + 0x9780: "onblur", + 0x9783: "onrowexit", + 0x9784: "onrowenter", + 0x9786: "onbeforeupdate", + 0x9787: "onafterupdate", + 0x978a: "onreadystatechange", + 0x9790: "onscroll", + 0x9794: "ondragstart", + 0x9795: "onresize", + 0x9796: "onselectstart", + 0x9797: "onerrorupdate", + 0x9799: "ondatasetchanged", + 0x979a: "ondataavailable", + 0x979b: "ondatasetcomplete", + 0x979c: "onfilterchange", + 0x979f: "onlosecapture", + 0x97a0: "onpropertychange", + 0x97a2: "ondrag", + 0x97a3: "ondragend", + 0x97a4: "ondragenter", + 0x97a5: "ondragover", + 0x97a6: "ondragleave", + 0x97a7: "ondrop", + 0x97a8: "oncut", + 0x97a9: "oncopy", + 0x97aa: "onpaste", + 0x97ab: "onbeforecut", + 0x97ac: "onbeforecopy", + 0x97ad: "onbeforepaste", + 0x97af: "onrowsdelete", + 0x97b0: "onrowsinserted", + 0x97b1: "oncellchange", + 0x97b2: "oncontextmenu", + 0x97b6: "onbeforeeditfocus", + } +ATTRS3 = { + 0x0001: "href", + 0x03ec: "target", + 0x03ee: "rel", + 0x03ef: "rev", + 0x03f0: "urn", + 0x03f1: "methods", + 0x8001: "name", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS5 = { + 0x9399: "clear", + } +ATTRS6 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bbb: "classid", + 0x8bbc: "data", + 0x8bbf: "codebase", + 0x8bc0: "codetype", + 0x8bc1: "code", + 0x8bc2: "type", + 0x8bc5: "vspace", + 0x8bc6: "hspace", + 0x978e: "onerror", + } +ATTRS7 = { + 0x0001: "href", + 0x03ea: "shape", + 0x03eb: "coords", + 0x03ed: "target", + 0x03ee: "alt", + 0x03ef: "nohref", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS8 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS9 = { + 0x03ec: "href", + 0x03ed: "target", + } +ATTRS10 = { + 0x938b: "color", + 0x939b: "face", + 0x93a3: "size", + } +ATTRS12 = { + 0x03ea: "src", + 0x03eb: "loop", + 0x03ec: "volume", + 0x03ed: "balance", + } +ATTRS13 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS15 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS16 = { + 0x07db: "link", + 0x07dc: "alink", + 0x07dd: "vlink", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938b: "text", + 0x938e: "nowrap", + 0x93ae: "topmargin", + 0x93af: "rightmargin", + 0x93b0: "bottommargin", + 0x93b1: "leftmargin", + 0x93b6: "bgproperties", + 0x93d8: "scroll", + 0x977b: "onselect", + 0x9791: "onload", + 0x9792: "onunload", + 0x9798: "onbeforeunload", + 0x97b3: "onbeforeprint", + 0x97b4: "onafterprint", + 0xfe0c: "bgcolor", + } +ATTRS17 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS18 = { + 0x07d1: "type", + 0x8001: "name", + } +ATTRS19 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x93a8: "valign", + } +ATTRS20 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS21 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS22 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS23 = { + 0x03ea: "span", + 0x8006: "width", + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS24 = { + 0x03ea: "span", + 0x8006: "width", + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS27 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS29 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS31 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS32 = { + 0x03ea: "compact", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS33 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS34 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS35 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bbd: "palette", + 0x8bbe: "pluginspage", + 0x8bbf: "codebase", + 0x8bbf: "src", + 0x8bc1: "units", + 0x8bc2: "type", + 0x8bc3: "hidden", + } +ATTRS36 = { + 0x804a: "align", + } +ATTRS37 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938b: "color", + 0x939b: "face", + 0x939c: "size", + } +ATTRS38 = { + 0x03ea: "action", + 0x03ec: "enctype", + 0x03ed: "method", + 0x03ef: "target", + 0x03f4: "accept-charset", + 0x8001: "name", + 0x977c: "onsubmit", + 0x977d: "onreset", + } +ATTRS39 = { + 0x8000: "align", + 0x8001: "name", + 0x8bb9: "src", + 0x8bbb: "border", + 0x8bbc: "frameborder", + 0x8bbd: "framespacing", + 0x8bbe: "marginwidth", + 0x8bbf: "marginheight", + 0x8bc0: "noresize", + 0x8bc1: "scrolling", + 0x8fa2: "bordercolor", + } +ATTRS40 = { + 0x03e9: "rows", + 0x03ea: "cols", + 0x03eb: "border", + 0x03ec: "bordercolor", + 0x03ed: "frameborder", + 0x03ee: "framespacing", + 0x8001: "name", + 0x9791: "onload", + 0x9792: "onunload", + 0x9798: "onbeforeunload", + 0x97b3: "onbeforeprint", + 0x97b4: "onafterprint", + } +ATTRS42 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS43 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS44 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS45 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS46 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS47 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS49 = { + 0x03ea: "noshade", + 0x8006: "width", + 0x8007: "size", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938b: "color", + } +ATTRS51 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS52 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bb9: "src", + 0x8bbb: "border", + 0x8bbc: "frameborder", + 0x8bbd: "framespacing", + 0x8bbe: "marginwidth", + 0x8bbf: "marginheight", + 0x8bc0: "noresize", + 0x8bc1: "scrolling", + 0x8fa2: "vspace", + 0x8fa3: "hspace", + } +ATTRS53 = { + 0x03eb: "alt", + 0x03ec: "src", + 0x03ed: "border", + 0x03ee: "vspace", + 0x03ef: "hspace", + 0x03f0: "lowsrc", + 0x03f1: "vrml", + 0x03f2: "dynsrc", + 0x03f4: "loop", + 0x03f6: "start", + 0x07d3: "ismap", + 0x07d9: "usemap", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x978d: "onabort", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS54 = { + 0x07d1: "type", + 0x07d3: "size", + 0x07d4: "maxlength", + 0x07d6: "readonly", + 0x07d8: "indeterminate", + 0x07da: "checked", + 0x07db: "alt", + 0x07dc: "src", + 0x07dd: "border", + 0x07de: "vspace", + 0x07df: "hspace", + 0x07e0: "lowsrc", + 0x07e1: "vrml", + 0x07e2: "dynsrc", + 0x07e4: "loop", + 0x07e5: "start", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x93ee: "value", + 0x977b: "onselect", + 0x978d: "onabort", + 0x978e: "onerror", + 0x978f: "onchange", + 0x9791: "onload", + } +ATTRS56 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS57 = { + 0x03e9: "for", + } +ATTRS58 = { + 0x804a: "align", + } +ATTRS59 = { + 0x03ea: "value", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS60 = { + 0x03ee: "href", + 0x03ef: "rel", + 0x03f0: "rev", + 0x03f1: "type", + 0x03f9: "media", + 0x03fa: "target", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS61 = { + 0x9399: "clear", + } +ATTRS62 = { + 0x8001: "name", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS63 = { + 0x1771: "scrolldelay", + 0x1772: "direction", + 0x1773: "behavior", + 0x1774: "scrollamount", + 0x1775: "loop", + 0x1776: "vspace", + 0x1777: "hspace", + 0x1778: "truespeed", + 0x8006: "width", + 0x8007: "height", + 0x9785: "onbounce", + 0x978b: "onfinish", + 0x978c: "onstart", + 0xfe0c: "bgcolor", + } +ATTRS65 = { + 0x03ea: "http-equiv", + 0x03eb: "content", + 0x03ec: "url", + 0x03f6: "charset", + 0x8001: "name", + } +ATTRS66 = { + 0x03f5: "n", + } +ATTRS71 = { + 0x8000: "border", + 0x8000: "usemap", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x8bbb: "classid", + 0x8bbc: "data", + 0x8bbf: "codebase", + 0x8bc0: "codetype", + 0x8bc1: "code", + 0x8bc2: "type", + 0x8bc5: "vspace", + 0x8bc6: "hspace", + 0x978e: "onerror", + } +ATTRS72 = { + 0x03eb: "compact", + 0x03ec: "start", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS73 = { + 0x03ea: "selected", + 0x03eb: "value", + } +ATTRS74 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS75 = { + 0x8000: "name", + 0x8000: "value", + 0x8000: "type", + } +ATTRS76 = { + 0x9399: "clear", + } +ATTRS77 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS78 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS82 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS83 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS84 = { + 0x03ea: "src", + 0x03ed: "for", + 0x03ee: "event", + 0x03f0: "defer", + 0x03f2: "type", + 0x978e: "onerror", + } +ATTRS85 = { + 0x03eb: "size", + 0x03ec: "multiple", + 0x8000: "align", + 0x8001: "name", + 0x978f: "onchange", + } +ATTRS86 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS87 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS88 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS89 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS90 = { + 0x03eb: "type", + 0x03ef: "media", + 0x8046: "title", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS91 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS92 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS93 = { + 0x03ea: "cols", + 0x03eb: "border", + 0x03ec: "rules", + 0x03ed: "frame", + 0x03ee: "cellspacing", + 0x03ef: "cellpadding", + 0x03fa: "datapagesize", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0xfe0c: "bgcolor", + } +ATTRS94 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS95 = { + 0x8049: "align", + 0x93a8: "valign", + } +ATTRS96 = { + 0x07d2: "rowspan", + 0x07d3: "colspan", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938e: "nowrap", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS97 = { + 0x1b5a: "rows", + 0x1b5b: "cols", + 0x1b5c: "wrap", + 0x1b5d: "readonly", + 0x8001: "name", + 0x977b: "onselect", + 0x978f: "onchange", + } +ATTRS98 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS99 = { + 0x07d2: "rowspan", + 0x07d3: "colspan", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938e: "nowrap", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS100 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS102 = { + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS103 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS104 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS105 = { + 0x03eb: "compact", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS106 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS108 = { + 0x9399: "clear", + } + +TAGS_ATTRS = [ + None, + None, + None, + ATTRS3, # a + None, # acronym + ATTRS5, # address + ATTRS6, # applet + ATTRS7, # area + ATTRS8, # b + ATTRS9, # base + ATTRS10, # basefont + None, # bdo + ATTRS12, # bgsound + ATTRS13, # big + None, # blink + ATTRS15, # blockquote + ATTRS16, # body + ATTRS17, # br + ATTRS18, # button + ATTRS19, # caption + ATTRS20, # center + ATTRS21, # cite + ATTRS22, # code + ATTRS23, # col + ATTRS24, # colgroup + None, + None, + ATTRS27, # dd + None, # del + ATTRS29, # dfn + None, # dir + ATTRS31, # div + ATTRS32, # dl + ATTRS33, # dt + ATTRS34, # em + ATTRS35, # embed + ATTRS36, # fieldset + ATTRS37, # font + ATTRS38, # form + ATTRS39, # frame + ATTRS40, # frameset + None, + ATTRS42, # h1 + ATTRS43, # h2 + ATTRS44, # h3 + ATTRS45, # h4 + ATTRS46, # h5 + ATTRS47, # h6 + None, # head + ATTRS49, # hr + None, # html + ATTRS51, # i + ATTRS52, # iframe + ATTRS53, # img + ATTRS54, # input + None, # ins + ATTRS56, # kbd + ATTRS57, # label + ATTRS58, # legend + ATTRS59, # li + ATTRS60, # link + ATTRS61, # tag61 + ATTRS62, # map + ATTRS63, # tag63 + None, # tag64 + ATTRS65, # meta + ATTRS66, # nextid + None, # nobr + None, # noembed + None, # noframes + None, # noscript + ATTRS71, # object + ATTRS72, # ol + ATTRS73, # option + ATTRS74, # p + ATTRS75, # param + ATTRS76, # plaintext + ATTRS77, # pre + ATTRS78, # q + None, # rp + None, # rt + None, # ruby + ATTRS82, # s + ATTRS83, # samp + ATTRS84, # script + ATTRS85, # select + ATTRS86, # small + ATTRS87, # span + ATTRS88, # strike + ATTRS89, # strong + ATTRS90, # style + ATTRS91, # sub + ATTRS92, # sup + ATTRS93, # table + ATTRS94, # tbody + ATTRS95, # tc + ATTRS96, # td + ATTRS97, # textarea + ATTRS98, # tfoot + ATTRS99, # th + ATTRS100, # thead + None, # title + ATTRS102, # tr + ATTRS103, # tt + ATTRS104, # u + ATTRS105, # ul + ATTRS106, # var + None, # wbr + None, + ] + +MAP = (TAGS, ATTRS0, TAGS_ATTRS) diff --git a/src/calibre/ebooks/lit/maps/opf.py b/src/calibre/ebooks/lit/maps/opf.py new file mode 100644 index 0000000000..cc1acc4dfa --- /dev/null +++ b/src/calibre/ebooks/lit/maps/opf.py @@ -0,0 +1,74 @@ +TAGS = [ + None, + "package", + "dc:Title", + "dc:Creator", + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + "manifest", + "item", + "spine", + "itemref", + "metadata", + "dc-metadata", + "dc:Subject", + "dc:Description", + "dc:Publisher", + "dc:Contributor", + "dc:Date", + "dc:Type", + "dc:Format", + "dc:Identifier", + "dc:Source", + "dc:Language", + "dc:Relation", + "dc:Coverage", + "dc:Rights", + "x-metadata", + "meta", + "tours", + "tour", + "site", + "guide", + "reference", + None, + ] + +ATTRS = { + 0x0001: "href", + 0x0002: "%never-used", + 0x0003: "%guid", + 0x0004: "%minimum_level", + 0x0005: "%attr5", + 0x0006: "id", + 0x0007: "href", + 0x0008: "media-type", + 0x0009: "fallback", + 0x000A: "idref", + 0x000B: "xmlns:dc", + 0x000C: "xmlns:oebpackage", + 0x000D: "role", + 0x000E: "file-as", + 0x000F: "event", + 0x0010: "scheme", + 0x0011: "title", + 0x0012: "type", + 0x0013: "unique-identifier", + 0x0014: "name", + 0x0015: "content", + 0x0016: "xml:lang", + } + +TAGS_ATTRS = [{} for i in xrange(43)] + +MAP = (TAGS, ATTRS, TAGS_ATTRS) diff --git a/src/calibre/ebooks/lit/msdes.py b/src/calibre/ebooks/lit/msdes.py new file mode 100644 index 0000000000..5bc67b09bb --- /dev/null +++ b/src/calibre/ebooks/lit/msdes.py @@ -0,0 +1,481 @@ +# Re-modified for use in MS LIT decryption. Un-reversed the bytebit[] array. +# Substituted Microsoft's absurd modified S-boxes. Modified the encrypt/decrypt +# methods to handle more than one block at a time. +# +# And lo, all the previous notices follow: + +# Modified DES encryption for VNC password authentication. +# Ported from realvnc's java viewer by +# I chose this package name because it is not compatible with the +# original DES algorithm, e.g. found pycrypto. +# +# (C) 2003 chris +# Released as free software under the Python License. +# +# You're free to use it for commercial and noncommercial +# application, modify and redistribute it as long as the +# copyright notices are intact. There are no warranties, not +# even that it does what it says to do ;-) +# +# Original notice following: + +# This DES class has been extracted from package Acme.Crypto for use in VNC. +# The bytebit[] array has been reversed so that the most significant bit +# in each byte of the key is ignored, not the least significant. Also the +# unnecessary odd parity code has been removed. +# +# These changes are: +# Copyright (C) 1999 AT&T Laboratories Cambridge. All Rights Reserved. +# +# This software is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# + +# DesCipher - the DES encryption method +# +# The meat of this code is by Dave Zimmerman , and is: +# +# Copyright (c) 1996 Widget Workshop, Inc. All Rights Reserved. +# +# Permission to use, copy, modify, and distribute this software +# and its documentation for NON-COMMERCIAL or COMMERCIAL purposes and +# without fee is hereby granted, provided that this copyright notice is kept +# intact. +# +# WIDGET WORKSHOP MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY +# OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +# TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE, OR NON-INFRINGEMENT. WIDGET WORKSHOP SHALL NOT BE LIABLE +# FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR +# DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. +# +# THIS SOFTWARE IS NOT DESIGNED OR INTENDED FOR USE OR RESALE AS ON-LINE +# CONTROL EQUIPMENT IN HAZARDOUS ENVIRONMENTS REQUIRING FAIL-SAFE +# PERFORMANCE, SUCH AS IN THE OPERATION OF NUCLEAR FACILITIES, AIRCRAFT +# NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL, DIRECT LIFE +# SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH THE FAILURE OF THE +# SOFTWARE COULD LEAD DIRECTLY TO DEATH, PERSONAL INJURY, OR SEVERE +# PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH RISK ACTIVITIES"). WIDGET WORKSHOP +# SPECIFICALLY DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR +# HIGH RISK ACTIVITIES. +# +# +# The rest is: +# +# Copyright (C) 1996 by Jef Poskanzer . All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# Visit the ACME Labs Java page for up-to-date versions of this and other +# fine Java utilities: http://www.acme.com/java/ + + +#/ The DES encryption method. +#

+# This is surprisingly fast, for pure Java. On a SPARC 20, wrapped +# in Acme.Crypto.EncryptedOutputStream or Acme.Crypto.EncryptedInputStream, +# it does around 7000 bytes/second. +#

+# Most of this code is by Dave Zimmerman , and is +# Copyright (c) 1996 Widget Workshop, Inc. See the source file for details. +#

+# Fetch the software.
+# Fetch the entire Acme package. +#

+# @see Des3Cipher +# @see EncryptedOutputStream +# @see EncryptedInputStream + +import struct + +class DesCipher: + # Constructor, byte-array key. + def __init__(self, key): + self.setKey(key) + + #/ Set the key. + def setKey(self, key): + self.encryptKeys = self.deskey([ord(x) for x in key], 1) + self.decryptKeys = self.deskey([ord(x) for x in key], 0) + + # Turn an 8-byte key into internal keys. + def deskey(self, keyBlock, encrypting): + #~ int i, j, l, m, n; + pc1m = [0]*56 #new int[56]; + pcr = [0]*56 #new int[56]; + kn = [0]*32 #new int[32]; + + for j in range(56): + l = pc1[j] + m = l & 07 + pc1m[j] = ((keyBlock[l >> 3] & bytebit[m]) != 0) + for i in range(16): + if encrypting: + m = i << 1 + else: + m = (15-i) << 1 + n = m + 1 + kn[m] = kn[n] = 0 + for j in range(28): + l = j + totrot[i] + if l < 28: + pcr[j] = pc1m[l] + else: + pcr[j] = pc1m[l - 28] + for j in range(28, 56): + l = j + totrot[i] + if l < 56: + pcr[j] = pc1m[l] + else: + pcr[j] = pc1m[l - 28] + for j in range(24): + if pcr[pc2[j]] != 0: + kn[m] |= bigbyte[j] + if pcr[pc2[j+24]] != 0: + kn[n] |= bigbyte[j] + return self.cookey(kn) + + def cookey(self, raw): + #~ int raw0, raw1; + #~ int rawi, KnLi; + #~ int i; + KnL = [0]*32 + + rawi = 0 + KnLi = 0 + for i in range(16): + raw0 = raw[rawi] + rawi += 1 + raw1 = raw[rawi] + rawi += 1 + KnL[KnLi] = (raw0 & 0x00fc0000L) << 6 + KnL[KnLi] |= (raw0 & 0x00000fc0L) << 10 + KnL[KnLi] |= (raw1 & 0x00fc0000L) >> 10 + KnL[KnLi] |= (raw1 & 0x00000fc0L) >> 6 + KnLi += 1 + KnL[KnLi] = (raw0 & 0x0003f000L) << 12 + KnL[KnLi] |= (raw0 & 0x0000003fL) << 16 + KnL[KnLi] |= (raw1 & 0x0003f000L) >> 4 + KnL[KnLi] |= (raw1 & 0x0000003fL) + KnLi += 1 + return KnL + + # Block encryption routines. + + #/ Encrypt a block of eight bytes. + def encrypt(self, clearText): + if len(clearText) % 8 != 0: + raise TypeError, "length must be multiple of block size" + result = [] + while clearText: + result.append(struct.pack( + ">LL", *self.des(struct.unpack(">LL", clearText[:8]), + self.encryptKeys))) + clearText = clearText[8:] + return ''.join(result) + + #/ Decrypt a block of eight bytes. + def decrypt(self, cipherText): + if len(cipherText) % 8 != 0: + raise TypeError, "length must be multiple of block size" + result = [] + while cipherText: + result.append(struct.pack( + ">LL", *self.des(struct.unpack(">LL", cipherText[:8]), + self.decryptKeys))) + cipherText = cipherText[8:] + return ''.join(result) + + # The DES function. + def des(self, (leftt, right), keys): + #~ int fval, work, right, leftt; + #~ int round + keysi = 0 + + work = ((leftt >> 4) ^ right) & 0x0f0f0f0fL + right ^= work + leftt ^= (work << 4) & 0xffffffffL + + work = ((leftt >> 16) ^ right) & 0x0000ffffL + right ^= work + leftt ^= (work << 16) & 0xffffffffL + + work = ((right >> 2) ^ leftt) & 0x33333333L + leftt ^= work + right ^= (work << 2) & 0xffffffffL + + work = ((right >> 8) ^ leftt) & 0x00ff00ffL + leftt ^= work + right ^= (work << 8) & 0xffffffffL + right = ((right << 1) | ((right >> 31) & 1)) & 0xffffffffL + + work = (leftt ^ right) & 0xaaaaaaaaL + leftt ^= work + right ^= work + leftt = ((leftt << 1) | ((leftt >> 31) & 1)) & 0xffffffffL + + for round in range(8): + work = ((right << 28) | (right >> 4)) & 0xffffffffL + work ^= keys[keysi] + keysi += 1 + fval = SP7[ work & 0x0000003fL ] + fval |= SP5[(work >> 8) & 0x0000003fL ] + fval |= SP3[(work >> 16) & 0x0000003fL ] + fval |= SP1[(work >> 24) & 0x0000003fL ] + work = right ^ keys[keysi] + keysi += 1 + fval |= SP8[ work & 0x0000003fL ] + fval |= SP6[(work >> 8) & 0x0000003fL ] + fval |= SP4[(work >> 16) & 0x0000003fL ] + fval |= SP2[(work >> 24) & 0x0000003fL ] + leftt ^= fval + work = ((leftt << 28) | (leftt >> 4)) & 0xffffffffL + work ^= keys[keysi] + keysi += 1 + fval = SP7[ work & 0x0000003fL ] + fval |= SP5[(work >> 8) & 0x0000003fL ] + fval |= SP3[(work >> 16) & 0x0000003fL ] + fval |= SP1[(work >> 24) & 0x0000003fL ] + work = leftt ^ keys[keysi] + keysi += 1 + fval |= SP8[ work & 0x0000003fL ] + fval |= SP6[(work >> 8) & 0x0000003fL ] + fval |= SP4[(work >> 16) & 0x0000003fL ] + fval |= SP2[(work >> 24) & 0x0000003fL ] + right ^= fval + + right = ((right << 31) | (right >> 1)) & 0xffffffffL + work = (leftt ^ right) & 0xaaaaaaaaL + leftt ^= work + right ^= work + leftt = ((leftt << 31) | (leftt >> 1)) & 0xffffffffL + work = ((leftt >> 8) ^ right) & 0x00ff00ffL + right ^= work + leftt ^= (work << 8) & 0xffffffffL + work = ((leftt >> 2) ^ right) & 0x33333333L + right ^= work + leftt ^= (work << 2) & 0xffffffffL + work = ((right >> 16) ^ leftt) & 0x0000ffffL + leftt ^= work + right ^= (work << 16) & 0xffffffffL + work = ((right >> 4) ^ leftt) & 0x0f0f0f0fL + leftt ^= work + right ^= (work << 4) & 0xffffffffL + return right, leftt + +# Tables, permutations, S-boxes, etc. + +bytebit = [0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01] + +bigbyte = [ + 0x800000, 0x400000, 0x200000, 0x100000, + 0x080000, 0x040000, 0x020000, 0x010000, + 0x008000, 0x004000, 0x002000, 0x001000, + 0x000800, 0x000400, 0x000200, 0x000100, + 0x000080, 0x000040, 0x000020, 0x000010, + 0x000008, 0x000004, 0x000002, 0x000001 +] + +pc1 = [ + 56, 48, 40, 32, 24, 16, 8, + 0, 57, 49, 41, 33, 25, 17, + 9, 1, 58, 50, 42, 34, 26, + 18, 10, 2, 59, 51, 43, 35, + 62, 54, 46, 38, 30, 22, 14, + 6, 61, 53, 45, 37, 29, 21, + 13, 5, 60, 52, 44, 36, 28, + 20, 12, 4, 27, 19, 11, 3 +] + +totrot = [ + 1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28 +] + +pc2 = [ + 13, 16, 10, 23, 0, 4, + 2, 27, 14, 5, 20, 9, + 22, 18, 11, 3 , 25, 7, + 15, 6, 26, 19, 12, 1, + 40, 51, 30, 36, 46, 54, + 29, 39, 50, 44, 32, 47, + 43, 48, 38, 55, 33, 52, + 45, 41, 49, 35, 28, 31, +] + +SP1 = [ +0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, +0x02000000L, 0x00080802L, 0x00080002L, 0x02000002L, +0x00080802L, 0x02080800L, 0x02080000L, 0x00000802L, +0x02000802L, 0x02000000L, 0x00000000L, 0x00080002L, +0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L, +0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L, +0x00000002L, 0x00000800L, 0x00080800L, 0x02080002L, +0x00000800L, 0x02000802L, 0x02080002L, 0x00000000L, +0x00000000L, 0x02080802L, 0x02000800L, 0x00080002L, +0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L, +0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L, +0x00080802L, 0x00000002L, 0x02000002L, 0x02080000L, +0x02080802L, 0x00080800L, 0x02080000L, 0x02000802L, +0x02000000L, 0x00000802L, 0x00080002L, 0x00000000L, +0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L, +0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L +] +SP2 = [ +0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L, +0x40000010L, 0x00008010L, 0x40008000L, 0x00108000L, +0x00008000L, 0x40100010L, 0x00000010L, 0x40008000L, +0x00100010L, 0x40108000L, 0x40100000L, 0x00000010L, +0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L, +0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L, +0x40008010L, 0x00108010L, 0x40108000L, 0x40000010L, +0x40000000L, 0x00100000L, 0x00008010L, 0x40108010L, +0x00100010L, 0x40108000L, 0x40008000L, 0x00108010L, +0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L, +0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L, +0x00008000L, 0x40000000L, 0x00108010L, 0x40008010L, +0x40108000L, 0x00008000L, 0x00000000L, 0x40000010L, +0x00000010L, 0x40108010L, 0x00108000L, 0x40100000L, +0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L, +0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L +] +SP3 = [ +0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L, +0x00040001L, 0x04000000L, 0x04000101L, 0x00040100L, +0x04000100L, 0x00040000L, 0x04040000L, 0x00000001L, +0x04040101L, 0x00000101L, 0x00000001L, 0x04040001L, +0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L, +0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L, +0x04040001L, 0x04000100L, 0x00040101L, 0x04040000L, +0x00040100L, 0x00000000L, 0x04000000L, 0x00040101L, +0x04040100L, 0x00000100L, 0x00000001L, 0x00040000L, +0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L, +0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L, +0x00040001L, 0x04000000L, 0x04040101L, 0x00000001L, +0x00040101L, 0x04000001L, 0x04000000L, 0x04040101L, +0x00040000L, 0x04000100L, 0x04000101L, 0x00040100L, +0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L, +0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L +] +SP4 = [ +0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L, +0x00000000L, 0x10400000L, 0x10001008L, 0x00400008L, +0x10401000L, 0x10000008L, 0x10000000L, 0x00001008L, +0x10000008L, 0x00401008L, 0x00400000L, 0x10000000L, +0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L, +0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L, +0x00001008L, 0x00000000L, 0x00400008L, 0x10401000L, +0x10001000L, 0x10400008L, 0x10401008L, 0x00400000L, +0x10400008L, 0x00001008L, 0x00400000L, 0x10000008L, +0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L, +0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L, +0x00000000L, 0x10400008L, 0x10401000L, 0x00001000L, +0x10000000L, 0x10401008L, 0x00401008L, 0x00400000L, +0x10401008L, 0x00000008L, 0x10001000L, 0x00401008L, +0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L, +0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L +] +SP5 = [ +0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L, +0x08010020L, 0x08000400L, 0x00010420L, 0x08010000L, +0x00010000L, 0x00000020L, 0x08000020L, 0x00010400L, +0x08000420L, 0x08010020L, 0x08010400L, 0x00000000L, +0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L, +0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L, +0x00000020L, 0x08000420L, 0x08010420L, 0x00010020L, +0x08010000L, 0x00000400L, 0x00000420L, 0x08010400L, +0x08010400L, 0x08000420L, 0x00010020L, 0x08010000L, +0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L, +0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L, +0x00010420L, 0x08000000L, 0x00000400L, 0x00010020L, +0x08000420L, 0x00000400L, 0x00000000L, 0x08010420L, +0x08010020L, 0x08010400L, 0x00000420L, 0x00010000L, +0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L, +0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L +] +SP6 = [ +0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L, +0x00200040L, 0x00002000L, 0x80002040L, 0x00200000L, +0x00002040L, 0x80202040L, 0x00202000L, 0x80000000L, +0x80002000L, 0x80000040L, 0x80200000L, 0x00202040L, +0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L, +0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L, +0x80202040L, 0x80200000L, 0x80000000L, 0x00002040L, +0x00000040L, 0x00202000L, 0x00202040L, 0x80002000L, +0x00002040L, 0x80000000L, 0x80002000L, 0x00202040L, +0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L, +0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L, +0x00200040L, 0x80202040L, 0x00202000L, 0x00000040L, +0x80202040L, 0x00202000L, 0x00200000L, 0x80002040L, +0x80000040L, 0x80200000L, 0x00202040L, 0x00000000L, +0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L, +0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L, +] +SP7 = [ +0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L, +0x01004204L, 0x00004004L, 0x00004200L, 0x00000000L, +0x01000000L, 0x01000204L, 0x00000204L, 0x01004000L, +0x00000004L, 0x01004200L, 0x01004000L, 0x00000204L, +0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L, +0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L, +0x01004004L, 0x00004204L, 0x01004200L, 0x00000004L, +0x00004204L, 0x01004004L, 0x00000200L, 0x01000000L, +0x00004204L, 0x01004000L, 0x01004004L, 0x00000204L, +0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L, +0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L, +0x00000200L, 0x01000004L, 0x00000004L, 0x01000200L, +0x00000000L, 0x01000204L, 0x01000200L, 0x00004200L, +0x00000204L, 0x00004000L, 0x01004204L, 0x01000000L, +0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L, +0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L, +] +SP8 = [ +0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L, +0x20020000L, 0x00800080L, 0x20800000L, 0x20820080L, +0x00000080L, 0x20000000L, 0x00820000L, 0x00020080L, +0x00820080L, 0x20020080L, 0x20000080L, 0x20800000L, +0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L, +0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L, +0x20000000L, 0x00800000L, 0x20020080L, 0x20800080L, +0x00800000L, 0x00020000L, 0x20820000L, 0x00000080L, +0x00800000L, 0x00020000L, 0x20000080L, 0x20820080L, +0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L, +0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L, +0x20820000L, 0x00000080L, 0x00800080L, 0x20020000L, +0x20820080L, 0x00800000L, 0x20800000L, 0x20000080L, +0x00820000L, 0x00020080L, 0x20020080L, 0x20800000L, +0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L, +0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L, +] + +def new(key): + return DesCipher(key) + +block_size = 8 +key_size = 8 + +#test only: +if __name__ == '__main__': + des = DesCipher("\x01\x23\x45\x67\x89\xab\xcd\xef") + print ''.join( + "%02x" % ord(x) for x in des.encrypt("Now is t")) + diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py new file mode 100644 index 0000000000..d61bd39094 --- /dev/null +++ b/src/calibre/ebooks/lit/mssha1.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 + +"""A sample implementation of SHA-1 in pure Python. + + Framework adapted from Dinu Gherman's MD5 implementation by + J. Hallén and L. Creighton. SHA-1 implementation based directly on + the text of the NIST standard FIPS PUB 180-1. +""" + + +__date__ = '2004-11-17' +__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy + + +import struct, copy + + +# ====================================================================== +# Bit-Manipulation helpers +# +# _long2bytes() was contributed by Barry Warsaw +# and is reused here with tiny modifications. +# ====================================================================== + +def _long2bytesBigEndian(n, blocksize=0): + """Convert a long integer to a byte string. + + If optional blocksize is given and greater than zero, pad the front + of the byte string with binary zeros so that the length is a multiple + of blocksize. + """ + + # After much testing, this algorithm was deemed to be the fastest. + s = '' + pack = struct.pack + while n > 0: + s = pack('>I', n & 0xffffffffL) + s + n = n >> 32 + + # Strip off leading zeros. + for i in range(len(s)): + if s[i] != '\000': + break + else: + # Only happens when n == 0. + s = '\000' + i = 0 + + s = s[i:] + + # Add back some pad bytes. This could be done more efficiently + # w.r.t. the de-padding being done above, but sigh... + if blocksize > 0 and len(s) % blocksize: + s = (blocksize - len(s) % blocksize) * '\000' + s + + return s + + +def _bytelist2longBigEndian(list): + "Transform a list of characters into a list of longs." + + imax = len(list)/4 + hl = [0L] * imax + + j = 0 + i = 0 + while i < imax: + b0 = long(ord(list[j])) << 24 + b1 = long(ord(list[j+1])) << 16 + b2 = long(ord(list[j+2])) << 8 + b3 = long(ord(list[j+3])) + hl[i] = b0 | b1 | b2 | b3 + i = i+1 + j = j+4 + + return hl + + +def _rotateLeft(x, n): + "Rotate x (32 bit) left n bits circularly." + + return (x << n) | (x >> (32-n)) + + +# ====================================================================== +# The SHA transformation functions +# +# ====================================================================== + +def f0_19(B, C, D): + return (B & (C ^ D)) ^ D + +def f20_39(B, C, D): + return B ^ C ^ D + +def f40_59(B, C, D): + return ((B | C) & D) | (B & C) + +def f60_79(B, C, D): + return B ^ C ^ D + +def f6_42(B, C, D): + return (B + C) ^ C + +f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20 +f[3] = f20_39 +f[6] = f6_42 +f[10] = f20_39 +f[15] = f20_39 +f[26] = f0_19 +f[31] = f40_59 +f[42] = f6_42 +f[51] = f20_39 +f[68] = f0_19 + + +# Constants to be used +K = [ + 0x5A827999L, # ( 0 <= t <= 19) + 0x6ED9EBA1L, # (20 <= t <= 39) + 0x8F1BBCDCL, # (40 <= t <= 59) + 0xCA62C1D6L # (60 <= t <= 79) + ] + +class mssha1(object): + "An implementation of the MD5 hash function in pure Python." + + def __init__(self): + "Initialisation." + + # Initial message length in bits(!). + self.length = 0L + self.count = [0, 0] + + # Initial empty message as a sequence of bytes (8 bit characters). + self.input = [] + + # Call a separate init function, that can be used repeatedly + # to start from scratch on the same object. + self.init() + + + def init(self): + "Initialize the message-digest and set all fields to zero." + + self.length = 0L + self.input = [] + + # Initial 160 bit message digest (5 times 32 bit). + self.H0 = 0x32107654L + self.H1 = 0x23016745L + self.H2 = 0xC4E680A2L + self.H3 = 0xDC679823L + self.H4 = 0xD0857A34L + + def _transform(self, W): + for t in range(16, 80): + W.append(_rotateLeft( + W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL) + + A = self.H0 + B = self.H1 + C = self.H2 + D = self.H3 + E = self.H4 + + for t in xrange(0, 80): + TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20] + E = D + D = C + C = _rotateLeft(B, 30) & 0xffffffffL + B = A + A = TEMP & 0xffffffffL + + self.H0 = (self.H0 + A) & 0xffffffffL + self.H1 = (self.H1 + B) & 0xffffffffL + self.H2 = (self.H2 + C) & 0xffffffffL + self.H3 = (self.H3 + D) & 0xffffffffL + self.H4 = (self.H4 + E) & 0xffffffffL + + + # Down from here all methods follow the Python Standard Library + # API of the sha module. + + def update(self, inBuf): + """Add to the current message. + + Update the mssha1 object with the string arg. Repeated calls + are equivalent to a single call with the concatenation of all + the arguments, i.e. s.update(a); s.update(b) is equivalent + to s.update(a+b). + + The hash is immediately calculated for all full blocks. The final + calculation is made in digest(). It will calculate 1-2 blocks, + depending on how much padding we have to add. This allows us to + keep an intermediate value for the hash, so that we only need to + make minimal recalculation if we call update() to add more data + to the hashed string. + """ + + leninBuf = long(len(inBuf)) + + # Compute number of bytes mod 64. + index = (self.count[1] >> 3) & 0x3FL + + # Update number of bits. + self.count[1] = self.count[1] + (leninBuf << 3) + if self.count[1] < (leninBuf << 3): + self.count[0] = self.count[0] + 1 + self.count[0] = self.count[0] + (leninBuf >> 29) + + partLen = 64 - index + + if leninBuf >= partLen: + self.input[index:] = list(inBuf[:partLen]) + self._transform(_bytelist2longBigEndian(self.input)) + i = partLen + while i + 63 < leninBuf: + self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64]))) + i = i + 64 + else: + self.input = list(inBuf[i:leninBuf]) + else: + i = 0 + self.input = self.input + list(inBuf) + + + def digest(self): + """Terminate the message-digest computation and return digest. + + Return the digest of the strings passed to the update() + method so far. This is a 16-byte string which may contain + non-ASCII characters, including null bytes. + """ + + H0 = self.H0 + H1 = self.H1 + H2 = self.H2 + H3 = self.H3 + H4 = self.H4 + input = [] + self.input + count = [] + self.count + + index = (self.count[1] >> 3) & 0x3fL + + if index < 56: + padLen = 56 - index + else: + padLen = 120 - index + + padding = ['\200'] + ['\000'] * 63 + self.update(padding[:padLen]) + + # Append length (before padding). + bits = _bytelist2longBigEndian(self.input[:56]) + count + + self._transform(bits) + + # Store state in digest. + digest = _long2bytesBigEndian(self.H0, 4) + \ + _long2bytesBigEndian(self.H1, 4) + \ + _long2bytesBigEndian(self.H2, 4) + \ + _long2bytesBigEndian(self.H3, 4) + \ + _long2bytesBigEndian(self.H4, 4) + + self.H0 = H0 + self.H1 = H1 + self.H2 = H2 + self.H3 = H3 + self.H4 = H4 + self.input = input + self.count = count + + return digest + + + def hexdigest(self): + """Terminate and return digest in HEX form. + + Like digest() except the digest is returned as a string of + length 32, containing only hexadecimal digits. This may be + used to exchange the value safely in email or other non- + binary environments. + """ + return ''.join(['%02x' % ord(c) for c in self.digest()]) + + def copy(self): + """Return a clone object. + + Return a copy ('clone') of the md5 object. This can be used + to efficiently compute the digests of strings that share + a common initial substring. + """ + + return copy.deepcopy(self) + + +# ====================================================================== +# Mimic Python top-level functions from standard library API +# for consistency with the md5 module of the standard library. +# ====================================================================== + +# These are mandatory variables in the module. They have constant values +# in the SHA standard. + +digest_size = digestsize = 20 +blocksize = 1 + +def new(arg=None): + """Return a new mssha1 crypto object. + + If arg is present, the method call update(arg) is made. + """ + + crypto = mssha1() + if arg: + crypto.update(arg) + + return crypto + +if __name__ == '__main__': + def main(): + import sys + file = None + if len(sys.argv) > 2: + print "usage: %s [FILE]" % sys.argv[0] + return + elif len(sys.argv) < 2: + file = sys.stdin + else: + file = open(sys.argv[1], 'rb') + context = new() + data = file.read(16384) + while data: + context.update(data) + data = file.read(16384) + file.close() + digest = context.hexdigest().upper() + for i in xrange(0, 40, 8): + print digest[i:i+8], + print + main() diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py new file mode 100644 index 0000000000..2608d63399 --- /dev/null +++ b/src/calibre/ebooks/lit/reader.py @@ -0,0 +1,725 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' +''' +Support for reading the metadata from a lit file. +''' + +import sys, struct, cStringIO, os +import functools +from itertools import repeat + +from calibre import relpath +from calibre.ebooks.metadata import MetaInformation +from calibre.ebooks.metadata.opf import OPFReader +from calibre.ebooks.lit import LitError +from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP +import calibre.ebooks.lit.mssha1 as mssha1 +import calibre.ebooks.lit.msdes as msdes + +OPF_DECL = """" + +""" +XHTML_DECL = """ + +""" + +DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}" +LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}" + +def u32(bytes): + return struct.unpack(' 0: + b = ord(bytes[pos]) + pos += 1 + remaining -= 1 + val <<= 7 + val |= (b & 0x7f) + if b & 0x80 == 0: break + return val, bytes[pos:], remaining + +def msguid(bytes): + values = struct.unpack(">= 1 + elsize += 1 + if (mask <= 1) or (mask == 0x40): + raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos])) + else: + elsize = 1 + if elsize > 1: + if elsize + pos > len(bytes): + raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos])) + c &= (mask - 1) + for i in range(1, elsize): + b = ord(bytes[pos+i]) + if (b & 0xC0) != 0x80: + raise LitError( + 'Invalid UTF8 character: %s' % repr(bytes[pos:pos+i])) + c = (c << 6) | (b & 0x3F) + return unichr(c), pos+elsize + +FLAG_OPENING = 1 +FLAG_CLOSING = 2 +FLAG_BLOCK = 4 +FLAG_HEAD = 8 +FLAG_ATOM = 16 +XML_ENTITIES = ['&', ''', '<', '>', '"'] + +class UnBinary(object): + def __init__(self, bin, manifest, map=OPF_MAP): + self.manifest = manifest + self.tag_map, self.attr_map, self.tag_to_attr_map = map + self.opf = map is OPF_MAP + self.bin = bin + self.buf = cStringIO.StringIO() + self.ampersands = [] + self.binary_to_text() + self.raw = self.buf.getvalue().lstrip().decode('utf-8') + self.escape_ampersands() + + def escape_ampersands(self): + offset = 0 + for pos in self.ampersands: + test = self.raw[pos+offset:pos+offset+6] + if test.startswith('&#') and ';' in test: + continue + escape = True + for ent in XML_ENTITIES: + if test.startswith(ent): + escape = False + break + if not escape: + continue + self.raw = self.raw[:pos+offset] + '&' + self.raw[pos+offset+1:] + offset += 4 + + def item_path(self, internal_id): + for i in self.manifest: + if i == internal_id: + return i.path + raise LitError('Could not find item %s'%(internal_id,)) + + def __unicode__(self): + return self.raw + + def binary_to_text(self, base=0, depth=0): + tag_name = current_map = None + dynamic_tag = errors = 0 + in_censorship = is_goingdown = False + state = 'text' + index = base + flags = 0 + + while index < len(self.bin): + c, index = read_utf8_char(self.bin, index) + oc = ord(c) + + if state == 'text': + if oc == 0: + state = 'get flags' + continue + elif c == '\v': + c = '\n' + elif c == '&': + self.ampersands.append(self.buf.tell()-1) + self.buf.write(c.encode('utf-8')) + + elif state == 'get flags': + if oc == 0: + state = 'text' + continue + flags = oc + state = 'get tag' + + elif state == 'get tag': + state = 'text' if oc == 0 else 'get attr' + if flags & FLAG_OPENING: + tag = oc + self.buf.write('<') + if not (flags & FLAG_CLOSING): + is_goingdown = True + if tag == 0x8000: + state = 'get custom length' + continue + if flags & FLAG_ATOM: + raise LitError('TODO: Atoms not yet implemented') + elif tag < len(self.tag_map): + tag_name = self.tag_map[tag] + current_map = self.tag_to_attr_map[tag] + else: + dynamic_tag += 1 + errors += 1 + tag_name = '?'+unichr(tag)+'?' + current_map = self.tag_to_attr_map[tag] + print 'WARNING: tag %s unknown' % unichr(tag) + self.buf.write(unicode(tag_name).encode('utf-8')) + elif flags & FLAG_CLOSING: + if depth == 0: + raise LitError('Extra closing tag') + return index + + elif state == 'get attr': + in_censorship = False + if oc == 0: + if not is_goingdown: + tag_name = None + dynamic_tag = 0 + self.buf.write(' />') + else: + self.buf.write('>') + index = self.binary_to_text(base=index, depth=depth+1) + is_goingdown = False + if not tag_name: + raise LitError('Tag ends before it begins.') + self.buf.write('') + dynamic_tag = 0 + tag_name = None + state = 'text' + else: + if oc == 0x8000: + state = 'get attr length' + continue + attr = None + if oc in current_map and current_map[oc]: + attr = current_map[oc] + elif oc in self.attr_map: + attr = self.attr_map[oc] + if not attr or not isinstance(attr, basestring): + raise LitError( + 'Unknown attribute %d in tag %s' % (oc, tag_name)) + if attr.startswith('%'): + in_censorship = True + state = 'get value length' + continue + self.buf.write(' ' + unicode(attr).encode('utf-8') + '=') + if attr in ['href', 'src']: + state = 'get href length' + else: + state = 'get value length' + + elif state == 'get value length': + if not in_censorship: + self.buf.write('"') + count = oc - 1 + if count == 0: + if not in_censorship: + self.buf.write('"') + in_censorship = False + state = 'get attr' + continue + state = 'get value' + if oc == 0xffff: + continue + if count < 0 or count > (len(self.bin) - index): + raise LitError('Invalid character count %d' % count) + + elif state == 'get value': + if count == 0xfffe: + if not in_censorship: + self.buf.write('%s"' % (oc - 1)) + in_censorship = False + state = 'get attr' + elif count > 0: + if not in_censorship: + self.buf.write(c) + count -= 1 + if count == 0: + if not in_censorship: + self.buf.write('"') + in_censorship = False + state = 'get attr' + + elif state == 'get custom length': + count = oc - 1 + if count <= 0 or count > len(self.bin)-index: + raise LitError('Invalid character count %d' % count) + dynamic_tag += 1 + state = 'get custom' + tag_name = '' + + elif state == 'get custom': + tag_name += c + count -= 1 + if count == 0: + self.buf.write(tag_name) + state = 'get attr' + + elif state == 'get attr length': + count = oc - 1 + if count <= 0 or count > (len(self.bin) - index): + raise LitError('Invalid character count %d' % count) + self.buf.write(' ') + state = 'get custom attr' + + elif state == 'get custom attr': + self.buf.write(c) + count -= 1 + if count == 0: + self.buf.write('=') + state = 'get value length' + + elif state == 'get href length': + count = oc - 1 + if count <= 0 or count > (len(self.bin) - index): + raise LitError('Invalid character count %d' % count) + href = '' + state = 'get href' + + elif state == 'get href': + href += c + count -= 1 + if count == 0: + doc, m, frag = href[1:].partition('#') + path = self.item_path(doc) + if m and frag: + path += m + frag + self.buf.write((u'"%s"' % path).encode('utf-8')) + state = 'get attr' + return index + +class DirectoryEntry(object): + def __init__(self, name, section, offset, size): + self.name = name + self.section = section + self.offset = offset + self.size = size + + def __repr__(self): + return "DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)" \ + % (repr(self.name), self.section, self.offset, self.size) + + def __str__(self): + return repr(self) + +class ManifestItem(object): + def __init__(self, original, internal, mime_type, offset, root, state): + self.original = original + self.internal = internal + self.mime_type = mime_type + self.offset = offset + self.root = root + self.state = state + self.prefix = state if state in ('images', 'css') else '' + self.prefix = self.prefix + os.sep if self.prefix else '' + self.path = self.prefix + self.original + + def __eq__(self, other): + if hasattr(other, 'internal'): + return self.internal == other.internal + return self.internal == other + + def __repr__(self): + return "ManifestItem(internal='%s', path='%s')" \ + % (repr(self.internal), repr(self.path)) + +def preserve(function): + def wrapper(self, *args, **kwargs): + opos = self._stream.tell() + try: + return function(self, *args, **kwargs) + finally: + self._stream.seek(opos) + functools.update_wrapper(wrapper, function) + return wrapper + +class LitFile(object): + PIECE_SIZE = 16 + + def magic(): + @preserve + def fget(self): + self._stream.seek(0) + return self._stream.read(8) + return property(fget=fget) + magic = magic() + + def version(): + def fget(self): + self._stream.seek(8) + return u32(self._stream.read(4)) + return property(fget=fget) + version = version() + + def hdr_len(): + @preserve + def fget(self): + self._stream.seek(12) + return int32(self._stream.read(4)) + return property(fget=fget) + hdr_len = hdr_len() + + def num_pieces(): + @preserve + def fget(self): + self._stream.seek(16) + return int32(self._stream.read(4)) + return property(fget=fget) + num_pieces = num_pieces() + + def sec_hdr_len(): + @preserve + def fget(self): + self._stream.seek(20) + return int32(self._stream.read(4)) + return property(fget=fget) + sec_hdr_len = sec_hdr_len() + + def guid(): + @preserve + def fget(self): + self._stream.seek(24) + return self._stream.read(16) + return property(fget=fget) + guid = guid() + + + def header(): + @preserve + def fget(self): + size = self.hdr_len \ + + (self.num_pieces * self.PIECE_SIZE) \ + + self.sec_hdr_len + self._stream.seek(0) + return self._stream.read(size) + return property(fget=fget) + header = header() + + def __init__(self, stream): + self._stream = stream + if self.magic != 'ITOLITLS': + raise LitError('Not a valid LIT file') + if self.version != 1: + raise LitError('Unknown LIT version %d'%(self.version,)) + self.read_secondary_header() + self.read_header_pieces() + + @preserve + def __len__(self): + self._stream.seek(0, 2) + return self._stream.tell() + + @preserve + def _read_raw(self, offset, size): + self._stream.seek(offset) + return self._stream.read(size) + + def _read_content(self, offset, size): + return self._read_raw(self.content_offset + offset, size) + + @preserve + def read_secondary_header(self): + self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) + bytes = self._stream.read(self.sec_hdr_len) + offset = int32(bytes[4:]) + while offset < len(bytes): + blocktype = bytes[offset:offset+4] + blockver = u32(bytes[offset+4:]) + if blocktype == 'CAOL': + if blockver != 2: + raise LitError( + 'Unknown CAOL block format %d' % blockver) + self.creator_id = u32(bytes[offset+12:]) + self.entry_chunklen = u32(bytes[offset+20:]) + self.count_chunklen = u32(bytes[offset+24:]) + self.entry_unknown = u32(bytes[offset+28:]) + self.count_unknown = u32(bytes[offset+32:]) + offset += 48 + elif blocktype == 'ITSF': + if blockver != 4: + raise LitError( + 'Unknown ITSF block format %d' % blockver) + if u32(bytes[offset+4+16:]): + raise LitError('This file has a 64bit content offset') + self.content_offset = u32(bytes[offset+16:]) + self.timestamp = u32(bytes[offset+24:]) + self.language_id = u32(bytes[offset+28:]) + offset += 48 + if not hasattr(self, 'content_offset'): + raise LitError('Could not figure out the content offset') + + @preserve + def read_header_pieces(self): + src = self.header[self.hdr_len:] + for i in range(self.num_pieces): + piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE] + if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: + raise LitError('Piece %s has 64bit value' % repr(piece)) + offset, size = u32(piece), int32(piece[8:]) + self._stream.seek(offset) + piece = self._stream.read(size) + if i == 0: + continue # Dont need this piece + elif i == 1: + if u32(piece[8:]) != self.entry_chunklen or \ + u32(piece[12:]) != self.entry_unknown: + raise LitError('Secondary header does not match piece') + self.read_directory(piece) + elif i == 2: + if u32(piece[8:]) != self.count_chunklen or \ + u32(piece[12:]) != self.count_unknown: + raise LitError('Secondary header does not match piece') + continue # No data needed from this piece + elif i == 3: + self.piece3_guid = piece + elif i == 4: + self.piece4_guid = piece + + def read_directory(self, piece): + self.entries = {} + if not piece.startswith('IFCM'): + raise LitError('Header piece #1 is not main directory.') + chunk_size, num_chunks = int32(piece[8:12]), int32(piece[24:28]) + + if (32 + chunk_size * num_chunks) != len(piece): + raise LitError('IFCM HEADER has incorrect length') + + for chunk in range(num_chunks): + p = 32 + chunk * chunk_size + if piece[p:p+4] != 'AOLL': + continue + remaining = chunk_size - int32(piece[p+4:p+8]) - 48 + if remaining < 0: + raise LitError('AOLL remaining count is negative') + entries = u16(piece[p+chunk_size-2:]) + if entries <= 0: + # Hopefully everything will work even without a correct entries + # count + entries = (2 ** 16) - 1 + piece = piece[p+48:] + i = 0 + while i < entries: + if remaining <= 0: break + namelen, piece, remaining = encint(piece, remaining) + if namelen != (namelen & 0x7fffffff): + raise LitError('Directory entry had 64bit name length.') + if namelen > remaining - 3: + raise LitError('Read past end of directory chunk') + name = piece[:namelen] + piece = piece[namelen:] + section, piece, remaining = encint(piece, remaining) + offset, piece, remaining = encint(piece, remaining) + size, piece, remaining = encint(piece, remaining) + + entry = DirectoryEntry(name, section, offset, size) + + if name == '::DataSpace/NameList': + self.read_section_names(entry) + elif name == '/manifest': + self.read_manifest(entry) + elif name == '/meta': + self.read_meta(entry) + self.entries[name] = entry + i += 1 + if not hasattr(self, 'section_names'): + raise LitError('Lit file does not have a valid NameList') + if not hasattr(self, 'manifest'): + raise LitError('Lit file does not have a valid manifest') + self.read_drm() + + def read_section_names(self, entry): + raw = self._read_content(entry.offset, entry.size) + if len(raw) < 4: + raise LitError('Invalid Namelist section') + pos = 4 + self.num_sections = u16(raw[2:pos]) + self.section_names = [""]*self.num_sections + self.section_data = [None]*self.num_sections + for section in range(self.num_sections): + size = u16(raw[pos:pos+2]) + pos += 2 + size = size*2 + 2 + if pos + size > len(raw): + raise LitError('Invalid Namelist section') + self.section_names[section] = \ + raw[pos:pos+size].decode('utf-16-le').rstrip('\000') + pos += size + + def read_manifest(self, entry): + self.manifest = [] + raw = self._read_content(entry.offset, entry.size) + pos = 0 + while pos < len(raw): + size = ord(raw[pos]) + if size == 0: break + pos += 1 + root = raw[pos:pos+size].decode('utf8') + pos += size + if pos >= len(raw): + raise LitError('Truncated manifest.') + for state in ['spine', 'not spine', 'css', 'images']: + num_files = int32(raw[pos:pos+4]) + pos += 4 + if num_files == 0: continue + + i = 0 + while i < num_files: + if pos+5 >= len(raw): + raise LitError('Truncated manifest.') + offset = u32(raw[pos:pos+4]) + pos += 4 + + slen = ord(raw[pos]) + pos += 1 + internal = raw[pos:pos+slen].decode('utf8') + pos += slen + + slen = ord(raw[pos]) + pos += 1 + original = raw[pos:pos+slen].decode('utf8') + pos += slen + + slen = ord(raw[pos]) + pos += 1 + mime_type = raw[pos:pos+slen].decode('utf8') + pos += slen + 1 + + self.manifest.append( + ManifestItem(original, internal, mime_type, + offset, root, state)) + i += 1 + + def read_meta(self, entry): + raw = self._read_content(entry.offset, entry.size) + xml = OPF_DECL + unicode(UnBinary(raw, self.manifest)) + self.meta = xml + + def read_drm(self): + def exists_file(name): + try: self.get_file(name) + except KeyError: return False + return True + self.drmlevel = 0 + if exists_file('/DRMStorage/Licenses/EUL'): + self.drmlevel = 5 + elif exists_file('/DRMStorage/DRMBookplate'): + self.drmlevel = 3 + elif exists_file('/DRMStorage/DRMSealed'): + self.drmlevel = 1 + else: + return + des = msdes.new(self.calculate_deskey()) + bookkey = des.decrypt(self.get_file('/DRMStorage/DRMSealed')) + if bookkey[0] != '\000': + raise LitError('Unable to decrypt title key!') + self.bookkey = bookkey[1:9] + + def calculate_deskey(self): + hashfiles = ['/meta', '/DRMStorage/DRMSource'] + if self.drmlevel == 3: + hashfiles.append('/DRMStorage/DRMBookplate') + prepad = 2 + hash = mssha1.new() + for name in hashfiles: + data = self.get_file(name) + if prepad > 0: + data = ("\000" * prepad) + data + prepad = 0 + postpad = 64 - (len(data) % 64) + if postpad < 64: + data = data + ("\000" * postpad) + hash.update(data) + digest = hash.digest() + key = [0] * 8 + for i in xrange(0, len(digest)): + key[i % 8] ^= ord(digest[i]) + return ''.join(chr(x) for x in key) + + def get_file(self, name): + entry = self.entries[name] + if entry.section == 0: + return self._read_content(entry.offset, entry.size) + section = self.get_section(entry.section) + return section[entry.offset:entry.offset+entry.size] + + def get_section(self, section): + data = self.section_data[section] + if not data: + data = self._get_section(section) + self.section_data[section] = data + return data + + def _get_section(self, section): + name = self.section_names[section] + path = '::DataSpace/Storage/' + name + transform = self.get_file(path + '/Transform/List') + content = self.get_file(path + '/Content') + control = self.get_file(path + '/ControlData') + idx_transform = idx_control = 0 + while (len(transform) - idx_transform) >= 16: + ndwords = int32(control[idx_control:]) + 1 + if (idx_control + (ndwords * 4)) > len(control) or ndwords <= 0: + raise LitError("ControlData is too short") + guid = msguid(transform[idx_transform:]) + if guid == DESENCRYPT_GUID: + content = self._decrypt(content) + idx_control += ndwords * 4 + elif guid == LZXCOMPRESS_GUID: + raise LitError("LZX decompression not implemented") + else: + raise LitError("Unrecognized transform: %s." % repr(guid)) + idx_transform += 16 + return content + + def _decrypt(self, content): + if self.drmlevel == 5: + raise LitError('Cannot extract content from a DRM protected ebook') + return msdes.new(self.bookkey).decrypt(content) + +def get_metadata(stream): + try: + litfile = LitFile(stream) + src = litfile.meta.encode('utf-8') + mi = OPFReader(cStringIO.StringIO(src), dir=os.getcwd()) + cover_url, cover_item = mi.cover, None + if cover_url: + cover_url = relpath(cover_url, os.getcwd()) + for item in litfile.manifest: + if item.path == cover_url: + cover_item = item.internal + if cover_item is not None: + ext = cover_url.rpartition('.')[-1] + if not ext: + ext = 'jpg' + else: + ext = ext.lower() + cd = litfile.get_file(cover_item) + mi.cover_data = (ext, cd) if cd else (None, None) + except: + title = stream.name if hasattr(stream, 'name') and stream.name else 'Unknown' + mi = MetaInformation(title, ['Unknown']) + return mi + +def main(args=sys.argv): + if len(args) != 2: + print >>sys.stderr, _('Usage: %s file.lit')%(args[0],) + return 1 + mi = get_metadata(open(args[1], 'rb')) + print unicode(mi) + if mi.cover_data[1]: + cover = os.path.abspath(os.path.splitext(os.path.basename(args[1]))[0] + '.' + mi.cover_data[0]) + open(cover, 'wb').write(mi.cover_data[1]) + print _('Cover saved to'), cover + return 0 + +if __name__ == '__main__': + sys.exit(main())