From 615d5ea2795563f8af9dc34c2c2c03c84c9c9714 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 16 Jul 2008 10:00:49 -0400 Subject: [PATCH] Checkpoint state to move to office --- src/calibre/ebooks/lit/maps/__init__.py | 7 +- src/calibre/ebooks/lit/maps/html.py | 1568 +++++++++++------------ src/calibre/ebooks/lit/maps/opf.py | 54 +- src/calibre/ebooks/lit/mssha1.py | 343 +++++ src/calibre/ebooks/lit/reader.py | 418 +++--- 5 files changed, 1352 insertions(+), 1038 deletions(-) create mode 100644 src/calibre/ebooks/lit/mssha1.py diff --git a/src/calibre/ebooks/lit/maps/__init__.py b/src/calibre/ebooks/lit/maps/__init__.py index eb99464d9b..2abab3efe9 100644 --- a/src/calibre/ebooks/lit/maps/__init__.py +++ b/src/calibre/ebooks/lit/maps/__init__.py @@ -1,5 +1,2 @@ -import calibre.ebooks.maps.opf as opf -import calibre.ebooks.maps.html as html - -OPF_MAP = opf.MAP -HTML_MAP = html.MAP +from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP +from calibre.ebooks.lit.maps.html import MAP as HTML_MAP diff --git a/src/calibre/ebooks/lit/maps/html.py b/src/calibre/ebooks/lit/maps/html.py index 095b0bcc3e..de0286c764 100644 --- a/src/calibre/ebooks/lit/maps/html.py +++ b/src/calibre/ebooks/lit/maps/html.py @@ -1,786 +1,3 @@ -ATTRS0 = { - 0x8010 => "tabindex", - 0x8046 => "title", - 0x804b => "style", - 0x804d => "disabled", - 0x83ea => "class", - 0x83eb => "id", - 0x83fe => "datafld", - 0x83ff => "datasrc", - 0x8400 => "dataformatas", - 0x87d6 => "accesskey", - 0x9392 => "lang", - 0x93ed => "language", - 0x93fe => "dir", - 0x9771 => "onmouseover", - 0x9772 => "onmouseout", - 0x9773 => "onmousedown", - 0x9774 => "onmouseup", - 0x9775 => "onmousemove", - 0x9776 => "onkeydown", - 0x9777 => "onkeyup", - 0x9778 => "onkeypress", - 0x9779 => "onclick", - 0x977a => "ondblclick", - 0x977e => "onhelp", - 0x977f => "onfocus", - 0x9780 => "onblur", - 0x9783 => "onrowexit", - 0x9784 => "onrowenter", - 0x9786 => "onbeforeupdate", - 0x9787 => "onafterupdate", - 0x978a => "onreadystatechange", - 0x9790 => "onscroll", - 0x9794 => "ondragstart", - 0x9795 => "onresize", - 0x9796 => "onselectstart", - 0x9797 => "onerrorupdate", - 0x9799 => "ondatasetchanged", - 0x979a => "ondataavailable", - 0x979b => "ondatasetcomplete", - 0x979c => "onfilterchange", - 0x979f => "onlosecapture", - 0x97a0 => "onpropertychange", - 0x97a2 => "ondrag", - 0x97a3 => "ondragend", - 0x97a4 => "ondragenter", - 0x97a5 => "ondragover", - 0x97a6 => "ondragleave", - 0x97a7 => "ondrop", - 0x97a8 => "oncut", - 0x97a9 => "oncopy", - 0x97aa => "onpaste", - 0x97ab => "onbeforecut", - 0x97ac => "onbeforecopy", - 0x97ad => "onbeforepaste", - 0x97af => "onrowsdelete", - 0x97b0 => "onrowsinserted", - 0x97b1 => "oncellchange", - 0x97b2 => "oncontextmenu", - 0x97b6 => "onbeforeeditfocus", - } -ATTRS3 = { - 0x0001 => "href", - 0x03ec => "target", - 0x03ee => "rel", - 0x03ef => "rev", - 0x03f0 => "urn", - 0x03f1 => "methods", - 0x8001 => "name", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS5 = { - 0x9399 => "clear", - } -ATTRS6 = { - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x8bbb => "classid", - 0x8bbc => "data", - 0x8bbf => "codebase", - 0x8bc0 => "codetype", - 0x8bc1 => "code", - 0x8bc2 => "type", - 0x8bc5 => "vspace", - 0x8bc6 => "hspace", - 0x978e => "onerror", - } -ATTRS7 = { - 0x0001 => "href", - 0x03ea => "shape", - 0x03eb => "coords", - 0x03ed => "target", - 0x03ee => "alt", - 0x03ef => "nohref", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS8 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS9 = { - 0x03ec => "href", - 0x03ed => "target", - } -ATTRS10 = { - 0x938b => "color", - 0x939b => "face", - 0x93a3 => "size", - } -ATTRS12 = { - 0x03ea => "src", - 0x03eb => "loop", - 0x03ec => "volume", - 0x03ed => "balance", - } -ATTRS13 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS15 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS16 = { - 0x07db => "link", - 0x07dc => "alink", - 0x07dd => "vlink", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x938b => "text", - 0x938e => "nowrap", - 0x93ae => "topmargin", - 0x93af => "rightmargin", - 0x93b0 => "bottommargin", - 0x93b1 => "leftmargin", - 0x93b6 => "bgproperties", - 0x93d8 => "scroll", - 0x977b => "onselect", - 0x9791 => "onload", - 0x9792 => "onunload", - 0x9798 => "onbeforeunload", - 0x97b3 => "onbeforeprint", - 0x97b4 => "onafterprint", - 0xfe0c => "bgcolor", - } -ATTRS17 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS18 = { - 0x07d1 => "type", - 0x8001 => "name", - } -ATTRS19 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x93a8 => "valign", - } -ATTRS20 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS21 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS22 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS23 = { - 0x03ea => "span", - 0x8006 => "width", - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS24 = { - 0x03ea => "span", - 0x8006 => "width", - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS27 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938e => "nowrap", - } -ATTRS29 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS31 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938e => "nowrap", - } -ATTRS32 = { - 0x03ea => "compact", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS33 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938e => "nowrap", - } -ATTRS34 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS35 = { - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x8bbd => "palette", - 0x8bbe => "pluginspage", - 0x8bbf => "codebase", - 0x8bbf => "src", - 0x8bc1 => "units", - 0x8bc2 => "type", - 0x8bc3 => "hidden", - } -ATTRS36 = { - 0x804a => "align", - } -ATTRS37 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938b => "color", - 0x939b => "face", - 0x939c => "size", - } -ATTRS38 = { - 0x03ea => "action", - 0x03ec => "enctype", - 0x03ed => "method", - 0x03ef => "target", - 0x03f4 => "accept-charset", - 0x8001 => "name", - 0x977c => "onsubmit", - 0x977d => "onreset", - } -ATTRS39 = { - 0x8000 => "align", - 0x8001 => "name", - 0x8bb9 => "src", - 0x8bbb => "border", - 0x8bbc => "frameborder", - 0x8bbd => "framespacing", - 0x8bbe => "marginwidth", - 0x8bbf => "marginheight", - 0x8bc0 => "noresize", - 0x8bc1 => "scrolling", - 0x8fa2 => "bordercolor", - } -ATTRS40 = { - 0x03e9 => "rows", - 0x03ea => "cols", - 0x03eb => "border", - 0x03ec => "bordercolor", - 0x03ed => "frameborder", - 0x03ee => "framespacing", - 0x8001 => "name", - 0x9791 => "onload", - 0x9792 => "onunload", - 0x9798 => "onbeforeunload", - 0x97b3 => "onbeforeprint", - 0x97b4 => "onafterprint", - } -ATTRS42 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS43 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS44 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS45 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS46 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS47 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS49 = { - 0x03ea => "noshade", - 0x8006 => "width", - 0x8007 => "size", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938b => "color", - } -ATTRS51 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS52 = { - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x8bb9 => "src", - 0x8bbb => "border", - 0x8bbc => "frameborder", - 0x8bbd => "framespacing", - 0x8bbe => "marginwidth", - 0x8bbf => "marginheight", - 0x8bc0 => "noresize", - 0x8bc1 => "scrolling", - 0x8fa2 => "vspace", - 0x8fa3 => "hspace", - } -ATTRS53 = { - 0x03eb => "alt", - 0x03ec => "src", - 0x03ed => "border", - 0x03ee => "vspace", - 0x03ef => "hspace", - 0x03f0 => "lowsrc", - 0x03f1 => "vrml", - 0x03f2 => "dynsrc", - 0x03f4 => "loop", - 0x03f6 => "start", - 0x07d3 => "ismap", - 0x07d9 => "usemap", - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x804a => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x978d => "onabort", - 0x978e => "onerror", - 0x9791 => "onload", - } -ATTRS54 = { - 0x07d1 => "type", - 0x07d3 => "size", - 0x07d4 => "maxlength", - 0x07d6 => "readonly", - 0x07d8 => "indeterminate", - 0x07da => "checked", - 0x07db => "alt", - 0x07dc => "src", - 0x07dd => "border", - 0x07de => "vspace", - 0x07df => "hspace", - 0x07e0 => "lowsrc", - 0x07e1 => "vrml", - 0x07e2 => "dynsrc", - 0x07e4 => "loop", - 0x07e5 => "start", - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x804a => "align", - 0x93ee => "value", - 0x977b => "onselect", - 0x978d => "onabort", - 0x978e => "onerror", - 0x978f => "onchange", - 0x9791 => "onload", - } -ATTRS56 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS57 = { - 0x03e9 => "for", - } -ATTRS58 = { - 0x804a => "align", - } -ATTRS59 = { - 0x03ea => "value", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x939a => "type", - } -ATTRS60 = { - 0x03ee => "href", - 0x03ef => "rel", - 0x03f0 => "rev", - 0x03f1 => "type", - 0x03f9 => "media", - 0x03fa => "target", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x978e => "onerror", - 0x9791 => "onload", - } -ATTRS61 = { - 0x9399 => "clear", - } -ATTRS62 = { - 0x8001 => "name", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS63 = { - 0x1771 => "scrolldelay", - 0x1772 => "direction", - 0x1773 => "behavior", - 0x1774 => "scrollamount", - 0x1775 => "loop", - 0x1776 => "vspace", - 0x1777 => "hspace", - 0x1778 => "truespeed", - 0x8006 => "width", - 0x8007 => "height", - 0x9785 => "onbounce", - 0x978b => "onfinish", - 0x978c => "onstart", - 0xfe0c => "bgcolor", - } -ATTRS65 = { - 0x03ea => "http-equiv", - 0x03eb => "content", - 0x03ec => "url", - 0x03f6 => "charset", - 0x8001 => "name", - } -ATTRS66 = { - 0x03f5 => "n", - } -ATTRS71 = { - 0x8000 => "border", - 0x8000 => "usemap", - 0x8001 => "name", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x804a => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x8bbb => "classid", - 0x8bbc => "data", - 0x8bbf => "codebase", - 0x8bc0 => "codetype", - 0x8bc1 => "code", - 0x8bc2 => "type", - 0x8bc5 => "vspace", - 0x8bc6 => "hspace", - 0x978e => "onerror", - } -ATTRS72 = { - 0x03eb => "compact", - 0x03ec => "start", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x939a => "type", - } -ATTRS73 = { - 0x03ea => "selected", - 0x03eb => "value", - } -ATTRS74 = { - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS75 = { - 0x8000 => "name", - 0x8000 => "value", - 0x8000 => "type", - } -ATTRS76 = { - 0x9399 => "clear", - } -ATTRS77 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x9399 => "clear", - } -ATTRS78 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS82 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS83 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS84 = { - 0x03ea => "src", - 0x03ed => "for", - 0x03ee => "event", - 0x03f0 => "defer", - 0x03f2 => "type", - 0x978e => "onerror", - } -ATTRS85 = { - 0x03eb => "size", - 0x03ec => "multiple", - 0x8000 => "align", - 0x8001 => "name", - 0x978f => "onchange", - } -ATTRS86 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS87 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS88 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS89 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS90 = { - 0x03eb => "type", - 0x03ef => "media", - 0x8046 => "title", - 0x978e => "onerror", - 0x9791 => "onload", - } -ATTRS91 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS92 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS93 = { - 0x03ea => "cols", - 0x03eb => "border", - 0x03ec => "rules", - 0x03ed => "frame", - 0x03ee => "cellspacing", - 0x03ef => "cellpadding", - 0x03fa => "datapagesize", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x804a => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0xfe0c => "bgcolor", - } -ATTRS94 = { - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS95 = { - 0x8049 => "align", - 0x93a8 => "valign", - } -ATTRS96 = { - 0x07d2 => "rowspan", - 0x07d3 => "colspan", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x938e => "nowrap", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS97 = { - 0x1b5a => "rows", - 0x1b5b => "cols", - 0x1b5c => "wrap", - 0x1b5d => "readonly", - 0x8001 => "name", - 0x977b => "onselect", - 0x978f => "onchange", - } -ATTRS98 = { - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS99 = { - 0x07d2 => "rowspan", - 0x07d3 => "colspan", - 0x8006 => "width", - 0x8007 => "height", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x938a => "background", - 0x938e => "nowrap", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS100 = { - 0x8049 => "align", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS102 = { - 0x8007 => "height", - 0x8046 => "title", - 0x8049 => "align", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x93a5 => "bordercolor", - 0x93a6 => "bordercolorlight", - 0x93a7 => "bordercolordark", - 0x93a8 => "valign", - 0xfe0c => "bgcolor", - } -ATTRS103 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS104 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS105 = { - 0x03eb => "compact", - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - 0x939a => "type", - } -ATTRS106 = { - 0x8046 => "title", - 0x804b => "style", - 0x83ea => "class", - 0x83eb => "id", - } -ATTRS108 = { - 0x9399 => "clear", - } - TAGS = [ None, None, @@ -893,6 +110,789 @@ TAGS = [ None, ] +ATTRS0 = { + 0x8010: "tabindex", + 0x8046: "title", + 0x804b: "style", + 0x804d: "disabled", + 0x83ea: "class", + 0x83eb: "id", + 0x83fe: "datafld", + 0x83ff: "datasrc", + 0x8400: "dataformatas", + 0x87d6: "accesskey", + 0x9392: "lang", + 0x93ed: "language", + 0x93fe: "dir", + 0x9771: "onmouseover", + 0x9772: "onmouseout", + 0x9773: "onmousedown", + 0x9774: "onmouseup", + 0x9775: "onmousemove", + 0x9776: "onkeydown", + 0x9777: "onkeyup", + 0x9778: "onkeypress", + 0x9779: "onclick", + 0x977a: "ondblclick", + 0x977e: "onhelp", + 0x977f: "onfocus", + 0x9780: "onblur", + 0x9783: "onrowexit", + 0x9784: "onrowenter", + 0x9786: "onbeforeupdate", + 0x9787: "onafterupdate", + 0x978a: "onreadystatechange", + 0x9790: "onscroll", + 0x9794: "ondragstart", + 0x9795: "onresize", + 0x9796: "onselectstart", + 0x9797: "onerrorupdate", + 0x9799: "ondatasetchanged", + 0x979a: "ondataavailable", + 0x979b: "ondatasetcomplete", + 0x979c: "onfilterchange", + 0x979f: "onlosecapture", + 0x97a0: "onpropertychange", + 0x97a2: "ondrag", + 0x97a3: "ondragend", + 0x97a4: "ondragenter", + 0x97a5: "ondragover", + 0x97a6: "ondragleave", + 0x97a7: "ondrop", + 0x97a8: "oncut", + 0x97a9: "oncopy", + 0x97aa: "onpaste", + 0x97ab: "onbeforecut", + 0x97ac: "onbeforecopy", + 0x97ad: "onbeforepaste", + 0x97af: "onrowsdelete", + 0x97b0: "onrowsinserted", + 0x97b1: "oncellchange", + 0x97b2: "oncontextmenu", + 0x97b6: "onbeforeeditfocus", + } +ATTRS3 = { + 0x0001: "href", + 0x03ec: "target", + 0x03ee: "rel", + 0x03ef: "rev", + 0x03f0: "urn", + 0x03f1: "methods", + 0x8001: "name", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS5 = { + 0x9399: "clear", + } +ATTRS6 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bbb: "classid", + 0x8bbc: "data", + 0x8bbf: "codebase", + 0x8bc0: "codetype", + 0x8bc1: "code", + 0x8bc2: "type", + 0x8bc5: "vspace", + 0x8bc6: "hspace", + 0x978e: "onerror", + } +ATTRS7 = { + 0x0001: "href", + 0x03ea: "shape", + 0x03eb: "coords", + 0x03ed: "target", + 0x03ee: "alt", + 0x03ef: "nohref", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS8 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS9 = { + 0x03ec: "href", + 0x03ed: "target", + } +ATTRS10 = { + 0x938b: "color", + 0x939b: "face", + 0x93a3: "size", + } +ATTRS12 = { + 0x03ea: "src", + 0x03eb: "loop", + 0x03ec: "volume", + 0x03ed: "balance", + } +ATTRS13 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS15 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS16 = { + 0x07db: "link", + 0x07dc: "alink", + 0x07dd: "vlink", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938b: "text", + 0x938e: "nowrap", + 0x93ae: "topmargin", + 0x93af: "rightmargin", + 0x93b0: "bottommargin", + 0x93b1: "leftmargin", + 0x93b6: "bgproperties", + 0x93d8: "scroll", + 0x977b: "onselect", + 0x9791: "onload", + 0x9792: "onunload", + 0x9798: "onbeforeunload", + 0x97b3: "onbeforeprint", + 0x97b4: "onafterprint", + 0xfe0c: "bgcolor", + } +ATTRS17 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS18 = { + 0x07d1: "type", + 0x8001: "name", + } +ATTRS19 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x93a8: "valign", + } +ATTRS20 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS21 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS22 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS23 = { + 0x03ea: "span", + 0x8006: "width", + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS24 = { + 0x03ea: "span", + 0x8006: "width", + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS27 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS29 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS31 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS32 = { + 0x03ea: "compact", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS33 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938e: "nowrap", + } +ATTRS34 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS35 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bbd: "palette", + 0x8bbe: "pluginspage", + 0x8bbf: "codebase", + 0x8bbf: "src", + 0x8bc1: "units", + 0x8bc2: "type", + 0x8bc3: "hidden", + } +ATTRS36 = { + 0x804a: "align", + } +ATTRS37 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938b: "color", + 0x939b: "face", + 0x939c: "size", + } +ATTRS38 = { + 0x03ea: "action", + 0x03ec: "enctype", + 0x03ed: "method", + 0x03ef: "target", + 0x03f4: "accept-charset", + 0x8001: "name", + 0x977c: "onsubmit", + 0x977d: "onreset", + } +ATTRS39 = { + 0x8000: "align", + 0x8001: "name", + 0x8bb9: "src", + 0x8bbb: "border", + 0x8bbc: "frameborder", + 0x8bbd: "framespacing", + 0x8bbe: "marginwidth", + 0x8bbf: "marginheight", + 0x8bc0: "noresize", + 0x8bc1: "scrolling", + 0x8fa2: "bordercolor", + } +ATTRS40 = { + 0x03e9: "rows", + 0x03ea: "cols", + 0x03eb: "border", + 0x03ec: "bordercolor", + 0x03ed: "frameborder", + 0x03ee: "framespacing", + 0x8001: "name", + 0x9791: "onload", + 0x9792: "onunload", + 0x9798: "onbeforeunload", + 0x97b3: "onbeforeprint", + 0x97b4: "onafterprint", + } +ATTRS42 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS43 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS44 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS45 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS46 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS47 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS49 = { + 0x03ea: "noshade", + 0x8006: "width", + 0x8007: "size", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938b: "color", + } +ATTRS51 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS52 = { + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x8bb9: "src", + 0x8bbb: "border", + 0x8bbc: "frameborder", + 0x8bbd: "framespacing", + 0x8bbe: "marginwidth", + 0x8bbf: "marginheight", + 0x8bc0: "noresize", + 0x8bc1: "scrolling", + 0x8fa2: "vspace", + 0x8fa3: "hspace", + } +ATTRS53 = { + 0x03eb: "alt", + 0x03ec: "src", + 0x03ed: "border", + 0x03ee: "vspace", + 0x03ef: "hspace", + 0x03f0: "lowsrc", + 0x03f1: "vrml", + 0x03f2: "dynsrc", + 0x03f4: "loop", + 0x03f6: "start", + 0x07d3: "ismap", + 0x07d9: "usemap", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x978d: "onabort", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS54 = { + 0x07d1: "type", + 0x07d3: "size", + 0x07d4: "maxlength", + 0x07d6: "readonly", + 0x07d8: "indeterminate", + 0x07da: "checked", + 0x07db: "alt", + 0x07dc: "src", + 0x07dd: "border", + 0x07de: "vspace", + 0x07df: "hspace", + 0x07e0: "lowsrc", + 0x07e1: "vrml", + 0x07e2: "dynsrc", + 0x07e4: "loop", + 0x07e5: "start", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x804a: "align", + 0x93ee: "value", + 0x977b: "onselect", + 0x978d: "onabort", + 0x978e: "onerror", + 0x978f: "onchange", + 0x9791: "onload", + } +ATTRS56 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS57 = { + 0x03e9: "for", + } +ATTRS58 = { + 0x804a: "align", + } +ATTRS59 = { + 0x03ea: "value", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS60 = { + 0x03ee: "href", + 0x03ef: "rel", + 0x03f0: "rev", + 0x03f1: "type", + 0x03f9: "media", + 0x03fa: "target", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS61 = { + 0x9399: "clear", + } +ATTRS62 = { + 0x8001: "name", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS63 = { + 0x1771: "scrolldelay", + 0x1772: "direction", + 0x1773: "behavior", + 0x1774: "scrollamount", + 0x1775: "loop", + 0x1776: "vspace", + 0x1777: "hspace", + 0x1778: "truespeed", + 0x8006: "width", + 0x8007: "height", + 0x9785: "onbounce", + 0x978b: "onfinish", + 0x978c: "onstart", + 0xfe0c: "bgcolor", + } +ATTRS65 = { + 0x03ea: "http-equiv", + 0x03eb: "content", + 0x03ec: "url", + 0x03f6: "charset", + 0x8001: "name", + } +ATTRS66 = { + 0x03f5: "n", + } +ATTRS71 = { + 0x8000: "border", + 0x8000: "usemap", + 0x8001: "name", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x8bbb: "classid", + 0x8bbc: "data", + 0x8bbf: "codebase", + 0x8bc0: "codetype", + 0x8bc1: "code", + 0x8bc2: "type", + 0x8bc5: "vspace", + 0x8bc6: "hspace", + 0x978e: "onerror", + } +ATTRS72 = { + 0x03eb: "compact", + 0x03ec: "start", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS73 = { + 0x03ea: "selected", + 0x03eb: "value", + } +ATTRS74 = { + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS75 = { + 0x8000: "name", + 0x8000: "value", + 0x8000: "type", + } +ATTRS76 = { + 0x9399: "clear", + } +ATTRS77 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x9399: "clear", + } +ATTRS78 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS82 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS83 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS84 = { + 0x03ea: "src", + 0x03ed: "for", + 0x03ee: "event", + 0x03f0: "defer", + 0x03f2: "type", + 0x978e: "onerror", + } +ATTRS85 = { + 0x03eb: "size", + 0x03ec: "multiple", + 0x8000: "align", + 0x8001: "name", + 0x978f: "onchange", + } +ATTRS86 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS87 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS88 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS89 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS90 = { + 0x03eb: "type", + 0x03ef: "media", + 0x8046: "title", + 0x978e: "onerror", + 0x9791: "onload", + } +ATTRS91 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS92 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS93 = { + 0x03ea: "cols", + 0x03eb: "border", + 0x03ec: "rules", + 0x03ed: "frame", + 0x03ee: "cellspacing", + 0x03ef: "cellpadding", + 0x03fa: "datapagesize", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x804a: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0xfe0c: "bgcolor", + } +ATTRS94 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS95 = { + 0x8049: "align", + 0x93a8: "valign", + } +ATTRS96 = { + 0x07d2: "rowspan", + 0x07d3: "colspan", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938e: "nowrap", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS97 = { + 0x1b5a: "rows", + 0x1b5b: "cols", + 0x1b5c: "wrap", + 0x1b5d: "readonly", + 0x8001: "name", + 0x977b: "onselect", + 0x978f: "onchange", + } +ATTRS98 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS99 = { + 0x07d2: "rowspan", + 0x07d3: "colspan", + 0x8006: "width", + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x938a: "background", + 0x938e: "nowrap", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS100 = { + 0x8049: "align", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS102 = { + 0x8007: "height", + 0x8046: "title", + 0x8049: "align", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x93a5: "bordercolor", + 0x93a6: "bordercolorlight", + 0x93a7: "bordercolordark", + 0x93a8: "valign", + 0xfe0c: "bgcolor", + } +ATTRS103 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS104 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS105 = { + 0x03eb: "compact", + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + 0x939a: "type", + } +ATTRS106 = { + 0x8046: "title", + 0x804b: "style", + 0x83ea: "class", + 0x83eb: "id", + } +ATTRS108 = { + 0x9399: "clear", + } + TAGS_ATTRS = [ None, None, @@ -1005,4 +1005,4 @@ TAGS_ATTRS = [ None, ] -MAP = (TAGS, TAGS_ATTRS, ATTRS0) +MAP = (TAGS, ATTRS0, TAGS_ATTRS) diff --git a/src/calibre/ebooks/lit/maps/opf.py b/src/calibre/ebooks/lit/maps/opf.py index a39e6bf8e8..cc1acc4dfa 100644 --- a/src/calibre/ebooks/lit/maps/opf.py +++ b/src/calibre/ebooks/lit/maps/opf.py @@ -1,28 +1,3 @@ -ATTRS = { - 0x0001 => "href", - 0x0002 => "%never-used", - 0x0003 => "%guid", - 0x0004 => "%minimum_level", - 0x0005 => "%attr5", - 0x0006 => "id", - 0x0007 => "href", - 0x0008 => "media-type", - 0x0009 => "fallback", - 0x000A => "idref", - 0x000B => "xmlns:dc", - 0x000C => "xmlns:oebpackage", - 0x000D => "role", - 0x000E => "file-as", - 0x000F => "event", - 0x0010 => "scheme", - 0x0011 => "title", - 0x0012 => "type", - 0x0013 => "unique-identifier", - 0x0014 => "name", - 0x0015 => "content", - 0x0016 => "xml:lang", - } - TAGS = [ None, "package", @@ -69,6 +44,31 @@ TAGS = [ None, ] -TAGS_ATTR = [{} for i in xrange(43)] +ATTRS = { + 0x0001: "href", + 0x0002: "%never-used", + 0x0003: "%guid", + 0x0004: "%minimum_level", + 0x0005: "%attr5", + 0x0006: "id", + 0x0007: "href", + 0x0008: "media-type", + 0x0009: "fallback", + 0x000A: "idref", + 0x000B: "xmlns:dc", + 0x000C: "xmlns:oebpackage", + 0x000D: "role", + 0x000E: "file-as", + 0x000F: "event", + 0x0010: "scheme", + 0x0011: "title", + 0x0012: "type", + 0x0013: "unique-identifier", + 0x0014: "name", + 0x0015: "content", + 0x0016: "xml:lang", + } -MAP = (TAGS, TAGS_ATTRS, ATTRS0) +TAGS_ATTRS = [{} for i in xrange(43)] + +MAP = (TAGS, ATTRS, TAGS_ATTRS) diff --git a/src/calibre/ebooks/lit/mssha1.py b/src/calibre/ebooks/lit/mssha1.py new file mode 100644 index 0000000000..f6f7c33444 --- /dev/null +++ b/src/calibre/ebooks/lit/mssha1.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 + +"""A sample implementation of SHA-1 in pure Python. + + Framework adapted from Dinu Gherman's MD5 implementation by + J. Hallén and L. Creighton. SHA-1 implementation based directly on + the text of the NIST standard FIPS PUB 180-1. +""" + + +__date__ = '2004-11-17' +__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy + + +import struct, copy + + +# ====================================================================== +# Bit-Manipulation helpers +# +# _long2bytes() was contributed by Barry Warsaw +# and is reused here with tiny modifications. +# ====================================================================== + +def _long2bytesBigEndian(n, blocksize=0): + """Convert a long integer to a byte string. + + If optional blocksize is given and greater than zero, pad the front + of the byte string with binary zeros so that the length is a multiple + of blocksize. + """ + + # After much testing, this algorithm was deemed to be the fastest. + s = '' + pack = struct.pack + while n > 0: + s = pack('>I', n & 0xffffffffL) + s + n = n >> 32 + + # Strip off leading zeros. + for i in range(len(s)): + if s[i] != '\000': + break + else: + # Only happens when n == 0. + s = '\000' + i = 0 + + s = s[i:] + + # Add back some pad bytes. This could be done more efficiently + # w.r.t. the de-padding being done above, but sigh... + if blocksize > 0 and len(s) % blocksize: + s = (blocksize - len(s) % blocksize) * '\000' + s + + return s + + +def _bytelist2longBigEndian(list): + "Transform a list of characters into a list of longs." + + imax = len(list)/4 + hl = [0L] * imax + + j = 0 + i = 0 + while i < imax: + b0 = long(ord(list[j])) << 24 + b1 = long(ord(list[j+1])) << 16 + b2 = long(ord(list[j+2])) << 8 + b3 = long(ord(list[j+3])) + hl[i] = b0 | b1 | b2 | b3 + i = i+1 + j = j+4 + + return hl + + +def _rotateLeft(x, n): + "Rotate x (32 bit) left n bits circularly." + + return (x << n) | (x >> (32-n)) + + +# ====================================================================== +# The SHA transformation functions +# +# ====================================================================== + +def f0_19(B, C, D): + return (B & (C ^ D)) ^ D + +def f20_39(B, C, D): + return B ^ C ^ D + +def f40_59(B, C, D): + return ((B | C) & D) | (B & C) + +def f60_79(B, C, D): + return B ^ C ^ D + +def f6_42(B, C, D): + return (B + C) ^ C + +f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20 +f[3] = f20_39 +f[6] = f6_42 +f[10] = f20_39 +f[15] = f20_39 +f[26] = f0_19 +f[31] = f40_59 +f[42] = f6_42 +f[51] = f20_39 +f[68] = f0_19 + + +# Constants to be used +K = [ + 0x5A827999L, # ( 0 <= t <= 19) + 0x6ED9EBA1L, # (20 <= t <= 39) + 0x8F1BBCDCL, # (40 <= t <= 59) + 0xCA62C1D6L # (60 <= t <= 79) + ] + +class sha: + "An implementation of the MD5 hash function in pure Python." + + def __init__(self): + "Initialisation." + + # Initial message length in bits(!). + self.length = 0L + self.count = [0, 0] + + # Initial empty message as a sequence of bytes (8 bit characters). + self.input = [] + + # Call a separate init function, that can be used repeatedly + # to start from scratch on the same object. + self.init() + + + def init(self): + "Initialize the message-digest and set all fields to zero." + + self.length = 0L + self.input = [] + + # Initial 160 bit message digest (5 times 32 bit). + self.H0 = 0x32107654L + self.H1 = 0x23016745L + self.H2 = 0xC4E680A2L + self.H3 = 0xDC679823L + self.H4 = 0xD0857A34L + + def _transform(self, W): + for t in range(16, 80): + W.append(_rotateLeft( + W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL) + + A = self.H0 + B = self.H1 + C = self.H2 + D = self.H3 + E = self.H4 + + for t in xrange(0, 80): + TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20] + E = D + D = C + C = _rotateLeft(B, 30) & 0xffffffffL + B = A + A = TEMP & 0xffffffffL + + self.H0 = (self.H0 + A) & 0xffffffffL + self.H1 = (self.H1 + B) & 0xffffffffL + self.H2 = (self.H2 + C) & 0xffffffffL + self.H3 = (self.H3 + D) & 0xffffffffL + self.H4 = (self.H4 + E) & 0xffffffffL + + + # Down from here all methods follow the Python Standard Library + # API of the sha module. + + def update(self, inBuf): + """Add to the current message. + + Update the sha object with the string arg. Repeated calls + are equivalent to a single call with the concatenation of all + the arguments, i.e. s.update(a); s.update(b) is equivalent + to s.update(a+b). + + The hash is immediately calculated for all full blocks. The final + calculation is made in digest(). It will calculate 1-2 blocks, + depending on how much padding we have to add. This allows us to + keep an intermediate value for the hash, so that we only need to + make minimal recalculation if we call update() to add more data + to the hashed string. + """ + + leninBuf = long(len(inBuf)) + + # Compute number of bytes mod 64. + index = (self.count[1] >> 3) & 0x3FL + + # Update number of bits. + self.count[1] = self.count[1] + (leninBuf << 3) + if self.count[1] < (leninBuf << 3): + self.count[0] = self.count[0] + 1 + self.count[0] = self.count[0] + (leninBuf >> 29) + + partLen = 64 - index + + if leninBuf >= partLen: + self.input[index:] = list(inBuf[:partLen]) + self._transform(_bytelist2longBigEndian(self.input)) + i = partLen + while i + 63 < leninBuf: + self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64]))) + i = i + 64 + else: + self.input = list(inBuf[i:leninBuf]) + else: + i = 0 + self.input = self.input + list(inBuf) + + + def digest(self): + """Terminate the message-digest computation and return digest. + + Return the digest of the strings passed to the update() + method so far. This is a 16-byte string which may contain + non-ASCII characters, including null bytes. + """ + + H0 = self.H0 + H1 = self.H1 + H2 = self.H2 + H3 = self.H3 + H4 = self.H4 + input = [] + self.input + count = [] + self.count + + index = (self.count[1] >> 3) & 0x3fL + + if index < 56: + padLen = 56 - index + else: + padLen = 120 - index + + padding = ['\200'] + ['\000'] * 63 + self.update(padding[:padLen]) + + # Append length (before padding). + bits = _bytelist2longBigEndian(self.input[:56]) + count + + self._transform(bits) + + # Store state in digest. + digest = _long2bytesBigEndian(self.H0, 4) + \ + _long2bytesBigEndian(self.H1, 4) + \ + _long2bytesBigEndian(self.H2, 4) + \ + _long2bytesBigEndian(self.H3, 4) + \ + _long2bytesBigEndian(self.H4, 4) + + self.H0 = H0 + self.H1 = H1 + self.H2 = H2 + self.H3 = H3 + self.H4 = H4 + self.input = input + self.count = count + + return digest + + + def hexdigest(self): + """Terminate and return digest in HEX form. + + Like digest() except the digest is returned as a string of + length 32, containing only hexadecimal digits. This may be + used to exchange the value safely in email or other non- + binary environments. + """ + return ''.join(['%02x' % ord(c) for c in self.digest()]) + + def copy(self): + """Return a clone object. + + Return a copy ('clone') of the md5 object. This can be used + to efficiently compute the digests of strings that share + a common initial substring. + """ + + return copy.deepcopy(self) + + +# ====================================================================== +# Mimic Python top-level functions from standard library API +# for consistency with the md5 module of the standard library. +# ====================================================================== + +# These are mandatory variables in the module. They have constant values +# in the SHA standard. + +digest_size = digestsize = 20 +blocksize = 1 + +def new(arg=None): + """Return a new sha crypto object. + + If arg is present, the method call update(arg) is made. + """ + + crypto = sha() + if arg: + crypto.update(arg) + + return crypto + +if __name__ == '__main__': + def main(): + import sys + file = None + if len(sys.argv) > 2: + print "usage: %s [FILE]" % sys.argv[0] + return + elif len(sys.argv) < 2: + file = sys.stdin + else: + file = open(sys.argv[1], 'rb') + context = new() + data = file.read(16384) + while data: + context.update(data) + data = file.read(16384) + file.close() + digest = context.hexdigest().upper() + for i in xrange(0, 40, 8): + print digest[i:i+8], + print + main() diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 1a0f42f8db..711aef6586 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -5,6 +5,7 @@ Support for reading the metadata from a lit file. ''' import sys, struct, cStringIO, os +import functools from itertools import repeat from calibre import relpath @@ -13,6 +14,31 @@ from calibre.ebooks.metadata.opf import OPFReader from calibre.ebooks.lit import LitError from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP +OPF_DECL = """" + +""" +XHTML_DECL = """ + +""" + +class DirectoryEntry(object): + def __init__(self, name, section, offset, size): + self.name = name + self.section = section + self.offset = offset + self.size = size + + def __repr__(self): + return "" \ + % (self.name, self.section, self.offset, self.size) + + def __str__(self): + return repr(self) + def u32(bytes): return struct.unpack('') index = self.binary_to_text(base=index, depth=depth+1) - is_goingdown = 0 + is_goingdown = False if not tag_name: raise LitError('Tag ends before it begins.') self.buf.write('') @@ -222,7 +248,7 @@ class UnBinary(object): if not in_censorship: self.buf.write(c) count -= 1 - elif count == 0: + if count == 0: if not in_censorship: self.buf.write('"') in_censorship = False @@ -268,7 +294,7 @@ class UnBinary(object): href += c count -= 1 if count == 0: - doc, m, frag = href.partition('#') + doc, m, frag = href[1:].partition('#') path = self.item_path(doc) if m and frag: path += m + frag @@ -297,100 +323,74 @@ class ManifestItem(object): def __repr__(self): return self.internal + u'->' + self.path +def preserve(function): + def wrapper(self, *args, **kwargs): + opos = self._stream.tell() + try: + return function(self, *args, **kwargs) + finally: + self._stream.seek(opos) + functools.update_wrapper(wrapper, function) + return wrapper + class LitFile(object): PIECE_SIZE = 16 def magic(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(0) - val = self._stream.read(8) - finally: - self._stream.seek(opos) - return val + self._stream.seek(0) + return self._stream.read(8) return property(fget=fget) magic = magic() def version(): def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(8) - val = u32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(8) + return u32(self._stream.read(4)) return property(fget=fget) version = version() def hdr_len(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(12) - val = int32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(12) + return int32(self._stream.read(4)) return property(fget=fget) hdr_len = hdr_len() def num_pieces(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(16) - val = int32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(16) + return int32(self._stream.read(4)) return property(fget=fget) num_pieces = num_pieces() def sec_hdr_len(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(20) - val = int32(self._stream.read(4)) - finally: - self._stream.seek(opos) - return val + self._stream.seek(20) + return int32(self._stream.read(4)) return property(fget=fget) sec_hdr_len = sec_hdr_len() def guid(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - self._stream.seek(24) - val = self._stream.read(16) - finally: - self._stream.seek(opos) - return val + self._stream.seek(24) + return self._stream.read(16) return property(fget=fget) guid = guid() def header(): + @preserve def fget(self): - val = None - opos = self._stream.tell() - try: - size = self.hdr_len \ - + (self.num_pieces * self.PIECE_SIZE) \ - + self.sec_hdr_len - self._stream.seek(0) - val = self._stream.read(size) - finally: - self._stream.seek(opos) - return val + size = self.hdr_len \ + + (self.num_pieces * self.PIECE_SIZE) \ + + self.sec_hdr_len + self._stream.seek(0) + return self._stream.read(size) return property(fget=fget) header = header() @@ -402,70 +402,64 @@ class LitFile(object): raise LitError('Unknown LIT version %d'%(self.version,)) self.read_secondary_header() self.read_header_pieces() - - def read_secondary_header(self): - opos = self._stream.tell() - try: - self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) - bytes = self._stream.read(self.sec_hdr_len) - offset = int32(bytes[4:]) - while offset < len(bytes): - blocktype = bytes[offset:offset+4] - blockver = u32(bytes[offset+4:]) - if blocktype == 'CAOL': - if blockver != 2: - raise LitError( - 'Unknown CAOL block format %d' % blockver) - self.creator_id = u32(bytes[offset+12:]) - self.entry_chunklen = u32(bytes[offset+20:]) - self.count_chunklen = u32(bytes[offset+24:]) - self.entry_unknown = u32(bytes[offset+28:]) - self.count_unknown = u32(bytes[offset+32:]) - offset += 48 - elif blocktype == 'ITSF': - if blockver != 4: - raise LitError( - 'Unknown ITSF block format %d' % blockver) - if u32(bytes[offset+4+16:]): - raise LitError('This file has a 64bit content offset') - self.content_offset = u32(bytes[offset+16:]) - self.timestamp = u32(bytes[offset+24:]) - self.language_id = u32(bytes[offset+28:]) - offset += 48 - if not hasattr(self, 'content_offset'): - raise LitError('Could not figure out the content offset') - finally: - self._stream.seek(opos) + @preserve + def read_secondary_header(self): + self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE) + bytes = self._stream.read(self.sec_hdr_len) + offset = int32(bytes[4:]) + while offset < len(bytes): + blocktype = bytes[offset:offset+4] + blockver = u32(bytes[offset+4:]) + if blocktype == 'CAOL': + if blockver != 2: + raise LitError( + 'Unknown CAOL block format %d' % blockver) + self.creator_id = u32(bytes[offset+12:]) + self.entry_chunklen = u32(bytes[offset+20:]) + self.count_chunklen = u32(bytes[offset+24:]) + self.entry_unknown = u32(bytes[offset+28:]) + self.count_unknown = u32(bytes[offset+32:]) + offset += 48 + elif blocktype == 'ITSF': + if blockver != 4: + raise LitError( + 'Unknown ITSF block format %d' % blockver) + if u32(bytes[offset+4+16:]): + raise LitError('This file has a 64bit content offset') + self.content_offset = u32(bytes[offset+16:]) + self.timestamp = u32(bytes[offset+24:]) + self.language_id = u32(bytes[offset+28:]) + offset += 48 + if not hasattr(self, 'content_offset'): + raise LitError('Could not figure out the content offset') + + @preserve def read_header_pieces(self): - opos = self._stream.tell() - try: - src = self.header[self.hdr_len:] - for i in range(self.num_pieces): - piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE] - if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: - raise LitError('Piece %s has 64bit value' % repr(piece)) - offset, size = u32(piece), int32(piece[8:]) - self._stream.seek(offset) - piece = self._stream.read(size) - if i == 0: - continue # Dont need this piece - elif i == 1: - if u32(piece[8:]) != self.entry_chunklen or \ - u32(piece[12:]) != self.entry_unknown: - raise LitError('Secondary header does not match piece') - self.read_directory(piece) - elif i == 2: - if u32(piece[8:]) != self.count_chunklen or \ - u32(piece[12:]) != self.count_unknown: - raise LitError('Secondary header does not match piece') - continue # No data needed from this piece - elif i == 3: - self.piece3_guid = piece - elif i == 4: - self.piece4_guid = piece - finally: - self._stream.seek(opos) + src = self.header[self.hdr_len:] + for i in range(self.num_pieces): + piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE] + if u32(piece[4:]) != 0 or u32(piece[12:]) != 0: + raise LitError('Piece %s has 64bit value' % repr(piece)) + offset, size = u32(piece), int32(piece[8:]) + self._stream.seek(offset) + piece = self._stream.read(size) + if i == 0: + continue # Dont need this piece + elif i == 1: + if u32(piece[8:]) != self.entry_chunklen or \ + u32(piece[12:]) != self.entry_unknown: + raise LitError('Secondary header does not match piece') + self.read_directory(piece) + elif i == 2: + if u32(piece[8:]) != self.count_chunklen or \ + u32(piece[12:]) != self.count_unknown: + raise LitError('Secondary header does not match piece') + continue # No data needed from this piece + elif i == 3: + self.piece3_guid = piece + elif i == 4: + self.piece4_guid = piece def read_directory(self, piece): self.entries = [] @@ -521,108 +515,88 @@ class LitFile(object): if not hasattr(self, 'manifest'): raise LitError('Lit file does not have a valid manifest') - - def read_section_names(self, entry): - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - if len(raw) < 4: - raise LitError('Invalid Namelist section') - pos = 4 - self.num_sections = u16(raw[2:pos]) - - self.sections = {} - for section in range(self.num_sections): - size = u16(raw[pos:pos+2]) - pos += 2 - size = size*2 + 2 - if pos + size > len(raw): - raise LitError('Invalid Namelist section') - self.sections[section] = raw[pos:pos+size].decode('utf-16-le') - pos += size - finally: - self._stream.seek(opos) - - def read_manifest(self, entry): - opos = self._stream.tell() - try: - self.manifest = [] - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - pos = 0 - while pos < len(raw): - size = ord(raw[pos]) - if size == 0: break - pos += 1 - root = raw[pos:pos+size].decode('utf8') - pos += size - if pos >= len(raw): - raise LitError('Truncated manifest.') - for state in ['spine', 'not spine', 'css', 'images']: - num_files = int32(raw[pos:pos+4]) - pos += 4 - if num_files == 0: continue - - i = 0 - while i < num_files: - if pos+5 >= len(raw): - raise LitError('Truncated manifest.') - offset = u32(raw[pos:pos+4]) - pos += 4 - - slen = ord(raw[pos]) - pos += 1 - internal = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - original = raw[pos:pos+slen].decode('utf8') - pos += slen - - slen = ord(raw[pos]) - pos += 1 - mime_type = raw[pos:pos+slen].decode('utf8') - pos += slen + 1 - - self.manifest.append( - ManifestItem(original, internal, mime_type, - offset, root, state)) - i += 1 - finally: - self._stream.seek(opos) - - def read_meta(self, entry): - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + entry.offset) - raw = self._stream.read(entry.size) - xml = \ -'''\ - - -'''+\ - unicode(UnBinary(raw, self.manifest)) - self.meta = xml - finally: - self._stream.seek(opos) - + @preserve + def read_section_names(self, entry): + self._stream.seek(self.content_offset + entry.offset) + raw = self._stream.read(entry.size) + if len(raw) < 4: + raise LitError('Invalid Namelist section') + pos = 4 + self.num_sections = u16(raw[2:pos]) + + self.sections = {} + for section in range(self.num_sections): + size = u16(raw[pos:pos+2]) + pos += 2 + size = size*2 + 2 + if pos + size > len(raw): + raise LitError('Invalid Namelist section') + self.sections[section] = raw[pos:pos+size].decode('utf-16-le') + pos += size + + @preserve + def read_manifest(self, entry): + self.manifest = [] + self._stream.seek(self.content_offset + entry.offset) + raw = self._stream.read(entry.size) + pos = 0 + while pos < len(raw): + size = ord(raw[pos]) + if size == 0: break + pos += 1 + root = raw[pos:pos+size].decode('utf8') + pos += size + if pos >= len(raw): + raise LitError('Truncated manifest.') + for state in ['spine', 'not spine', 'css', 'images']: + num_files = int32(raw[pos:pos+4]) + pos += 4 + if num_files == 0: continue + + i = 0 + while i < num_files: + if pos+5 >= len(raw): + raise LitError('Truncated manifest.') + offset = u32(raw[pos:pos+4]) + pos += 4 + + slen = ord(raw[pos]) + pos += 1 + internal = raw[pos:pos+slen].decode('utf8') + pos += slen + + slen = ord(raw[pos]) + pos += 1 + original = raw[pos:pos+slen].decode('utf8') + pos += slen + + slen = ord(raw[pos]) + pos += 1 + mime_type = raw[pos:pos+slen].decode('utf8') + pos += slen + 1 + + self.manifest.append( + ManifestItem(original, internal, mime_type, + offset, root, state)) + i += 1 + + @preserve + def read_meta(self, entry): + self._stream.seek(self.content_offset + entry.offset) + raw = self._stream.read(entry.size) + xml = OPF_DECL + unicode(UnBinary(raw, self.manifest)) + self.meta = xml + + @preserve def read_image(self, internal_name): cover_entry = None for entry in self.entries: if internal_name in entry.name: cover_entry = entry break - opos = self._stream.tell() - try: - self._stream.seek(self.content_offset + cover_entry.offset) - return self._stream.read(cover_entry.size) - finally: - self._stream.seek(opos) + self._stream.seek(self.content_offset + cover_entry.offset) + return self._stream.read(cover_entry.size) def get_metadata(stream): try: