mirror of https://github.com/kovidgoyal/calibre.git
Adding initial LitWriter and oeb2lit code.
parent 039572d937
commit f740d20f32

1  setup.py
@@ -146,6 +146,7 @@ if __name__ == '__main__':
    metadata_sqlite = 'library/metadata_sqlite.sql',
    jquery = 'gui2/viewer/jquery.js',
    jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',
+   html_css = 'ebooks/lit/html.css',
    )

DEST = os.path.join('src', APPNAME, 'resources.py')
420  src/calibre/ebooks/lit/html.css  (new file)
@@ -0,0 +1,420 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is mozilla.org code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Netscape Communications Corporation.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
* Blake Ross <BlakeR1234@aol.com>
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either of the GNU General Public License Version 2 or later (the "GPL"),
|
||||
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
@namespace url(http://www.w3.org/1999/xhtml); /* set default namespace to HTML */
|
||||
|
||||
/* blocks */
|
||||
|
||||
html, div, map, dt, isindex, form {
|
||||
display: block;
|
||||
}
|
||||
|
||||
body {
|
||||
display: block;
|
||||
margin: 8px;
|
||||
}
|
||||
|
||||
p, dl, multicol {
|
||||
display: block;
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
dd {
|
||||
display: block;
|
||||
}
|
||||
|
||||
blockquote {
|
||||
display: block;
|
||||
margin: 1em 40px;
|
||||
}
|
||||
|
||||
address {
|
||||
display: block;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
center {
|
||||
display: block;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
blockquote[type=cite] {
|
||||
display: block;
|
||||
margin: 1em 0px;
|
||||
border-color: blue;
|
||||
border-width: thin;
|
||||
}
|
||||
|
||||
span[_moz_quote=true] {
|
||||
color: blue;
|
||||
}
|
||||
|
||||
pre[_moz_quote=true] {
|
||||
color: blue;
|
||||
}
|
||||
|
||||
h1 {
|
||||
display: block;
|
||||
font-size: 2em;
|
||||
font-weight: bold;
|
||||
margin: .67em 0;
|
||||
}
|
||||
|
||||
h2 {
|
||||
display: block;
|
||||
font-size: 1.5em;
|
||||
font-weight: bold;
|
||||
margin: .83em 0;
|
||||
}
|
||||
|
||||
h3 {
|
||||
display: block;
|
||||
font-size: 1.17em;
|
||||
font-weight: bold;
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
h4 {
|
||||
display: block;
|
||||
font-weight: bold;
|
||||
margin: 1.33em 0;
|
||||
}
|
||||
|
||||
h5 {
|
||||
display: block;
|
||||
font-size: 0.83em;
|
||||
font-weight: bold;
|
||||
margin: 1.67em 0;
|
||||
}
|
||||
|
||||
h6 {
|
||||
display: block;
|
||||
font-size: 0.67em;
|
||||
font-weight: bold;
|
||||
margin: 2.33em 0;
|
||||
}
|
||||
|
||||
listing {
|
||||
display: block;
|
||||
font-family: monospace;
|
||||
font-size: medium;
|
||||
white-space: pre;
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
xmp, pre, plaintext {
|
||||
display: block;
|
||||
font-family: monospace;
|
||||
white-space: pre;
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
/* tables */
|
||||
|
||||
table {
|
||||
display: table;
|
||||
border-spacing: 2px;
|
||||
border-collapse: separate;
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
text-indent: 0;
|
||||
}
|
||||
|
||||
table[align="left"] {
|
||||
float: left;
|
||||
}
|
||||
|
||||
table[align="right"] {
|
||||
float: right;
|
||||
}
|
||||
|
||||
table[rules]:not([rules="none"]) {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
/* caption inherits from table not table-outer */
|
||||
caption {
|
||||
display: table-caption;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
table[align="center"] > caption {
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
table[align="center"] > caption[align="left"] {
|
||||
margin-right: 0;
|
||||
}
|
||||
|
||||
table[align="center"] > caption[align="right"] {
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
tr {
|
||||
display: table-row;
|
||||
vertical-align: inherit;
|
||||
}
|
||||
|
||||
col {
|
||||
display: table-column;
|
||||
}
|
||||
|
||||
colgroup {
|
||||
display: table-column-group;
|
||||
}
|
||||
|
||||
tbody {
|
||||
display: table-row-group;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
thead {
|
||||
display: table-header-group;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
tfoot {
|
||||
display: table-footer-group;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
/* for XHTML tables without tbody */
|
||||
table > tr {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
td {
|
||||
display: table-cell;
|
||||
vertical-align: inherit;
|
||||
text-align: inherit;
|
||||
padding: 1px;
|
||||
}
|
||||
|
||||
th {
|
||||
display: table-cell;
|
||||
vertical-align: inherit;
|
||||
font-weight: bold;
|
||||
padding: 1px;
|
||||
}
|
||||
|
||||
/* inlines */
|
||||
|
||||
q:before {
|
||||
content: open-quote;
|
||||
}
|
||||
|
||||
q:after {
|
||||
content: close-quote;
|
||||
}
|
||||
|
||||
b, strong {
|
||||
font-weight: bolder;
|
||||
}
|
||||
|
||||
i, cite, em, var, dfn {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
tt, code, kbd, samp {
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
u, ins {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
s, strike, del {
|
||||
text-decoration: line-through;
|
||||
}
|
||||
|
||||
blink {
|
||||
text-decoration: blink;
|
||||
}
|
||||
|
||||
big {
|
||||
font-size: larger;
|
||||
}
|
||||
|
||||
small {
|
||||
font-size: smaller;
|
||||
}
|
||||
|
||||
sub {
|
||||
vertical-align: sub;
|
||||
font-size: smaller;
|
||||
line-height: normal;
|
||||
}
|
||||
|
||||
sup {
|
||||
vertical-align: super;
|
||||
font-size: smaller;
|
||||
line-height: normal;
|
||||
}
|
||||
|
||||
nobr {
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
/* titles */
|
||||
abbr[title], acronym[title] {
|
||||
border-bottom: dotted 1px;
|
||||
}
|
||||
|
||||
/* lists */
|
||||
|
||||
ul, menu, dir {
|
||||
display: block;
|
||||
list-style-type: disc;
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
ol {
|
||||
display: block;
|
||||
list-style-type: decimal;
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
li {
|
||||
display: list-item;
|
||||
}
|
||||
|
||||
/* nested lists have no top/bottom margins */
|
||||
ul ul, ul ol, ul dir, ul menu, ul dl,
|
||||
ol ul, ol ol, ol dir, ol menu, ol dl,
|
||||
dir ul, dir ol, dir dir, dir menu, dir dl,
|
||||
menu ul, menu ol, menu dir, menu menu, menu dl,
|
||||
dl ul, dl ol, dl dir, dl menu, dl dl {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
/* 2 deep unordered lists use a circle */
|
||||
ol ul, ul ul, menu ul, dir ul,
|
||||
ol menu, ul menu, menu menu, dir menu,
|
||||
ol dir, ul dir, menu dir, dir dir {
|
||||
list-style-type: circle;
|
||||
}
|
||||
|
||||
/* 3 deep (or more) unordered lists use a square */
|
||||
ol ol ul, ol ul ul, ol menu ul, ol dir ul,
|
||||
ol ol menu, ol ul menu, ol menu menu, ol dir menu,
|
||||
ol ol dir, ol ul dir, ol menu dir, ol dir dir,
|
||||
ul ol ul, ul ul ul, ul menu ul, ul dir ul,
|
||||
ul ol menu, ul ul menu, ul menu menu, ul dir menu,
|
||||
ul ol dir, ul ul dir, ul menu dir, ul dir dir,
|
||||
menu ol ul, menu ul ul, menu menu ul, menu dir ul,
|
||||
menu ol menu, menu ul menu, menu menu menu, menu dir menu,
|
||||
menu ol dir, menu ul dir, menu menu dir, menu dir dir,
|
||||
dir ol ul, dir ul ul, dir menu ul, dir dir ul,
|
||||
dir ol menu, dir ul menu, dir menu menu, dir dir menu,
|
||||
dir ol dir, dir ul dir, dir menu dir, dir dir dir {
|
||||
list-style-type: square;
|
||||
}
|
||||
|
||||
|
||||
/* leafs */
|
||||
|
||||
/* <hr> noshade and color attributes are handled completely by
|
||||
* the nsHTMLHRElement attribute mapping code
|
||||
*/
|
||||
hr {
|
||||
display: block;
|
||||
height: 2px;
|
||||
border: 1px inset;
|
||||
margin: 0.5em auto 0.5em auto;
|
||||
color: gray;
|
||||
}
|
||||
|
||||
hr[size="1"] {
|
||||
border-style: solid none none none;
|
||||
}
|
||||
|
||||
img[usemap], object[usemap] {
|
||||
color: blue;
|
||||
}
|
||||
|
||||
frameset {
|
||||
display: block ! important;
|
||||
position: static ! important;
|
||||
float: none ! important;
|
||||
border: none ! important;
|
||||
}
|
||||
|
||||
frame {
|
||||
border: none ! important;
|
||||
}
|
||||
|
||||
iframe {
|
||||
border: 2px inset;
|
||||
}
|
||||
|
||||
noframes {
|
||||
display: none;
|
||||
}
|
||||
|
||||
spacer {
|
||||
position: static ! important;
|
||||
float: none ! important;
|
||||
}
|
||||
|
||||
/* focusable content: anything w/ tabindex >=0 is focusable */
|
||||
abbr:focus, acronym:focus, address:focus, applet:focus, b:focus,
|
||||
base:focus, big:focus, blockquote:focus, br:focus, canvas:focus, caption:focus,
|
||||
center:focus, cite:focus, code:focus, col:focus, colgroup:focus, dd:focus,
|
||||
del:focus, dfn:focus, dir:focus, div:focus, dl:focus, dt:focus, em:focus,
|
||||
fieldset:focus, font:focus, form:focus, h1:focus, h2:focus, h3:focus, h4:focus,
|
||||
h5:focus, h6:focus, hr:focus, i:focus, img:focus, ins:focus,
|
||||
kbd:focus, label:focus, legend:focus, li:focus, link:focus, menu:focus,
|
||||
object:focus, ol:focus, p:focus, pre:focus, q:focus, s:focus, samp:focus,
|
||||
small:focus, span:focus, strike:focus, strong:focus, sub:focus, sup:focus,
|
||||
table:focus, tbody:focus, td:focus, tfoot:focus, th:focus, thead:focus,
|
||||
tr:focus, tt:focus, u:focus, ul:focus, var:focus {
|
||||
/* Don't specify the outline-color, we should always use initial value. */
|
||||
outline: 1px dotted;
|
||||
}
|
||||
|
||||
/* hidden elements */
|
||||
area, base, basefont, head, meta, script, style, title,
|
||||
noembed, param {
|
||||
display: none;
|
||||
}
|
||||
|
||||
/* Page breaks at body tags, to help out with LIT-generation */
|
||||
body {
|
||||
page-break-before: always;
|
||||
}
|
176  src/calibre/ebooks/lit/lzxcomp.py  (new file)
@@ -0,0 +1,176 @@
|
||||
from __future__ import with_statement
|
||||
import sys
|
||||
import os
|
||||
from cStringIO import StringIO
|
||||
from ctypes import *
|
||||
|
||||
__all__ = ['Compressor']
|
||||
|
||||
liblzxcomp = cdll.LoadLibrary('liblzxcomp.so')
|
||||
|
||||
class lzx_data(Structure):
|
||||
pass
|
||||
|
||||
lzx_get_bytes_t = CFUNCTYPE(c_int, c_voidp, c_int, c_voidp)
|
||||
lzx_put_bytes_t = CFUNCTYPE(c_int, c_voidp, c_int, c_voidp)
|
||||
lzx_mark_frame_t = CFUNCTYPE(None, c_voidp, c_uint32, c_uint32)
|
||||
lzx_at_eof_t = CFUNCTYPE(c_int, c_voidp)
|
||||
|
||||
class lzx_results(Structure):
|
||||
_fields_ = [('len_compressed_output', c_long),
|
||||
('len_uncompressed_input', c_long)]
|
||||
|
||||
# int lzx_init(struct lzx_data **lzxdp, int wsize_code,
|
||||
# lzx_get_bytes_t get_bytes, void *get_bytes_arg,
|
||||
# lzx_at_eof_t at_eof,
|
||||
# lzx_put_bytes_t put_bytes, void *put_bytes_arg,
|
||||
# lzx_mark_frame_t mark_frame, void *mark_frame_arg);
|
||||
lzx_init = liblzxcomp.lzx_init
|
||||
lzx_init.restype = c_int
|
||||
lzx_init.argtypes = [POINTER(POINTER(lzx_data)), c_int,
|
||||
lzx_get_bytes_t, c_voidp,
|
||||
lzx_at_eof_t,
|
||||
lzx_put_bytes_t, c_voidp,
|
||||
lzx_mark_frame_t, c_voidp]
|
||||
|
||||
# void lzx_reset(lzx_data *lzxd);
|
||||
lzx_reset = liblzxcomp.lzx_reset
|
||||
lzx_reset.restype = None
|
||||
lzx_reset.argtypes = [POINTER(lzx_data)]
|
||||
|
||||
# int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide);
|
||||
lzx_compress_block = liblzxcomp.lzx_compress_block
|
||||
lzx_compress_block.restype = c_int
|
||||
lzx_compress_block.argtypes = [POINTER(lzx_data), c_int, c_int]
|
||||
|
||||
# int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr);
|
||||
lzx_finish = liblzxcomp.lzx_finish
|
||||
lzx_finish.restype = c_int
|
||||
lzx_finish.argtypes = [POINTER(lzx_data), POINTER(lzx_results)]
|
||||
|
||||
|
||||
class LzxError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class Compressor(object):
|
||||
def __init__(self, wbits, reset=True):
|
||||
self._reset = reset
|
||||
self._blocksize = 1 << wbits
|
||||
self._buffered = 0
|
||||
self._input = StringIO()
|
||||
self._output = StringIO()
|
||||
self._flushing = False
|
||||
self._rtable = []
|
||||
self._get_bytes = lzx_get_bytes_t(self._get_bytes)
|
||||
self._at_eof = lzx_at_eof_t(self._at_eof)
|
||||
self._put_bytes = lzx_put_bytes_t(self._put_bytes)
|
||||
self._mark_frame = lzx_mark_frame_t(self._mark_frame)
|
||||
self._lzx = POINTER(lzx_data)()
|
||||
self._results = lzx_results()
|
||||
rv = lzx_init(self._lzx, wbits, self._get_bytes, c_voidp(),
|
||||
self._at_eof, self._put_bytes, c_voidp(),
|
||||
self._mark_frame, c_voidp())
|
||||
if rv != 0:
|
||||
raise LzxError("lzx_init() failed with %d" % rv)
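        # Note on the callback wiring above: liblzxcomp pulls input through
        # _get_bytes (draining self._input), pushes compressed bytes through
        # _put_bytes (into self._output), asks _at_eof whether the stream is
        # exhausted, and reports (uncompressed, compressed) frame offsets via
        # _mark_frame; those offsets accumulate in self._rtable and are
        # returned alongside the data by compress() and flush().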
|
||||
|
||||
def _add_input(self, data):
|
||||
self._input.seek(0, 2)
|
||||
self._input.write(data)
|
||||
self._input.seek(0)
|
||||
self._buffered += len(data)
|
||||
|
||||
def _reset_input(self):
|
||||
data = self._input.read()
|
||||
self._input.seek(0)
|
||||
self._input.truncate()
|
||||
self._input.write(data)
|
||||
self._input.seek(0)
|
||||
|
||||
def _reset_output(self):
|
||||
data = self._output.getvalue()
|
||||
self._output.seek(0)
|
||||
self._output.truncate()
|
||||
return data
|
||||
|
||||
def _reset_rtable(self):
|
||||
rtable = list(self._rtable)
|
||||
del self._rtable[:]
|
||||
return rtable
|
||||
|
||||
def _get_bytes(self, arg, n, buf):
|
||||
data = self._input.read(n)
|
||||
memmove(buf, data, len(data))
|
||||
self._buffered -= len(data)
|
||||
return len(data)
|
||||
|
||||
def _put_bytes(self, arg, n, buf):
|
||||
self._output.write(string_at(buf, n))
|
||||
return n
|
||||
|
||||
def _at_eof(self, arg):
|
||||
if self._flushing and self._buffered == 0:
|
||||
return 1
|
||||
return 0
|
||||
|
||||
def _mark_frame(self, arg, uncomp, comp):
|
||||
self._rtable.append((uncomp, comp))
|
||||
return
|
||||
|
||||
def _compress_block(self):
|
||||
rv = lzx_compress_block(self._lzx, self._blocksize, 1)
|
||||
if rv != 0:
|
||||
raise LzxError("lzx_compress_block() failed with %d" % rv)
|
||||
if self._reset:
|
||||
lzx_reset(self._lzx)
|
||||
|
||||
def compress(self, data, flush=False):
|
||||
self._add_input(data)
|
||||
self._flushing = flush
|
||||
while self._buffered >= self._blocksize:
|
||||
self._compress_block()
|
||||
if self._buffered > 0 and flush:
|
||||
self._compress_block()
|
||||
self._reset_input()
|
||||
data = self._reset_output()
|
||||
rtable = self._reset_rtable()
|
||||
return (data, rtable)
|
||||
|
||||
def flush(self):
|
||||
self._flushing = True
|
||||
if self._buffered > 0:
|
||||
self._compress_block()
|
||||
self._reset_input()
|
||||
data = self._reset_output()
|
||||
rtable = self._reset_rtable()
|
||||
return (data, rtable)
|
||||
|
||||
def close(self):
|
||||
if self._lzx:
|
||||
lzx_finish(self._lzx, self._results)
|
||||
self._lzx = None
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *exc_info):
|
||||
self.close()
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
|
||||
def main(argv=sys.argv):
|
||||
wbits, inf, outf = argv[1:]
|
||||
with open(inf, 'rb') as f:
|
||||
data = f.read()
|
||||
with Compressor(int(wbits)) as lzx:
|
||||
data, rtable = lzx.compress(data, flush=True)
|
||||
print rtable
|
||||
with open(outf, 'wb') as f:
|
||||
f.write(data)
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
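# Usage sketch (illustrative only; assumes liblzxcomp.so is loadable, and the
# window size of 1 << 15 bytes is just an example value):
#
#   with Compressor(15) as lzx:
#       compressed, rtable = lzx.compress(raw_bytes, flush=True)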
|
690  src/calibre/ebooks/lit/oeb.py  (new file)
@@ -0,0 +1,690 @@
|
||||
from __future__ import with_statement
|
||||
import os
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from types import StringTypes
|
||||
from itertools import izip, count
|
||||
from urlparse import urldefrag
|
||||
from lxml import etree
|
||||
|
||||
XML_PARSER = etree.XMLParser(
|
||||
remove_blank_text=True, recover=True, resolve_entities=False)
|
||||
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
||||
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
|
||||
OPF2_NS = 'http://www.idpf.org/2007/opf'
|
||||
DC09_NS = 'http://purl.org/metadata/dublin_core'
|
||||
DC10_NS = 'http://purl.org/dc/elements/1.0/'
|
||||
DC11_NS = 'http://purl.org/dc/elements/1.1/'
|
||||
XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance'
|
||||
DCTERMS_NS = 'http://purl.org/dc/terms/'
|
||||
NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
|
||||
XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS,
|
||||
'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
|
||||
'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS}
|
||||
|
||||
def XHTML(name): return '{%s}%s' % (XHTML_NS, name)
|
||||
def OPF(name): return '{%s}%s' % (OPF2_NS, name)
|
||||
def DC(name): return '{%s}%s' % (DC11_NS, name)
|
||||
def NCX(name): return '{%s}%s' % (NCX_NS, name)
|
||||
|
||||
XHTML_MIME = 'application/xhtml+xml'
|
||||
CSS_MIME = 'text/css'
|
||||
NCX_MIME = 'application/x-dtbncx+xml'
|
||||
OPF_MIME = 'application/oebps-package+xml'
|
||||
|
||||
OEB_STYLES = set([CSS_MIME, 'text/x-oeb1-css', 'text/x-oeb-css'])
|
||||
OEB_DOCS = set([XHTML_MIME, 'text/html', 'text/x-oeb1-document',
|
||||
'text/x-oeb-document'])
|
||||
|
||||
|
||||
def element(parent, *args, **kwargs):
|
||||
if parent is not None:
|
||||
return etree.SubElement(parent, *args, **kwargs)
|
||||
return etree.Element(*args, **kwargs)
|
||||
|
||||
def namespace(name):
|
||||
if '}' in name:
|
||||
return name.split('}', 1)[0][1:]
|
||||
return ''
|
||||
|
||||
def barename(name):
|
||||
if '}' in name:
|
||||
return name.split('}', 1)[1]
|
||||
return name
|
||||
|
||||
def xpath(elem, expr):
|
||||
return elem.xpath(expr, namespaces=XPNSMAP)
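# For example (illustrative): namespace('{http://www.w3.org/1999/xhtml}body')
# returns the XHTML namespace URI, barename() of the same tag returns 'body',
# and xpath(elem, '//h:p') matches XHTML <p> elements using the prefixes
# declared in XPNSMAP.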
|
||||
|
||||
|
||||
class AbstractContainer(object):
|
||||
def read_xml(self, path):
|
||||
return etree.fromstring(
|
||||
self.read(path), parser=XML_PARSER,
|
||||
base_url=os.path.dirname(path))
|
||||
|
||||
class DirContainer(AbstractContainer):
|
||||
def __init__(self, rootdir):
|
||||
self.rootdir = rootdir
|
||||
|
||||
def read(self, path):
|
||||
path = os.path.join(self.rootdir, path)
|
||||
with open(path, 'rb') as f:
|
||||
return f.read()
|
||||
|
||||
def write(self, path, data):
|
||||
path = os.path.join(self.rootdir, path)
|
||||
with open(path, 'wb') as f:
|
||||
return f.write(data)
|
||||
|
||||
|
||||
class Metadata(object):
|
||||
TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
|
||||
'format', 'identifier', 'language', 'publisher', 'relation',
|
||||
'rights', 'source', 'subject', 'title', 'type'])
|
||||
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
|
||||
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
|
||||
'xsi': XSI_NS}
|
||||
|
||||
class Item(object):
|
||||
def __init__(self, term, value, fq_attrib={}):
|
||||
if term == OPF('meta') and not value:
|
||||
fq_attrib = dict(fq_attrib)
|
||||
term = fq_attrib.pop('name')
|
||||
value = fq_attrib.pop('content')
|
||||
elif term in Metadata.TERMS and not namespace(term):
|
||||
term = DC(term)
|
||||
self.term = term
|
||||
self.value = value
|
||||
self.fq_attrib = dict(fq_attrib)
|
||||
self.attrib = attrib = {}
|
||||
for fq_attr in fq_attrib:
|
||||
attr = barename(fq_attr)
|
||||
attrib[attr] = fq_attrib[fq_attr]
|
||||
|
||||
def __getattr__(self, name):
|
||||
name = name.replace('_', '-')
|
||||
try:
|
||||
return self.attrib[name]
|
||||
except KeyError:
|
||||
raise AttributeError(
|
||||
'%r object has no attribute %r' \
|
||||
% (self.__class__.__name__, name))
|
||||
|
||||
def __repr__(self):
|
||||
return 'Item(term=%r, value=%r, attrib=%r)' \
|
||||
% (barename(self.term), self.value, self.attrib)
|
||||
|
||||
def __str__(self):
|
||||
return str(self.value)
|
||||
|
||||
def __unicode__(self):
|
||||
return unicode(self.value)
|
||||
|
||||
def to_opf1(self, dcmeta=None, xmeta=None):
|
||||
if namespace(self.term) == DC11_NS:
|
||||
name = DC(barename(self.term).title())
|
||||
elem = element(dcmeta, name, attrib=self.attrib)
|
||||
elem.text = self.value
|
||||
else:
|
||||
elem = element(xmeta, 'meta', attrib=self.attrib)
|
||||
elem.attrib['name'] = self.term
|
||||
elem.attrib['content'] = self.value
|
||||
return elem
|
||||
|
||||
def to_opf2(self, parent=None):
|
||||
if namespace(self.term) == DC11_NS:
|
||||
elem = element(parent, self.term, attrib=self.fq_attrib)
|
||||
elem.text = self.value
|
||||
else:
|
||||
elem = element(parent, OPF('meta'), attrib=self.fq_attrib)
|
||||
elem.attrib['name'] = self.term
|
||||
elem.attrib['content'] = self.value
|
||||
return elem
|
||||
|
||||
def __init__(self, oeb):
|
||||
self.oeb = oeb
|
||||
self.items = defaultdict(list)
|
||||
|
||||
def add(self, term, value, attrib):
|
||||
item = self.Item(term, value, attrib)
|
||||
items = self.items[barename(term)]
|
||||
items.append(item)
|
||||
return item
|
||||
|
||||
def iterkeys(self):
|
||||
for key in self.items:
|
||||
yield key
|
||||
__iter__ = iterkeys
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self.items[key]
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.items
|
||||
|
||||
def __getattr__(self, term):
|
||||
return self.items[term]
|
||||
|
||||
def to_opf1(self, parent=None):
|
||||
elem = element(parent, 'metadata')
|
||||
dcmeta = element(elem, 'dc-metadata', nsmap=self.OPF1_NSMAP)
|
||||
xmeta = element(elem, 'x-metadata')
|
||||
for term in self.items:
|
||||
for item in self.items[term]:
|
||||
item.to_opf1(dcmeta, xmeta)
|
||||
if 'ms-chaptertour' not in self.items:
|
||||
chaptertour = self.Item('ms-chaptertour', 'chaptertour')
|
||||
chaptertour.to_opf1(dcmeta, xmeta)
|
||||
return elem
|
||||
|
||||
def to_opf2(self, parent=None):
|
||||
        elem = element(parent, OPF('metadata'), nsmap=self.OPF2_NSMAP)
|
||||
for term in self.items:
|
||||
for item in self.items[term]:
|
||||
item.to_opf2(elem)
|
||||
return elem
|
||||
|
||||
|
||||
class Manifest(object):
|
||||
class Item(object):
|
||||
def __init__(self, id, href, media_type, loader=str):
|
||||
self.id = id
|
||||
self.href = self.path = href.replace('%20', ' ')
|
||||
self.media_type = media_type
|
||||
self.spine_position = None
|
||||
self.linear = True
|
||||
self._loader = loader
|
||||
self._data = None
|
||||
|
||||
def __repr__(self):
|
||||
return 'Item(id=%r, href=%r, media_type=%r)' \
|
||||
% (self.id, self.href, self.media_type)
|
||||
|
||||
def data():
|
||||
def fget(self):
|
||||
if self._data:
|
||||
return self._data
|
||||
data = self._loader(self.href)
|
||||
if self.media_type == XHTML_MIME:
|
||||
data = etree.fromstring(data, parser=XML_PARSER)
|
||||
if namespace(data.tag) != XHTML_NS:
|
||||
data.attrib['xmlns'] = XHTML_NS
|
||||
data = etree.tostring(data)
|
||||
data = etree.fromstring(data, parser=XML_PARSER)
|
||||
elif self.media_type.startswith('application/') \
|
||||
and self.media_type.endswith('+xml'):
|
||||
data = etree.fromstring(data, parser=XML_PARSER)
|
||||
return data
|
||||
def fset(self, value):
|
||||
self._data = value
|
||||
def fdel(self):
|
||||
self._data = None
|
||||
return property(fget, fset, fdel)
|
||||
data = data()
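        # Note: the nested fget/fset/fdel above build a property that loads an
        # item's bytes lazily on first access, parses XHTML and other
        # application/*+xml media types into an lxml tree (re-serializing
        # XHTML that lacks a namespace so it ends up in XHTML_NS), and lets an
        # explicit assignment to .data override the loader.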
|
||||
|
||||
def __cmp__(self, other):
|
||||
result = cmp(self.spine_position, other.spine_position)
|
||||
if result != 0:
|
||||
return result
|
||||
return cmp(self.id, other.id)
|
||||
|
||||
def __init__(self, oeb):
|
||||
self.oeb = oeb
|
||||
self.items = {}
|
||||
self.hrefs = {}
|
||||
|
||||
def add(self, id, href, media_type):
|
||||
item = self.Item(id, href, media_type, self.oeb.container.read)
|
||||
self.items[id] = item
|
||||
self.hrefs[href] = item
|
||||
return item
|
||||
|
||||
def remove(self, id):
|
||||
href = self.items[id].href
|
||||
del self.items[id]
|
||||
del self.hrefs[href]
|
||||
|
||||
def __iter__(self):
|
||||
for id in self.items:
|
||||
yield id
|
||||
|
||||
def __getitem__(self, id):
|
||||
return self.items[id]
|
||||
|
||||
def values(self):
|
||||
for item in self.items.values():
|
||||
yield item
|
||||
|
||||
    def items(self):
        for id, item in self.items.items():
            yield id, item

    def __contains__(self, key):
        return key in self.items
|
||||
|
||||
def to_opf1(self, parent=None):
|
||||
elem = element(parent, 'manifest')
|
||||
for item in self.items.values():
|
||||
attrib = {'id': item.id, 'href': item.href,
|
||||
'media-type': item.media_type}
|
||||
element(elem, 'item', attrib=attrib)
|
||||
return elem
|
||||
|
||||
def to_opf2(self, parent=None):
|
||||
elem = element(parent, OPF('manifest'))
|
||||
for item in self.items.values():
|
||||
attrib = {'id': item.id, 'href': item.href,
|
||||
'media-type': item.media_type}
|
||||
element(elem, OPF('item'), attrib=attrib)
|
||||
return elem
|
||||
|
||||
|
||||
class Spine(object):
|
||||
def __init__(self, oeb):
|
||||
self.oeb = oeb
|
||||
self.items = []
|
||||
|
||||
def add(self, item, linear):
|
||||
if isinstance(linear, StringTypes):
|
||||
linear = linear.lower()
|
||||
if linear is None or linear in ('yes', 'true'):
|
||||
linear = True
|
||||
elif linear in ('no', 'false'):
|
||||
linear = False
|
||||
item.linear = linear
|
||||
item.spine_position = len(self.items)
|
||||
self.items.append(item)
|
||||
return item
|
||||
|
||||
def __iter__(self):
|
||||
for item in self.items:
|
||||
yield item
|
||||
|
||||
def __getitem__(self, index):
|
||||
return self.items[index]
|
||||
|
||||
def __len__(self):
|
||||
return len(self.items)
|
||||
|
||||
def __contains__(self, item):
|
||||
return (item in self.items)
|
||||
|
||||
def to_opf1(self, parent=None):
|
||||
elem = element(parent, 'spine')
|
||||
for item in self.items:
|
||||
if item.linear:
|
||||
element(elem, 'itemref', attrib={'idref': item.id})
|
||||
return elem
|
||||
|
||||
def to_opf2(self, parent=None):
|
||||
elem = element(parent, OPF('spine'))
|
||||
for item in self.items:
|
||||
attrib = {'idref': item.id}
|
||||
if not item.linear:
|
||||
attrib['linear'] = 'no'
|
||||
element(elem, OPF('itemref'), attrib=attrib)
|
||||
return elem
|
||||
|
||||
|
||||
class Guide(object):
|
||||
class Reference(object):
|
||||
def __init__(self, type, title, href):
|
||||
self.type = type
|
||||
self.title = title
|
||||
self.href = href
|
||||
|
||||
def __repr__(self):
|
||||
return 'Reference(type=%r, title=%r, href=%r)' \
|
||||
% (self.type, self.title, self.href)
|
||||
|
||||
def __init__(self, oeb):
|
||||
self.oeb = oeb
|
||||
self.refs = {}
|
||||
|
||||
def add(self, type, title, href):
|
||||
ref = self.Reference(type, title, href)
|
||||
self.refs[type] = ref
|
||||
return ref
|
||||
|
||||
def by_type(self, type):
|
||||
        return self.refs[type]
|
||||
|
||||
def iterkeys(self):
|
||||
for type in self.refs:
|
||||
yield type
|
||||
__iter__ = iterkeys
|
||||
|
||||
def values(self):
|
||||
for ref in self.refs.values():
|
||||
yield ref
|
||||
|
||||
def items(self):
|
||||
for type, ref in self.refs.items():
|
||||
yield type, ref
|
||||
|
||||
def __getitem__(self, index):
|
||||
return self.refs[index]
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.refs
|
||||
|
||||
def to_opf1(self, parent=None):
|
||||
elem = element(parent, 'guide')
|
||||
for ref in self.refs.values():
|
||||
attrib = {'type': ref.type, 'href': ref.href}
|
||||
if ref.title:
|
||||
attrib['title'] = ref.title
|
||||
element(elem, 'reference', attrib=attrib)
|
||||
return elem
|
||||
|
||||
def to_opf2(self, parent=None):
|
||||
elem = element(parent, OPF('guide'))
|
||||
for ref in self.refs.values():
|
||||
attrib = {'type': ref.type, 'href': ref.href}
|
||||
if ref.title:
|
||||
attrib['title'] = ref.title
|
||||
element(elem, OPF('reference'), attrib=attrib)
|
||||
return elem
|
||||
|
||||
|
||||
class Toc(object):
|
||||
def __init__(self, title=None, href=None, klass=None, id=None):
|
||||
self.title = title
|
||||
self.href = href
|
||||
self.klass = klass
|
||||
self.id = id
|
||||
self.nodes = []
|
||||
|
||||
def add(self, title, href, klass=None, id=None):
|
||||
node = Toc(title, href, klass, id)
|
||||
self.nodes.append(node)
|
||||
return node
|
||||
|
||||
def __iter__(self):
|
||||
for node in self.nodes:
|
||||
yield node
|
||||
|
||||
def __getitem__(self, index):
|
||||
return self.nodes[index]
|
||||
|
||||
def depth(self, level=0):
|
||||
if self.nodes:
|
||||
return self.nodes[0].depth(level+1)
|
||||
return level
|
||||
|
||||
def to_opf1(self, tour):
|
||||
for node in self.nodes:
|
||||
element(tour, 'site',
|
||||
attrib={'title': node.title, 'href': node.href})
|
||||
node.to_opf1(tour)
|
||||
return tour
|
||||
|
||||
def to_ncx(self, parent, playorder=None, depth=1):
|
||||
if not playorder: playorder = [0]
|
||||
for node in self.nodes:
|
||||
playorder[0] += 1
|
||||
point = etree.SubElement(parent,
|
||||
NCX('navPoint'), attrib={'playOrder': str(playorder[0])})
|
||||
            if node.klass:
                point.attrib['class'] = node.klass
            if node.id:
                point.attrib['id'] = node.id
|
||||
label = etree.SubElement(point, NCX('navLabel'))
|
||||
etree.SubElement(label, NCX('text')).text = node.title
|
||||
href = node.href if depth > 1 else node.href.split('#', 1)[0]
|
||||
etree.SubElement(point, NCX('content'), attrib={'src': href})
|
||||
node.to_ncx(point, playorder, depth+1)
|
||||
return parent
|
||||
|
||||
|
||||
class Oeb(object):
|
||||
def __init__(self, opfpath, container=None):
|
||||
if not container:
|
||||
container = DirContainer(os.path.dirname(opfpath))
|
||||
opfpath = os.path.basename(opfpath)
|
||||
self.container = container
|
||||
opf = self._read_opf(opfpath)
|
||||
self._all_from_opf(opf)
|
||||
|
||||
def _convert_opf1(self, opf):
|
||||
nroot = etree.Element(OPF('package'),
|
||||
nsmap={None: OPF2_NS}, version="2.0", **dict(opf.attrib))
|
||||
metadata = etree.SubElement(nroot, OPF('metadata'),
|
||||
nsmap={'opf': OPF2_NS, 'dc': DC11_NS,
|
||||
'xsi': XSI_NS, 'dcterms': DCTERMS_NS})
|
||||
for prefix in ('d11', 'd10', 'd09'):
|
||||
elements = xpath(opf, 'metadata/dc-metadata/%s:*' % prefix)
|
||||
if elements: break
|
||||
for element in elements:
|
||||
if not element.text: continue
|
||||
tag = barename(element.tag).lower()
|
||||
element.tag = '{%s}%s' % (DC11_NS, tag)
|
||||
for name in element.attrib:
|
||||
if name in ('role', 'file-as', 'scheme'):
|
||||
nsname = '{%s}%s' % (OPF2_NS, name)
|
||||
element.attrib[nsname] = element.attrib[name]
|
||||
del element.attrib[name]
|
||||
metadata.append(element)
|
||||
for element in opf.xpath('metadata/x-metadata/meta'):
|
||||
metadata.append(element)
|
||||
for item in opf.xpath('manifest/item'):
|
||||
media_type = item.attrib['media-type']
|
||||
if media_type in OEB_DOCS:
|
||||
media_type = XHTML_MIME
|
||||
elif media_type in OEB_STYLES:
|
||||
media_type = CSS_MIME
|
||||
item.attrib['media-type'] = media_type
|
||||
for tag in ('manifest', 'spine', 'tours', 'guide'):
|
||||
for element in opf.xpath(tag):
|
||||
nroot.append(element)
|
||||
return etree.fromstring(etree.tostring(nroot), parser=XML_PARSER)
|
||||
|
||||
def _read_opf(self, opfpath):
|
||||
opf = self.container.read_xml(opfpath)
|
||||
version = float(opf.get('version', 1.0))
|
||||
if version < 2.0:
|
||||
opf = self._convert_opf1(opf)
|
||||
return opf
|
||||
|
||||
def _metadata_from_opf(self, opf):
|
||||
uid = opf.attrib['unique-identifier']
|
||||
self.metadata = metadata = Metadata(self)
|
||||
for elem in xpath(opf, '/o2:package/o2:metadata/*'):
|
||||
metadata.add(elem.tag, elem.text, elem.attrib)
|
||||
for item in metadata.identifier:
|
||||
if item.id == uid:
|
||||
self.uid = item
|
||||
break
|
||||
|
||||
def _manifest_from_opf(self, opf):
|
||||
self.manifest = manifest = Manifest(self)
|
||||
for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
|
||||
manifest.add(elem.get('id'), elem.get('href'),
|
||||
elem.get('media-type'))
|
||||
|
||||
def _spine_from_opf(self, opf):
|
||||
self.spine = spine = Spine(self)
|
||||
for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
|
||||
item = self.manifest[elem.get('idref')]
|
||||
spine.add(item, elem.get('linear'))
|
||||
extras = []
|
||||
for item in self.manifest.values():
|
||||
if item.media_type == XHTML_MIME \
|
||||
and item not in spine:
|
||||
extras.append(item)
|
||||
extras.sort()
|
||||
for item in extras:
|
||||
spine.add(item, False)
|
||||
|
||||
def _guide_from_opf(self, opf):
|
||||
self.guide = guide = Guide(self)
|
||||
for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
|
||||
guide.add(elem.get('type'), elem.get('title'), elem.get('href'))
|
||||
|
||||
def _toc_from_navpoint(self, toc, navpoint):
|
||||
children = xpath(navpoint, 'ncx:navPoint')
|
||||
for child in children:
|
||||
title = xpath(child, 'ncx:navLabel/ncx:text/text()')[0]
|
||||
href = xpath(child, 'ncx:content/@src')[0]
|
||||
id = child.get('id')
|
||||
klass = child.get('class')
|
||||
node = toc.add(title, href, id=id, klass=klass)
|
||||
self._toc_from_navpoint(node, child)
|
||||
|
||||
def _toc_from_ncx(self, opf):
|
||||
result = xpath(opf, '/o2:package/o2:spine/@toc')
|
||||
if not result:
|
||||
return False
|
||||
id = result[0]
|
||||
ncx = self.manifest[id].data
|
||||
self.manifest.remove(id)
|
||||
title = xpath(ncx, 'ncx:docTitle/ncx:text/text()')[0]
|
||||
self.toc = toc = Toc(title)
|
||||
navmaps = xpath(ncx, 'ncx:navMap')
|
||||
for navmap in navmaps:
|
||||
self._toc_from_navpoint(toc, navmap)
|
||||
return True
|
||||
|
||||
def _toc_from_tour(self, opf):
|
||||
result = xpath(opf, '/o2:package/o2:tours/o2:tour')
|
||||
if not result:
|
||||
return False
|
||||
tour = result[0]
|
||||
self.toc = toc = Toc(tour.get('title'))
|
||||
sites = xpath(tour, 'o2:site')
|
||||
for site in sites:
|
||||
toc.add(site.get('title'), site.get('href'))
|
||||
return True
|
||||
|
||||
def _toc_from_html(self, opf):
|
||||
if 'toc' not in self.guide:
|
||||
return False
|
||||
self.toc = toc = Toc()
|
||||
itempath, frag = urldefrag(self.guide['toc'].href)
|
||||
item = self.manifest.hrefs[itempath]
|
||||
html = item.data
|
||||
if frag:
|
||||
elem = xpath(html, './/*[@id="%s"]' % frag)
|
||||
html = elem[0] if elem else html
|
||||
titles = defaultdict(list)
|
||||
order = []
|
||||
for anchor in xpath(html, './/h:a[@href]'):
|
||||
href = anchor.attrib['href']
|
||||
path, frag = urldefrag(href)
|
||||
if not path:
|
||||
href = '#'.join((itempath, frag))
|
||||
title = ' '.join(xpath(anchor, './/text()'))
|
||||
if href not in titles:
|
||||
order.append(href)
|
||||
titles[href].append(title)
|
||||
for href in order:
|
||||
toc.add(' '.join(titles[href]), href)
|
||||
return True
|
||||
|
||||
def _toc_from_spine(self, opf):
|
||||
self.toc = toc = Toc()
|
||||
titles = []
|
||||
headers = []
|
||||
for item in self.spine:
|
||||
if not item.linear: continue
|
||||
html = item.data
|
||||
title = xpath(html, '/h:html/h:head/h:title/text()')
|
||||
if title: titles.append(title[0])
|
||||
            headers.append('(unlabeled)')
|
||||
for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
|
||||
expr = '/h:html/h:body//h:%s[position()=1]/text()' % (tag,)
|
||||
header = xpath(html, expr)
|
||||
if header:
|
||||
headers[-1] = header[0]
|
||||
break
|
||||
use = titles
|
||||
if len(titles) > len(set(titles)):
|
||||
use = headers
|
||||
for title, item in izip(use, self.spine):
|
||||
if not item.linear: continue
|
||||
toc.add(title, item.href)
|
||||
return True
|
||||
|
||||
def _toc_from_opf(self, opf):
|
||||
if self._toc_from_ncx(opf): return
|
||||
if self._toc_from_tour(opf): return
|
||||
if self._toc_from_html(opf): return
|
||||
self._toc_from_spine(opf)
|
||||
|
||||
def _all_from_opf(self, opf):
|
||||
self._metadata_from_opf(opf)
|
||||
self._manifest_from_opf(opf)
|
||||
self._spine_from_opf(opf)
|
||||
self._guide_from_opf(opf)
|
||||
self._toc_from_opf(opf)
|
||||
|
||||
def to_opf1(self):
|
||||
package = etree.Element('package',
|
||||
attrib={'unique-identifier': self.uid.id})
|
||||
metadata = self.metadata.to_opf1(package)
|
||||
manifest = self.manifest.to_opf1(package)
|
||||
spine = self.spine.to_opf1(package)
|
||||
tours = element(package, 'tours')
|
||||
tour = element(tours, 'tour',
|
||||
attrib={'id': 'chaptertour', 'title': 'Chapter Tour'})
|
||||
self.toc.to_opf1(tour)
|
||||
guide = self.guide.to_opf1(package)
|
||||
return {OPF_MIME: ('content.opf', package)}
|
||||
|
||||
def _generate_ncx_item(self):
|
||||
id = 'ncx'
|
||||
index = 0
|
||||
while id in self.manifest:
|
||||
id = 'ncx' + str(index)
|
||||
index = index + 1
|
||||
href = 'toc'
|
||||
index = 0
|
||||
        while (href + '.ncx') in self.manifest.hrefs:
            href = 'toc' + str(index)
            index = index + 1
|
||||
href += '.ncx'
|
||||
return (id, href)
|
||||
|
||||
def _to_ncx(self):
|
||||
ncx = etree.Element(NCX('ncx'), attrib={'version': '2005-1'},
|
||||
nsmap={None: NCX_NS})
|
||||
head = etree.SubElement(ncx, NCX('head'))
|
||||
etree.SubElement(head, NCX('meta'),
|
||||
attrib={'name': 'dtb:uid', 'content': unicode(self.uid)})
|
||||
etree.SubElement(head, NCX('meta'),
|
||||
attrib={'name': 'dtb:depth', 'content': str(self.toc.depth())})
|
||||
etree.SubElement(head, NCX('meta'),
|
||||
attrib={'name': 'dtb:totalPageCount', 'content': '0'})
|
||||
etree.SubElement(head, NCX('meta'),
|
||||
attrib={'name': 'dtb:maxPageNumber', 'content': '0'})
|
||||
title = etree.SubElement(ncx, NCX('docTitle'))
|
||||
text = etree.SubElement(title, NCX('text'))
|
||||
text.text = unicode(self.metadata.title[0])
|
||||
navmap = etree.SubElement(ncx, NCX('navMap'))
|
||||
self.toc.to_ncx(navmap)
|
||||
return ncx
|
||||
|
||||
def to_opf2(self):
|
||||
package = etree.Element(OPF('package'),
|
||||
attrib={'version': '2.0', 'unique-identifier': self.uid.id},
|
||||
nsmap={None: OPF2_NS})
|
||||
metadata = self.metadata.to_opf2(package)
|
||||
manifest = self.manifest.to_opf2(package)
|
||||
id, href = self._generate_ncx_item()
|
||||
etree.SubElement(manifest, OPF('item'),
|
||||
attrib={'id': id, 'href': href, 'media-type': NCX_MIME})
|
||||
spine = self.spine.to_opf2(package)
|
||||
spine.attrib['toc'] = id
|
||||
guide = self.guide.to_opf2(package)
|
||||
ncx = self._to_ncx()
|
||||
return {OPF_MIME: ('content.opf', package),
|
||||
NCX_MIME: (href, ncx)}
|
||||
|
||||
def main(argv=sys.argv):
|
||||
for arg in argv[1:]:
|
||||
oeb = Oeb(arg)
|
||||
        for mime, (href, root) in oeb.to_opf2().items():
            print etree.tostring(root, pretty_print=True)
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
149  src/calibre/ebooks/lit/split.py  (new file)
@@ -0,0 +1,149 @@
|
||||
#! /usr/bin/python
|
||||
|
||||
from __future__ import with_statement
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import types
|
||||
import copy
|
||||
import itertools
|
||||
from collections import defaultdict
|
||||
from lxml import etree
|
||||
from stylizer import Page, Stylizer, Style
|
||||
|
||||
XHTML_NS = 'http://www.w3.org/1999/xhtml'
|
||||
XPNSMAP = {'h': XHTML_NS,}
|
||||
|
||||
class Splitter(object):
|
||||
XML_PARSER = etree.XMLParser(remove_blank_text=True)
|
||||
COLLAPSE = re.compile(r'[ \n\r]+')
|
||||
CONTENT_TAGS = set(['img', 'object', 'embed'])
|
||||
for tag in list(CONTENT_TAGS):
|
||||
CONTENT_TAGS.add('{%s}%s' % (XHTML_NS, tag))
|
||||
|
||||
def __init__(self, path):
|
||||
with open(path, 'rb') as f:
|
||||
self.tree = etree.parse(f, parser=self.XML_PARSER)
|
||||
self.stylizer = Stylizer(self.tree, path)
|
||||
self.path = path
|
||||
self.basename = os.path.splitext(
|
||||
os.path.basename(path))[0].lower()
|
||||
self.splits = []
|
||||
self.names = []
|
||||
self.idmap = {}
|
||||
self.fonts = defaultdict(int)
|
||||
self.content = False
|
||||
|
||||
def split(self):
|
||||
tree = self.tree
|
||||
for prefix in ('', 'h:'):
|
||||
d = {'h': prefix}
|
||||
roots = tree.xpath('/%(h)shtml' % d, namespaces=XPNSMAP)
|
||||
if roots: break
|
||||
self.root, = roots
|
||||
self.head, = tree.xpath('/%(h)shtml/%(h)shead' % d, namespaces=XPNSMAP)
|
||||
body, = tree.xpath('/%(h)shtml/%(h)sbody' % d, namespaces=XPNSMAP)
|
||||
self._split(body, [self.new_root(str(self.basename))], 9.0)
|
||||
results = zip(self.names, self.splits)
|
||||
self.post_process_links(results, d)
|
||||
return results
|
||||
|
||||
def new_root(self, name):
|
||||
nroot = self.dup(self.root)
|
||||
nroot.append(copy.deepcopy(self.head))
|
||||
self.splits.append(nroot)
|
||||
self.names.append(name + '.html')
|
||||
return nroot
|
||||
|
||||
def dup(self, e):
|
||||
new = etree.Element(e.tag, nsmap=e.nsmap, **dict(e.attrib))
|
||||
new.text = e.text
|
||||
new.tail = e.tail
|
||||
return new
|
||||
|
||||
def dupsub(self, p, e):
|
||||
new = etree.SubElement(p, e.tag, nsmap=e.nsmap, **dict(e.attrib))
|
||||
new.text = e.text
|
||||
new.tail = e.tail
|
||||
return new
|
||||
|
||||
def _split(self, src, dstq, psize):
|
||||
style = self.stylizer.style(src)
|
||||
if self.new_page(style, 'before'):
|
||||
self.new_split(src, dstq)
|
||||
attrib = src.attrib
|
||||
name = self.names[-1]
|
||||
for aname in ('id', 'name'):
|
||||
if aname in attrib:
|
||||
self.idmap[attrib[aname]] = name
|
||||
text = self.COLLAPSE.sub(' ', src.text or '')
|
||||
        tail = self.COLLAPSE.sub(' ', src.tail or '')
|
||||
if text or tail or src.tag.lower() in self.CONTENT_TAGS:
|
||||
self.content = True
|
||||
size = style['font-size']
|
||||
self.fonts[size] += len(text)
|
||||
self.fonts[psize] += len(tail)
|
||||
new = self.dupsub(dstq[-1], src)
|
||||
if len(src) > 0:
|
||||
dstq.append(new)
|
||||
for child in src:
|
||||
self._split(child, dstq, size)
|
||||
dstq.pop()
|
||||
if self.new_page(style, 'after'):
|
||||
self.new_split(src, dstq)
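    # Descriptive note: _split() walks the source tree depth-first, copying
    # each element into the current destination root (dstq holds the chain of
    # open ancestors), and starts a fresh output document whenever a forced
    # page break is styled before or after an element once real content has
    # been seen. Along the way it tallies character counts per font size in
    # self.fonts and records which output file every id/name anchor lands in
    # (self.idmap) so post_process_links() can rewrite intra-file hrefs.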
|
||||
|
||||
def new_page(self, style, when):
|
||||
if self.content \
|
||||
and (style['page-break-%s' % when] \
|
||||
in ('always', 'odd', 'even')):
|
||||
return True
|
||||
return False
|
||||
|
||||
def new_split(self, src, dstq):
|
||||
name = self.basename
|
||||
attrib = src.attrib
|
||||
if 'class' in attrib:
|
||||
name = src.attrib['class']
|
||||
if ' ' in name:
|
||||
name = name.split(' ', 2)[0]
|
||||
if 'id' in attrib:
|
||||
name = '%s-%s' % (name, attrib['id'])
|
||||
name = name.lower().replace('_', '-')
|
||||
if (name + '.html') in self.names:
|
||||
name = '%s-%02d' % (name, len(self.names))
|
||||
prev = None
|
||||
for i in xrange(len(dstq)):
|
||||
new = self.new_root(name) if prev is None \
|
||||
else self.dupsub(prev, dstq[i])
|
||||
prev = dstq[i] = new
|
||||
self.content = False
|
||||
|
||||
def post_process_links(self, results, prefixes):
|
||||
basename = os.path.basename(self.path)
|
||||
query = '//%(h)sa[@href]' % prefixes
|
||||
for name, root in results:
|
||||
elements = root.xpath(query, namespaces=XPNSMAP)
|
||||
for element in elements:
|
||||
href = element.attrib['href']
|
||||
if '#' not in href: continue
|
||||
                fname, id = href.split('#', 1)
|
||||
if fname in ('', basename):
|
||||
href = '#'.join((self.idmap[id], id))
|
||||
element.attrib['href'] = href
|
||||
|
||||
def main():
|
||||
def xml2str(root):
|
||||
return etree.tostring(root, pretty_print=True,
|
||||
encoding='utf-8', xml_declaration=True)
|
||||
tree = None
|
||||
path = sys.argv[1]
|
||||
dest = sys.argv[2]
|
||||
splitter = Splitter(path)
|
||||
for name, root in splitter.split():
|
||||
print name
|
||||
with open(os.path.join(dest, name), 'wb') as f:
|
||||
f.write(xml2str(root))
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
435  src/calibre/ebooks/lit/stylizer.py  (new file)
@@ -0,0 +1,435 @@
|
||||
#! /usr/bin/python2.5
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
from __future__ import with_statement
|
||||
import sys
|
||||
import os
|
||||
import locale
|
||||
import codecs
|
||||
import itertools
|
||||
import types
|
||||
import re
|
||||
import copy
|
||||
import cssutils
|
||||
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
|
||||
CSSValueList, cssproperties
|
||||
from lxml import etree
|
||||
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES, barename
|
||||
from calibre.resources import html_css
|
||||
|
||||
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
|
||||
XHTML_CSS_NAMESPACE = "@namespace url(http://www.w3.org/1999/xhtml);\n"
|
||||
|
||||
INHERITED = set(['azimuth', 'border-collapse', 'border-spacing',
|
||||
'caption-side', 'color', 'cursor', 'direction', 'elevation',
|
||||
'empty-cells', 'font-family', 'font-size', 'font-style',
|
||||
'font-variant', 'font-weight', 'letter-spacing',
|
||||
'line-height', 'list-style-image', 'list-style-position',
|
||||
'list-style-type', 'orphans', 'page-break-inside',
|
||||
'pitch-range', 'pitch', 'quotes', 'richness', 'speak-header',
|
||||
'speak-numeral', 'speak-punctuation', 'speak', 'speech-rate',
|
||||
'stress', 'text-align', 'text-indent', 'text-transform',
|
||||
'visibility', 'voice-family', 'volume', 'white-space',
|
||||
'widows', 'word-spacing'])
|
||||
|
||||
DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll',
|
||||
'background-color': 'transparent', 'background-image': 'none',
|
||||
'background-position': '0% 0%', 'background-repeat': 'repeat',
|
||||
'border-bottom-color': ':color', 'border-bottom-style': 'none',
|
||||
'border-bottom-width': 'medium', 'border-collapse': 'separate',
|
||||
'border-left-color': ':color', 'border-left-style': 'none',
|
||||
'border-left-width': 'medium', 'border-right-color': ':color',
|
||||
'border-right-style': 'none', 'border-right-width': 'medium',
|
||||
'border-spacing': 0, 'border-top-color': ':color',
|
||||
'border-top-style': 'none', 'border-top-width': 'medium', 'bottom':
|
||||
'auto', 'caption-side': 'top', 'clear': 'none', 'clip': 'auto',
|
||||
'color': 'black', 'content': 'normal', 'counter-increment': 'none',
|
||||
'counter-reset': 'none', 'cue-after': 'none', 'cue-before': 'none',
|
||||
'cursor': 'auto', 'direction': 'ltr', 'display': 'inline',
|
||||
'elevation': 'level', 'empty-cells': 'show', 'float': 'none',
|
||||
'font-family': 'serif', 'font-size': 'medium', 'font-style':
|
||||
'normal', 'font-variant': 'normal', 'font-weight': 'normal',
|
||||
'height': 'auto', 'left': 'auto', 'letter-spacing': 'normal',
|
||||
'line-height': 'normal', 'list-style-image': 'none',
|
||||
'list-style-position': 'outside', 'list-style-type': 'disc',
|
||||
'margin-bottom': 0, 'margin-left': 0, 'margin-right': 0,
|
||||
'margin-top': 0, 'max-height': 'none', 'max-width': 'none',
|
||||
'min-height': 0, 'min-width': 0, 'orphans': '2',
|
||||
'outline-color': 'invert', 'outline-style': 'none',
|
||||
'outline-width': 'medium', 'overflow': 'visible', 'padding-bottom':
|
||||
0, 'padding-left': 0, 'padding-right': 0, 'padding-top': 0,
|
||||
'page-break-after': 'auto', 'page-break-before': 'auto',
|
||||
'page-break-inside': 'auto', 'pause-after': 0, 'pause-before':
|
||||
0, 'pitch': 'medium', 'pitch-range': '50', 'play-during': 'auto',
|
||||
'position': 'static', 'quotes': u"'“' '”' '‘' '’'", 'richness':
|
||||
'50', 'right': 'auto', 'speak': 'normal', 'speak-header': 'once',
|
||||
'speak-numeral': 'continuous', 'speak-punctuation': 'none',
|
||||
'speech-rate': 'medium', 'stress': '50', 'table-layout': 'auto',
|
||||
'text-align': 'left', 'text-decoration': 'none', 'text-indent':
|
||||
0, 'text-transform': 'none', 'top': 'auto', 'unicode-bidi':
|
||||
'normal', 'vertical-align': 'baseline', 'visibility': 'visible',
|
||||
'voice-family': 'default', 'volume': 'medium', 'white-space':
|
||||
'normal', 'widows': '2', 'width': 'auto', 'word-spacing': 'normal',
|
||||
'z-index': 'auto'}
|
||||
|
||||
FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
|
||||
'x-large', 'xx-large'])
|
||||
|
||||
FONT_SIZE_LIST = [('xx-small', 1, 6.),
|
||||
('x-small', None, 7.),
|
||||
('small', 2, 8.),
|
||||
('medium', 3, 9.),
|
||||
('large', 4, 11.),
|
||||
('x-large', 5, 13.),
|
||||
('xx-large', 6, 15.),
|
||||
(None, 7, 17.)]
|
||||
|
||||
FONT_SIZE_BY_NAME = {}
|
||||
FONT_SIZE_BY_NUM = {}
|
||||
for name, num, size in FONT_SIZE_LIST:
|
||||
FONT_SIZE_BY_NAME[name] = size
|
||||
FONT_SIZE_BY_NUM[num] = size
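# Mapping note: FONT_SIZE_BY_NAME resolves the CSS keyword sizes ('xx-small'
# through 'xx-large') to point values, while FONT_SIZE_BY_NUM covers the
# legacy HTML <font size="1".."7"> scale; 'medium' corresponds to 9 pt here.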
|
||||
|
||||
XPNSMAP = {'h': XHTML_NS,}
|
||||
def xpath(elem, expr):
|
||||
return elem.xpath(expr, namespaces=XPNSMAP)
|
||||
|
||||
|
||||
class Page(object):
|
||||
def __init__(self, width, height, dpi):
|
||||
self.width = float(width)
|
||||
self.height = float(height)
|
||||
self.dpi = float(dpi)
|
||||
|
||||
class Profiles(object):
|
||||
PRS500 = Page(584, 754, 168.451)
|
||||
PRS505 = PRS500
|
||||
|
||||
|
||||
class Stylizer(object):
|
||||
STYLESHEETS = {}
|
||||
|
||||
def __init__(self, tree, path, oeb, page=Profiles.PRS505):
|
||||
self.page = page
|
||||
base = os.path.dirname(path)
|
||||
basename = os.path.basename(path)
|
||||
cssname = os.path.splitext(basename)[0] + '.css'
|
||||
stylesheets = [HTML_CSS_STYLESHEET]
|
||||
head = xpath(tree, '/h:html/h:head')[0]
|
||||
for elem in head:
|
||||
tag = barename(elem.tag)
|
||||
if tag == 'style':
|
||||
text = ''.join(elem.text)
|
||||
stylesheet = cssutils.parseString(text, href=cssname)
|
||||
stylesheets.append(stylesheet)
|
||||
elif tag == 'link' \
|
||||
and elem.get('rel', 'stylesheet') == 'stylesheet' \
|
||||
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||
href = elem.attrib['href']
|
||||
path = os.path.join(base, href)
|
||||
path = os.path.normpath(path).replace('\\', '/')
|
||||
if path in self.STYLESHEETS:
|
||||
stylesheet = self.STYLESHEETS[path]
|
||||
else:
|
||||
data = XHTML_CSS_NAMESPACE
|
||||
data += oeb.manifest.hrefs[path].data
|
||||
stylesheet = cssutils.parseString(data, href=path)
|
||||
self.STYLESHEETS[path] = stylesheet
|
||||
stylesheets.append(stylesheet)
|
||||
rules = []
|
||||
index = 0
|
||||
self.stylesheets = set()
|
||||
for stylesheet in stylesheets:
|
||||
href = stylesheet.href
|
||||
self.stylesheets.add(href)
|
||||
for rule in stylesheet.cssRules:
|
||||
rules.extend(self.flatten_rule(rule, href, index))
|
||||
index = index + 1
|
||||
rules.sort()
|
||||
self.rules = rules
|
||||
self._styles = {}
|
||||
|
||||
def flatten_rule(self, rule, href, index):
|
||||
results = []
|
||||
if isinstance(rule, CSSStyleRule):
|
||||
style = self.flatten_style(rule.style)
|
||||
for selector in rule.selectorList:
|
||||
specificity = selector.specificity + (index,)
|
||||
text = selector.selectorText
|
||||
selector = list(selector.seq)
|
||||
results.append((specificity, selector, style, text, href))
|
||||
elif isinstance(rule, CSSPageRule):
|
||||
style = self.flatten_style(rule.style)
|
||||
results.append(((0, 0, 0, 0), [], style, '@page', href))
|
||||
return results
|
||||
|
||||
def flatten_style(self, cssstyle):
|
||||
style = {}
|
||||
for prop in cssstyle:
|
||||
name = prop.name
|
||||
if name in ('margin', 'padding'):
|
||||
style.update(self._normalize_edge(prop.cssValue, name))
|
||||
elif name == 'font':
|
||||
style.update(self._normalize_font(prop.cssValue))
|
||||
else:
|
||||
style[name] = prop.value
|
||||
if 'font-size' in style:
|
||||
size = style['font-size']
|
||||
if size == 'normal': size = 'medium'
|
||||
if size in FONT_SIZE_NAMES:
|
||||
style['font-size'] = "%dpt" % FONT_SIZE_BY_NAME[size]
|
||||
return style
|
||||
|
||||
def _normalize_edge(self, cssvalue, name):
|
||||
style = {}
|
||||
if isinstance(cssvalue, CSSValueList):
|
||||
primitives = [v.cssText for v in cssvalue]
|
||||
else:
|
||||
primitives = [cssvalue.cssText]
|
||||
if len(primitives) == 1:
|
||||
value, = primitives
|
||||
values = [value, value, value, value]
|
||||
elif len(primitives) == 2:
|
||||
vert, horiz = primitives
|
||||
values = [vert, horiz, vert, horiz]
|
||||
elif len(primitives) == 3:
|
||||
top, horiz, bottom = primitives
|
||||
values = [top, horiz, bottom, horiz]
|
||||
else:
|
||||
values = primitives[:4]
|
||||
edges = ('top', 'right', 'bottom', 'left')
|
||||
for edge, value in itertools.izip(edges, values):
|
||||
style["%s-%s" % (name, edge)] = value
|
||||
return style
|
||||
|
||||
def _normalize_font(self, cssvalue):
|
||||
composition = ('font-style', 'font-variant', 'font-weight',
|
||||
'font-size', 'line-height', 'font-family')
|
||||
style = {}
|
||||
if cssvalue.cssText == 'inherit':
|
||||
for key in composition:
|
||||
style[key] = 'inherit'
|
||||
else:
|
||||
primitives = [v.cssText for v in cssvalue]
|
||||
            primitives.reverse()
|
||||
value = primitives.pop()
|
||||
for key in composition:
|
||||
if cssproperties.cssvalues[key](value):
|
||||
style[key] = value
|
||||
if not primitives: break
|
||||
value = primitives.pop()
|
||||
for key in composition:
|
||||
if key not in style:
|
||||
style[key] = DEFAULTS[key]
|
||||
return style
|
||||
|
||||
def style(self, element):
|
||||
try: return self._styles[element]
|
||||
except: pass
|
||||
return Style(element, self)
|
||||
|
||||
def stylesheet(self, name, font_scale=None):
|
||||
rules = []
|
||||
for _, _, style, selector, href in self.rules:
|
||||
if href != name: continue
|
||||
if font_scale and 'font-size' in style and \
|
||||
style['font-size'].endswith('pt'):
|
||||
style = copy.copy(style)
|
||||
size = float(style['font-size'][:-2])
|
||||
style['font-size'] = "%.2fpt" % (size * font_scale)
|
||||
style = ';\n '.join(': '.join(item) for item in style.items())
|
||||
rules.append('%s {\n %s;\n}' % (selector, style))
|
||||
return '\n'.join(rules)
|
||||
|
||||
class Style(object):
|
||||
def __init__(self, element, stylizer):
|
||||
self._element = element
|
||||
self._page = stylizer.page
|
||||
self._stylizer = stylizer
|
||||
self._style = self._assemble_style(element, stylizer)
|
||||
stylizer._styles[element] = self
|
||||
|
||||
def _assemble_style(self, element, stylizer):
|
||||
result = {}
|
||||
rules = stylizer.rules
|
||||
for _, selector, style, _, _ in rules:
|
||||
if self._selects_element(element, selector):
|
||||
result.update(style)
|
||||
try:
|
||||
style = CSSStyleDeclaration(element.attrib['style'])
|
||||
result.update(stylizer.flatten_style(style))
|
||||
except KeyError:
|
||||
pass
|
||||
return result
|
||||
|
||||
def _selects_element(self, element, selector):
|
||||
def _selects_element(element, items, index):
|
||||
if index == -1:
|
||||
return True
|
||||
item = items[index]
|
||||
if item.type == 'universal':
|
||||
pass
|
||||
elif item.type == 'type-selector':
|
||||
name1 = ("{%s}%s" % item.value).lower()
|
||||
name2 = element.tag.lower()
|
||||
if name1 != name2:
|
||||
return False
|
||||
elif item.type == 'id':
|
||||
name1 = item.value[1:].lower()
|
||||
                name2 = element.attrib.get('id', '').lower()
|
||||
if name1 != name2:
|
||||
return False
|
||||
elif item.type == 'class':
|
||||
name = item.value[1:].lower()
|
||||
classes = element.attrib.get('class', '').lower().split()
|
||||
if name not in classes:
|
||||
return False
|
||||
elif item.type == 'child':
|
||||
parent = element.getparent()
|
||||
if parent is None:
|
||||
return False
|
||||
element = parent
|
||||
elif item.type == 'descendant':
|
||||
element = element.getparent()
|
||||
while element is not None:
|
||||
if _selects_element(element, items, index - 1):
|
||||
return True
|
||||
element = element.getparent()
|
||||
return False
|
||||
elif item.type == 'pseudo-class':
|
||||
if item.value == ':first-child':
|
||||
e = element.getprevious()
|
||||
if e is not None:
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
elif item.type == 'pseudo-element':
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
return _selects_element(element, items, index - 1)
|
||||
return _selects_element(element, selector, len(selector) - 1)
|
||||
|
||||
def _has_parent(self):
|
||||
parent = self._element.getparent()
|
||||
return (parent is not None) \
|
||||
and (parent in self._stylizer._styles)
|
||||
|
||||
def __getitem__(self, name):
|
||||
domname = cssproperties._toDOMname(name)
|
||||
if hasattr(self, domname):
|
||||
return getattr(self, domname)
|
||||
return self._unit_convert(self._get(name))
|
||||
|
||||
def _get(self, name):
|
||||
result = None
|
||||
styles = self._stylizer._styles
|
||||
if name in self._style:
|
||||
result = self._style[name]
|
||||
if (result == 'inherit'
|
||||
or (result is None and name in INHERITED
|
||||
and self._has_parent())):
|
||||
result = styles[self._element.getparent()]._get(name)
|
||||
if result is None:
|
||||
result = DEFAULTS[name]
|
||||
return result
|
||||
|
||||
def _unit_convert(self, value, base=None, font=None):
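# Convert a CSS length into points: '%' resolves against the supplied base (by default
# this style's width), 'em' against the font size, 'px' via the page DPI, and the
# absolute units via fixed factors.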
|
||||
if isinstance(value, (int, long, float)):
|
||||
return value
|
||||
try:
|
||||
if float(value) == 0:
|
||||
return 0.0
|
||||
except:
|
||||
pass
|
||||
result = value
|
||||
m = re.search(
|
||||
r"^(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)$", value)
|
||||
if m is not None and m.group(1):
|
||||
value = float(m.group(1))
|
||||
unit = m.group(2)
|
||||
if unit == '%':
|
||||
base = base or self.width
|
||||
result = (value/100.0) * base
|
||||
elif unit == 'px':
|
||||
result = value * 72.0 / self._page.dpi
|
||||
elif unit == 'in':
|
||||
result = value * 72.0
|
||||
elif unit == 'pt':
|
||||
result = value
|
||||
elif unit == 'em':
|
||||
font = font or self.fontSize
|
||||
result = value * font
|
||||
elif unit == 'pc':
|
||||
result = value * 12.0
|
||||
elif unit == 'mm':
|
||||
result = value * 72.0 / 25.4   # 25.4 mm per inch, 72 pt per inch
|
||||
elif unit == 'cm':
|
||||
result = value * 72.0 / 2.54   # 2.54 cm per inch, 72 pt per inch
|
||||
return result
|
||||
|
||||
@property
|
||||
def fontSize(self):
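# Compute the effective font size in points, resolving named sizes and the
# 'smaller'/'larger' steps relative to the parent size, with plain lengths
# converted via _unit_convert.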
|
||||
def normalize_fontsize(value, base=None):
|
||||
result = None
|
||||
factor = None
|
||||
if value == 'inherit':
|
||||
value = 'medium'
|
||||
if value in FONT_SIZE_NAMES:
|
||||
result = FONT_SIZE_BY_NAME[value]
|
||||
elif value == 'smaller':
|
||||
factor = 1.0/1.2
|
||||
for _, _, size in FONT_SIZE_LIST:
|
||||
if base <= size: break
|
||||
factor = None
|
||||
result = size
|
||||
elif value == 'larger':
|
||||
factor = 1.2
|
||||
for _, _, size in reversed(FONT_SIZE_LIST):
|
||||
if base >= size: break
|
||||
factor = None
|
||||
result = size
|
||||
else:
|
||||
result = self._unit_convert(value, base=base, font=base)
|
||||
if result < 0:
|
||||
result = normalize_fontsize("smaller", base)
|
||||
if factor:
|
||||
result = factor * base
|
||||
return result
|
||||
result = None
|
||||
if self._has_parent():
|
||||
styles = self._stylizer._styles
|
||||
base = styles[self._element.getparent()].fontSize
|
||||
else:
|
||||
base = normalize_fontsize(DEFAULTS['font-size'])
|
||||
if 'font-size' in self._style:
|
||||
size = self._style['font-size']
|
||||
result = normalize_fontsize(size, base)
|
||||
else:
|
||||
result = base
|
||||
self.__dict__['fontSize'] = result
|
||||
return result
|
||||
|
||||
@property
|
||||
def width(self):
|
||||
result = None
|
||||
base = None
|
||||
if self._has_parent():
|
||||
styles = self._stylizer._styles
|
||||
base = styles[self._element.getparent()].width
|
||||
else:
|
||||
base = self._page.width
|
||||
if 'width' in self._style:
|
||||
width = self._style['width']
|
||||
if width == 'auto':
|
||||
result = base
|
||||
else:
|
||||
result = self._unit_convert(width, base=base)
|
||||
else:
|
||||
result = base
|
||||
self.__dict__['width'] = result
|
||||
return result
|
||||
|
||||
def __str__(self):
|
||||
items = self._style.items()
|
||||
return '; '.join("%s: %s" % (key, val) for key, val in items)
|
655
src/calibre/ebooks/lit/writer.py
Normal file
@ -0,0 +1,655 @@
|
||||
from __future__ import with_statement
|
||||
import sys
|
||||
import os
|
||||
from cStringIO import StringIO
|
||||
from struct import pack, unpack
|
||||
from itertools import izip, count
|
||||
import time
|
||||
import random
|
||||
import re
|
||||
import copy
|
||||
import uuid
|
||||
import functools
|
||||
from lxml import etree
|
||||
from calibre.ebooks.lit.reader import msguid, DirectoryEntry
|
||||
import calibre.ebooks.lit.maps as maps
|
||||
from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME
|
||||
from calibre.ebooks.lit.oeb import Oeb, namespace, barename
|
||||
from calibre.ebooks.lit.stylizer import Stylizer
|
||||
from calibre.ebooks.lit.lzxcomp import Compressor
|
||||
import calibre
|
||||
from calibre import plugins
|
||||
msdes, msdeserror = plugins['msdes']
|
||||
import calibre.ebooks.lit.mssha1 as mssha1
|
||||
|
||||
__all__ = ['LitWriter']
|
||||
|
||||
def invert_tag_map(tag_map):
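# Invert the reader's decoding maps so tag and attribute names can be looked up
# to obtain the numeric tokens used in the LIT binary streams.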
|
||||
tags, dattrs, tattrs = tag_map
|
||||
tags = dict((tags[i], i) for i in xrange(len(tags)))
|
||||
dattrs = dict((v, k) for k, v in dattrs.items())
|
||||
tattrs = [dict((v, k) for k, v in (map or {}).items()) for map in tattrs]
|
||||
for map in tattrs:
|
||||
if map: map.update(dattrs)
|
||||
tattrs[0] = dattrs
|
||||
return tags, tattrs
|
||||
|
||||
OPF_MAP = invert_tag_map(maps.OPF_MAP)
|
||||
HTML_MAP = invert_tag_map(maps.HTML_MAP)
|
||||
|
||||
LIT_MAGIC = 'ITOLITLS'
|
||||
|
||||
LITFILE_GUID = "{0A9007C1-4076-11D3-8789-0000F8105754}"
|
||||
PIECE3_GUID = "{0A9007C3-4076-11D3-8789-0000F8105754}"
|
||||
PIECE4_GUID = "{0A9007C4-4076-11D3-8789-0000F8105754}"
|
||||
DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}"
|
||||
LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}"
|
||||
|
||||
def packguid(guid):
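# Pack a textual '{XXXXXXXX-XXXX-XXXX-...}' GUID into its 16-byte little-endian binary layout.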
|
||||
values = guid[1:9], guid[10:14], guid[15:19], \
|
||||
guid[20:22], guid[22:24], guid[25:27], guid[27:29], \
|
||||
guid[29:31], guid[31:33], guid[33:35], guid[35:37]
|
||||
values = [int(value, 16) for value in values]
|
||||
return pack("<LHHBBBBBBBB", *values)
|
||||
|
||||
FLAG_OPENING = (1 << 0)
|
||||
FLAG_CLOSING = (1 << 1)
|
||||
FLAG_BLOCK = (1 << 2)
|
||||
FLAG_HEAD = (1 << 3)
|
||||
FLAG_ATOM = (1 << 4)
|
||||
FLAG_CUSTOM = (1 << 15)
|
||||
ATTR_NUMBER = 0xffff
|
||||
|
||||
PIECE_SIZE = 16
|
||||
PRIMARY_SIZE = 40
|
||||
SECONDARY_SIZE = 232
|
||||
DCHUNK_SIZE = 0x2000
|
||||
CCHUNK_SIZE = 0x0200
|
||||
ULL_NEG1 = 0xffffffffffffffff
|
||||
ROOT_OFFSET = 1284508585713721976
|
||||
ROOT_SIZE = 4165955342166943123
|
||||
|
||||
BLOCK_CAOL = \
|
||||
"\x43\x41\x4f\x4c\x02\x00\x00\x00" \
|
||||
"\x50\x00\x00\x00\x37\x13\x03\x00" \
|
||||
"\x00\x00\x00\x00\x00\x20\x00\x00" \
|
||||
"\x00\x02\x00\x00\x00\x00\x10\x00" \
|
||||
"\x00\x00\x02\x00\x00\x00\x00\x00" \
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
BLOCK_ITSF = \
|
||||
"\x49\x54\x53\x46\x04\x00\x00\x00" \
|
||||
"\x20\x00\x00\x00\x01\x00\x00\x00"
|
||||
|
||||
MSDES_CONTROL = \
|
||||
"\x03\x00\x00\x00\x29\x17\x00\x00" \
|
||||
"\x01\x00\x00\x00\xa5\xa5\x00\x00"
|
||||
LZXC_CONTROL = \
|
||||
"\x07\x00\x00\x00\x4c\x5a\x58\x43" \
|
||||
"\x03\x00\x00\x00\x04\x00\x00\x00" \
|
||||
"\x04\x00\x00\x00\x02\x00\x00\x00" \
|
||||
"\x00\x00\x00\x00\x00\x00\x00\x00"
|
||||
|
||||
COLLAPSE = re.compile(r'[ \r\n\v]+')
|
||||
|
||||
def prefixname(name, nsrmap):
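# Turn a Clark-notation name ('{uri}local') into the prefixed form used in the
# binary stream, using the reverse namespace map.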
|
||||
prefix = nsrmap[namespace(name)]
|
||||
if not prefix:
|
||||
return barename(name)
|
||||
return ':'.join((prefix, barename(name)))
|
||||
|
||||
def decint(value):
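# Encode an integer in the variable-width form used by the LIT directory data:
# 7 bits per byte, most significant group first, high bit set on every byte except the last.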
|
||||
bytes = []
|
||||
while True:
|
||||
b = value & 0x7f
|
||||
value >>= 7
|
||||
if bytes:
|
||||
b |= 0x80
|
||||
bytes.append(chr(b))
|
||||
if value == 0:
|
||||
break
|
||||
return ''.join(reversed(bytes))
|
||||
|
||||
def randbytes(n):
|
||||
return ''.join(chr(random.randint(0, 255)) for x in xrange(n))
|
||||
|
||||
class ReBinary(object):
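# Re-serializes an OEB XML tree (HTML content or the OPF metadata) into the binary
# tag/attribute stream stored in a LIT file, collecting anchors and page-break
# offsets along the way.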
|
||||
def __init__(self, root, path, oeb, map=HTML_MAP):
|
||||
self.dir = os.path.dirname(path)
|
||||
self.manifest = oeb.manifest
|
||||
self.tags, self.tattrs = map
|
||||
self.buf = StringIO()
|
||||
self.anchors = []
|
||||
self.page_breaks = []
|
||||
self.is_html = is_html = map is HTML_MAP
|
||||
self.stylizer = Stylizer(root, path, oeb) if is_html else None
|
||||
self.tree_to_binary(root)
|
||||
self.content = self.buf.getvalue()
|
||||
self.ahc = self.build_ahc()
|
||||
self.aht = self.build_aht()
|
||||
|
||||
def write(self, *values):
|
||||
for value in values:
|
||||
if isinstance(value, (int, long)):
|
||||
value = unichr(value)
|
||||
self.buf.write(value.encode('utf-8'))
|
||||
|
||||
def tree_to_binary(self, elem, nsrmap={'': None}, parents=[],
|
||||
inhead=False, preserve=False):
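# Recursively emit one element: an opening record with its flags, the tag token
# (or FLAG_CUSTOM plus the literal name), the encoded attributes, then text,
# children, the matching closing record and any tail text.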
|
||||
if not isinstance(elem.tag, basestring):
|
||||
self.write(etree.tostring(elem))
|
||||
return
|
||||
nsrmap = copy.copy(nsrmap)
|
||||
attrib = dict(elem.attrib)
|
||||
style = self.stylizer.style(elem) if self.stylizer else None
|
||||
for key, value in elem.nsmap.items():
|
||||
if value not in nsrmap or nsrmap[value] != key:
|
||||
xmlns = ('xmlns:' + key) if key else 'xmlns'
|
||||
attrib[xmlns] = value
|
||||
nsrmap[value] = key
|
||||
tag = prefixname(elem.tag, nsrmap)
|
||||
tag_offset = self.buf.tell()
|
||||
if tag == 'head':
|
||||
inhead = True
|
||||
flags = FLAG_OPENING
|
||||
if not elem.text and len(elem) == 0:
|
||||
flags |= FLAG_CLOSING
|
||||
if inhead:
|
||||
flags |= FLAG_HEAD
|
||||
if style and style['display'] in ('block', 'table'):
|
||||
flags |= FLAG_BLOCK
|
||||
self.write(0, flags)
|
||||
tattrs = self.tattrs[0]
|
||||
if tag in self.tags:
|
||||
index = self.tags[tag]
|
||||
self.write(index)
|
||||
if self.tattrs[index]:
|
||||
tattrs = self.tattrs[index]
|
||||
else:
|
||||
self.write(FLAG_CUSTOM, len(tag)+1, tag)
|
||||
last_break = self.page_breaks[-1][0] if self.page_breaks else None
|
||||
if style and last_break != tag_offset \
|
||||
and style['page-break-before'] not in ('avoid', 'auto'):
|
||||
self.page_breaks.append((tag_offset, list(parents)))
|
||||
for attr, value in attrib.items():
|
||||
attr = prefixname(attr, nsrmap)
|
||||
if attr in ('href', 'src'):
|
||||
path, hash, frag = value.partition('#')
|
||||
path = os.path.join(self.dir, path)
|
||||
path = os.path.normpath(path)
|
||||
path = path.replace('\\', '/')
|
||||
prefix = unichr(3)
|
||||
if path in self.manifest.hrefs:
|
||||
prefix = unichr(2)
|
||||
value = self.manifest.hrefs[path].id
|
||||
if hash and frag:
|
||||
value = '#'.join((value, frag))
|
||||
value = prefix + value
|
||||
elif attr in ('id', 'name'):
|
||||
self.anchors.append((value, tag_offset))
|
||||
elif attr.startswith('ms--'):
|
||||
attr = '%' + attr[4:]
|
||||
if attr in tattrs:
|
||||
self.write(tattrs[attr])
|
||||
else:
|
||||
self.write(FLAG_CUSTOM, len(attr)+1, attr)
|
||||
try:
|
||||
self.write(ATTR_NUMBER, int(value)+1)
|
||||
except ValueError:
|
||||
self.write(len(value)+1, value)
|
||||
self.write(0)
|
||||
if elem.text:
|
||||
text = elem.text
|
||||
if style and style['white-space'] == 'pre':
|
||||
preserve = True
|
||||
if elem.get('xml:space') == 'preserve':
|
||||
preserve = True
|
||||
if not preserve:
|
||||
text = COLLAPSE.sub(' ', text)
|
||||
self.write(text)
|
||||
parents.append(tag_offset)
|
||||
for child in elem:
|
||||
self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
|
||||
parents.pop()
|
||||
if not flags & FLAG_CLOSING:
|
||||
self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
|
||||
if elem.tail:
|
||||
tail = elem.tail
|
||||
if tag != 'pre':
|
||||
tail = COLLAPSE.sub(' ', tail)
|
||||
self.write(tail)
|
||||
if style and style['page-break-after'] not in ('avoid', 'auto'):
|
||||
self.page_breaks.append((self.buf.tell(), list(parents)))
|
||||
|
||||
def build_ahc(self):
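# Build the binary anchor table: the number of anchors followed by (name, offset)
# records for every id/name attribute seen while writing the content.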
|
||||
data = StringIO()
|
||||
data.write(unichr(len(self.anchors)).encode('utf-8'))
|
||||
for anchor, offset in self.anchors:
|
||||
data.write(unichr(len(anchor)).encode('utf-8'))
|
||||
data.write(anchor)
|
||||
data.write(pack('<I', offset))
|
||||
return data.getvalue()
|
||||
|
||||
def build_aht(self):
|
||||
return pack('<I', 0)
|
||||
|
||||
|
||||
def preserve(function):
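# Decorator: save the output stream position before the wrapped call and restore it
# afterwards, so helpers such as _writeat() leave the current write position untouched.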
|
||||
def wrapper(self, *args, **kwargs):
|
||||
opos = self._stream.tell()
|
||||
try:
|
||||
return function(self, *args, **kwargs)
|
||||
finally:
|
||||
self._stream.seek(opos)
|
||||
functools.update_wrapper(wrapper, function)
|
||||
return wrapper
|
||||
|
||||
class LitWriter(object):
|
||||
def __init__(self, oeb):
|
||||
self._oeb = oeb
|
||||
|
||||
def dump(self, stream):
|
||||
self._stream = stream
|
||||
self._sections = [StringIO() for i in xrange(4)]
|
||||
self._directory = []
|
||||
self._meta = None
|
||||
self._dump()
|
||||
|
||||
def _write(self, *data):
|
||||
for datum in data:
|
||||
self._stream.write(datum)
|
||||
|
||||
@preserve
|
||||
def _writeat(self, pos, *data):
|
||||
self._stream.seek(pos)
|
||||
self._write(*data)
|
||||
|
||||
def _tell(self):
|
||||
return self._stream.tell()
|
||||
|
||||
def _dump(self):
|
||||
# Build content sections
|
||||
self._build_sections()
|
||||
|
||||
# Build directory chunks
|
||||
dcounts, dchunks, ichunk = self._build_dchunks()
|
||||
|
||||
# Write headers
|
||||
self._write(LIT_MAGIC)
|
||||
self._write(pack('<IIII',
|
||||
1, PRIMARY_SIZE, 5, SECONDARY_SIZE))
|
||||
self._write(packguid(LITFILE_GUID))
|
||||
offset = self._tell()
|
||||
pieces = list(xrange(offset, offset + (PIECE_SIZE * 5), PIECE_SIZE))
|
||||
self._write((5 * PIECE_SIZE) * '\0')
|
||||
aoli1 = len(dchunks) if ichunk else ULL_NEG1
|
||||
last = len(dchunks) - 1
|
||||
ddepth = 2 if ichunk else 1
|
||||
self._write(pack('<IIQQQQIIIIQIIQQQQIIIIQIIIIQ',
|
||||
2, 0x98, aoli1, 0, last, 0, DCHUNK_SIZE, 2, 0, ddepth, 0,
|
||||
len(self._directory), 0, ULL_NEG1, 0, 0, 0, CCHUNK_SIZE, 2,
|
||||
0, 1, 0, len(dcounts), 0, 0x100000, 0x20000, 0))
|
||||
self._write(BLOCK_CAOL)
|
||||
self._write(BLOCK_ITSF)
|
||||
conoff_offset = self._tell()
|
||||
timestamp = int(time.time())
|
||||
self._write(pack('<QII', 0, timestamp, 0x409))
|
||||
|
||||
# Piece #0
|
||||
piece0_offset = self._tell()
|
||||
self._write(pack('<II', 0x1fe, 0))
|
||||
filesz_offset = self._tell()
|
||||
self._write(pack('<QQ', 0, 0))
|
||||
self._writeat(pieces[0], pack('<QQ',
|
||||
piece0_offset, self._tell() - piece0_offset))
|
||||
|
||||
# Piece #1: Directory chunks
|
||||
piece1_offset = self._tell()
|
||||
number = len(dchunks) + ((ichunk and 1) or 0)
|
||||
self._write('IFCM', pack('<IIIQQ',
|
||||
1, DCHUNK_SIZE, 0x100000, ULL_NEG1, number))
|
||||
for dchunk in dchunks:
|
||||
self._write(dchunk)
|
||||
if ichunk:
|
||||
self._write(ichunk)
|
||||
self._writeat(pieces[1], pack('<QQ',
|
||||
piece1_offset, self._tell() - piece1_offset))
|
||||
|
||||
# Piece #2: Count chunks
|
||||
piece2_offset = self._tell()
|
||||
self._write('IFCM', pack('<IIIQQ',
|
||||
1, CCHUNK_SIZE, 0x20000, ULL_NEG1, 1))
|
||||
cchunk = StringIO()
|
||||
last = 0
|
||||
for i, dcount in izip(count(), dcounts):
|
||||
cchunk.write(decint(last))
|
||||
cchunk.write(decint(dcount))
|
||||
cchunk.write(decint(i))
|
||||
last = dcount
|
||||
cchunk = cchunk.getvalue()
|
||||
rem = CCHUNK_SIZE - (len(cchunk) + 50)
|
||||
self._write('AOLL', pack('<IQQQQQ',
|
||||
rem, 0, ULL_NEG1, ULL_NEG1, 0, 1))
|
||||
filler = '\0' * rem
|
||||
self._write(cchunk, filler, pack('<H', len(dcounts)))
|
||||
self._writeat(pieces[2], pack('<QQ',
|
||||
piece2_offset, self._tell() - piece2_offset))
|
||||
|
||||
# Piece #3: GUID3
|
||||
piece3_offset = self._tell()
|
||||
self._write(packguid(PIECE3_GUID))
|
||||
self._writeat(pieces[3], pack('<QQ',
|
||||
piece3_offset, self._tell() - piece3_offset))
|
||||
|
||||
# Piece #4: GUID4
|
||||
piece4_offset = self._tell()
|
||||
self._write(packguid(PIECE4_GUID))
|
||||
self._writeat(pieces[4], pack('<QQ',
|
||||
piece4_offset, self._tell() - piece4_offset))
|
||||
|
||||
# The actual section content
|
||||
content_offset = self._tell()
|
||||
self._writeat(conoff_offset, pack('<Q', content_offset))
|
||||
self._write(self._sections[0].getvalue())
|
||||
self._writeat(filesz_offset, pack('<Q', self._tell()))
|
||||
|
||||
def _add_file(self, name, data, secnum=0):
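# Append data to the chosen content section and record a DirectoryEntry so the entry
# appears in the LIT directory chunks; empty files are recorded with offset 0.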
|
||||
if len(data) > 0:
|
||||
section = self._sections[secnum]
|
||||
offset = section.tell()
|
||||
section.write(data)
|
||||
else:
|
||||
offset = 0
|
||||
self._directory.append(
|
||||
DirectoryEntry(name, secnum, offset, len(data)))
|
||||
|
||||
def _add_folder(self, name, offset=0, size=0):
|
||||
if not name.endswith('/'):
|
||||
name += '/'
|
||||
self._directory.append(
|
||||
DirectoryEntry(name, 0, offset, size))
|
||||
|
||||
def _djoin(self, *names):
|
||||
return '/'.join(names)
|
||||
|
||||
def _build_sections(self):
|
||||
self._add_folder('/', ROOT_OFFSET, ROOT_SIZE)
|
||||
self._build_data()
|
||||
self._build_manifest()
|
||||
self._build_page_breaks()
|
||||
self._build_meta()
|
||||
self._build_drm_storage()
|
||||
self._build_version()
|
||||
self._build_namelist()
|
||||
self._build_storage()
|
||||
self._build_transforms()
|
||||
|
||||
def _build_data(self):
|
||||
self._add_folder('/data')
|
||||
for item in self._oeb.manifest.values():
|
||||
name = '/data/' + item.id
|
||||
data = item.data
|
||||
secnum = 0
|
||||
if not isinstance(data, basestring):
|
||||
self._add_folder(name)
|
||||
rebin = ReBinary(data, item.href, self._oeb)
|
||||
self._add_file(name + '/ahc', rebin.ahc, 0)
|
||||
self._add_file(name + '/aht', rebin.aht, 0)
|
||||
item.page_breaks = rebin.page_breaks
|
||||
data = rebin.content
|
||||
name = name + '/content'
|
||||
secnum = 1
|
||||
self._add_file(name, data, secnum)
|
||||
item.size = len(data)
|
||||
|
||||
def _build_manifest(self):
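# Write the /manifest file: items are grouped into linear spine, non-linear spine,
# CSS and image buckets, and each record carries the item's offset, id, href and media type.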
|
||||
states = ['linear', 'nonlinear', 'css', 'images']
|
||||
manifest = dict((state, []) for state in states)
|
||||
for item in self._oeb.manifest.values():
|
||||
if item.spine_position is not None:
|
||||
key = 'linear' if item.linear else 'nonlinear'
|
||||
manifest[key].append(item)
|
||||
elif item.media_type == CSS_MIME:
|
||||
manifest['css'].append(item)
|
||||
else:
|
||||
manifest['images'].append(item)
|
||||
data = StringIO()
|
||||
data.write(pack('<Bc', 1, '\\'))
|
||||
offset = 0
|
||||
for state in states:
|
||||
items = manifest[state]
|
||||
items.sort()
|
||||
data.write(pack('<I', len(items)))
|
||||
for item in items:
|
||||
id, href, media_type = item.id, item.href, item.media_type
|
||||
item.offset = offset \
|
||||
if state in ('linear', 'nonlinear') else 0
|
||||
data.write(pack('<I', item.offset))
|
||||
entry = [unichr(len(id)), unicode(id),
|
||||
unichr(len(href)), unicode(href),
|
||||
unichr(len(media_type)), unicode(media_type)]
|
||||
for value in entry:
|
||||
data.write(value.encode('utf-8'))
|
||||
data.write('\0')
|
||||
offset += item.size
|
||||
self._add_file('/manifest', data.getvalue())
|
||||
|
||||
def _build_page_breaks(self):
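# Emit the page-break tables: /pb1 maps content offsets to records in /pb2 (which
# lists parent tag offsets), while /pb3 packs two flag bits per break.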
|
||||
pb1 = StringIO()
|
||||
pb2 = StringIO()
|
||||
pb3 = StringIO()
|
||||
pb3cur = 0
|
||||
bits = 0
|
||||
for item in self._oeb.spine:
|
||||
page_breaks = copy.copy(item.page_breaks)
|
||||
if not item.linear:
|
||||
page_breaks.insert(0, (0, []))
|
||||
for pbreak, parents in page_breaks:
|
||||
pb3cur = (pb3cur << 2) | 1
|
||||
if len(parents) > 1:
|
||||
pb3cur |= 0x2
|
||||
bits += 2
|
||||
if bits >= 8:
|
||||
pb3.write(pack('<B', pb3cur))
|
||||
pb3cur = 0
|
||||
bits = 0
|
||||
pbreak += item.offset
|
||||
pb1.write(pack('<II', pbreak, pb2.tell()))
|
||||
pb2.write(pack('<I', len(parents)))
|
||||
for parent in parents:
|
||||
pb2.write(pack('<I', parent))
|
||||
if bits != 0:
|
||||
pb3cur <<= (8 - bits)
|
||||
pb3.write(pack('<B', pb3cur))
|
||||
self._add_file('/pb1', pb1.getvalue(), 0)
|
||||
self._add_file('/pb2', pb2.getvalue(), 0)
|
||||
self._add_file('/pb3', pb3.getvalue(), 0)
|
||||
|
||||
def _build_meta(self):
|
||||
_, meta = self._oeb.to_opf1()[OPF_MIME]
|
||||
xmetadata, = meta.xpath('/package/metadata/x-metadata')
|
||||
etree.SubElement(xmetadata, 'meta', attrib={
|
||||
'name': 'calibre-oeb2lit-version',
|
||||
'content': calibre.__version__})
|
||||
meta.attrib['ms--minimum_level'] = '0'
|
||||
meta.attrib['ms--attr5'] = '1'
|
||||
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
||||
rebin = ReBinary(meta, 'content.opf', self._oeb, OPF_MAP)
|
||||
meta = rebin.content
|
||||
self._meta = meta
|
||||
self._add_file('/meta', meta)
|
||||
|
||||
def _build_drm_storage(self):
|
||||
drmsource = u'Fuck Microsoft\0'.encode('utf-16-le')
|
||||
self._add_file('/DRMStorage/DRMSource', drmsource)
|
||||
tempkey = self._calculate_deskey([self._meta, drmsource])
|
||||
msdes.deskey(tempkey, msdes.EN0)
|
||||
self._add_file('/DRMStorage/DRMSealed', msdes.des("\0" * 16))
|
||||
self._bookkey = '\0' * 8
|
||||
self._add_file('/DRMStorage/ValidationStream', 'MSReader', 3)
|
||||
|
||||
def _build_version(self):
|
||||
self._add_file('/Version', pack('<HH', 8, 1))
|
||||
|
||||
def _build_namelist(self):
|
||||
data = StringIO()
|
||||
data.write(pack('<HH', 0x3c, len(self._sections)))
|
||||
names = ['Uncompressed', 'MSCompressed', 'EbEncryptDS',
|
||||
'EbEncryptOnlyDS']
|
||||
for name in names:
|
||||
data.write(pack('<H', len(name)))
|
||||
data.write(name.encode('utf-16-le'))
|
||||
data.write('\0\0')
|
||||
self._add_file('::DataSpace/NameList', data.getvalue())
|
||||
|
||||
def _build_storage(self):
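# Lay out the ::DataSpace storage for each section, applying DES encryption and/or
# LZX compression per its transform chain and writing the Content, ControlData,
# SpanInfo and Transform entries.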
|
||||
mapping = [(1, 'MSCompressed', (LZXCOMPRESS_GUID,)),
|
||||
(2, 'EbEncryptDS', (LZXCOMPRESS_GUID, DESENCRYPT_GUID)),
|
||||
(3, 'EbEncryptOnlyDS', (DESENCRYPT_GUID,)),]
|
||||
for secnum, name, transforms in mapping:
|
||||
root = '::DataSpace/Storage/' + name
|
||||
data = self._sections[secnum].getvalue()
|
||||
cdata, sdata, tdata, rdata = '', '', '', ''
|
||||
for guid in transforms:
|
||||
tdata = packguid(guid) + tdata
|
||||
sdata = sdata + pack('<Q', len(data))
|
||||
if guid == DESENCRYPT_GUID:
|
||||
cdata = MSDES_CONTROL + cdata
|
||||
if not data: continue
|
||||
msdes.deskey(self._bookkey, msdes.EN0)
|
||||
pad = 8 - (len(data) & 0x7)
|
||||
if pad != 8:
|
||||
data = data + ('\0' * pad)
|
||||
data = msdes.des(data)
|
||||
elif guid == LZXCOMPRESS_GUID:
|
||||
cdata = LZXC_CONTROL + cdata
|
||||
if not data: continue
|
||||
unlen = len(data)
|
||||
with Compressor(17) as lzx:
|
||||
data, rtable = lzx.compress(data, flush=True)
|
||||
rdata = StringIO()
|
||||
rdata.write(pack('<IIIIQQQQ',
|
||||
3, len(rtable), 8, 0x28, unlen, len(data), 0x8000, 0))
|
||||
for uncomp, comp in rtable[:-1]:
|
||||
rdata.write(pack('<Q', comp))
|
||||
rdata = rdata.getvalue()
|
||||
self._add_file(root + '/Content', data)
|
||||
self._add_file(root + '/ControlData', cdata)
|
||||
self._add_file(root + '/SpanInfo', sdata)
|
||||
self._add_file(root + '/Transform/List', tdata)
|
||||
troot = root + '/Transform'
|
||||
for guid in transforms:
|
||||
dname = self._djoin(troot, guid, 'InstanceData')
|
||||
self._add_folder(dname)
|
||||
if guid == LZXCOMPRESS_GUID:
|
||||
dname += '/ResetTable'
|
||||
self._add_file(dname, rdata)
|
||||
|
||||
def _build_transforms(self):
|
||||
for guid in (LZXCOMPRESS_GUID, DESENCRYPT_GUID):
|
||||
self._add_folder('::Transform/'+ guid)
|
||||
|
||||
def _calculate_deskey(self, hashdata):
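# Derive the 8-byte DES key by hashing the (padded) metadata and DRM source blocks
# and folding the resulting digest down to 8 bytes with XOR.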
|
||||
prepad = 2
|
||||
hash = mssha1.new()
|
||||
for data in hashdata:
|
||||
if prepad > 0:
|
||||
data = ("\000" * prepad) + data
|
||||
prepad = 0
|
||||
postpad = 64 - (len(data) % 64)
|
||||
if postpad < 64:
|
||||
data = data + ("\000" * postpad)
|
||||
hash.update(data)
|
||||
digest = hash.digest()
|
||||
key = [0] * 8
|
||||
for i in xrange(0, len(digest)):
|
||||
key[i % 8] ^= ord(digest[i])
|
||||
return ''.join(chr(x) for x in key)
|
||||
|
||||
def _build_dchunks(self):
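# Pack the sorted directory entries into fixed-size AOLL chunks, each with trailing
# quick-reference offsets, and build an AOLI index chunk when more than one data
# chunk is needed.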
|
||||
ddata = []
|
||||
directory = list(self._directory)
|
||||
directory.sort(cmp=lambda x, y: \
|
||||
cmp(x.name.lower(), y.name.lower()))
|
||||
qrn = 1 + (1 << 2)
|
||||
dchunk = StringIO()
|
||||
dcount = 0
|
||||
quickref = []
|
||||
name = directory[0].name
|
||||
for entry in directory:
|
||||
next = ''.join([decint(len(entry.name)), entry.name,
|
||||
decint(entry.section), decint(entry.offset),
|
||||
decint(entry.size)])
|
||||
usedlen = dchunk.tell() + len(next) + (len(quickref) * 2) + 52
|
||||
if usedlen >= DCHUNK_SIZE:
|
||||
ddata.append((dchunk.getvalue(), quickref, dcount, name))
|
||||
dchunk = StringIO()
|
||||
dcount = 0
|
||||
quickref = []
|
||||
name = entry.name
|
||||
if (dcount % qrn) == 0:
|
||||
quickref.append(dchunk.tell())
|
||||
dchunk.write(next)
|
||||
dcount = dcount + 1
|
||||
ddata.append((dchunk.getvalue(), quickref, dcount, name))
|
||||
cidmax = len(ddata) - 1
|
||||
rdcount = 0
|
||||
dchunks = []
|
||||
dcounts = []
|
||||
ichunk = None
|
||||
if len(ddata) > 1:
|
||||
ichunk = StringIO()
|
||||
for cid, (content, quickref, dcount, name) in izip(count(), ddata):
|
||||
dchunk = StringIO()
|
||||
prev = cid - 1 if cid > 0 else ULL_NEG1
|
||||
next = cid + 1 if cid < cidmax else ULL_NEG1
|
||||
rem = DCHUNK_SIZE - (len(content) + 50)
|
||||
pad = rem - (len(quickref) * 2)
|
||||
dchunk.write('AOLL')
|
||||
dchunk.write(pack('<IQQQQQ', rem, cid, prev, next, rdcount, 1))
|
||||
dchunk.write(content)
|
||||
dchunk.write('\0' * pad)
|
||||
for ref in reversed(quickref):
|
||||
dchunk.write(pack('<H', ref))
|
||||
dchunk.write(pack('<H', dcount))
|
||||
rdcount = rdcount + dcount
|
||||
dchunks.append(dchunk.getvalue())
|
||||
dcounts.append(dcount)
|
||||
if ichunk:
|
||||
ichunk.write(decint(len(name)))
|
||||
ichunk.write(name)
|
||||
ichunk.write(decint(cid))
|
||||
if ichunk:
|
||||
rem = DCHUNK_SIZE - (ichunk.tell() + 16)
|
||||
pad = rem - 2
|
||||
ichunk = ''.join(['AOLI', pack('<IQ', rem, len(dchunks)),
|
||||
ichunk.getvalue(), ('\0' * pad), pack('<H', len(dchunks))])
|
||||
return dcounts, dchunks, ichunk
|
||||
|
||||
|
||||
def option_parser():
|
||||
from calibre.utils.config import OptionParser
|
||||
parser = OptionParser(usage=_('%prog [options] OPFFILE'))
|
||||
parser.add_option(
|
||||
'-o', '--output', default=None,
|
||||
help=_('Output file. Default is derived from input filename.'))
|
||||
return parser
|
||||
|
||||
def main(argv=sys.argv):
|
||||
parser = option_parser()
|
||||
opts, args = parser.parse_args(argv[1:])
|
||||
if len(args) != 1:
|
||||
parser.print_help()
|
||||
return 1
|
||||
opfpath = args[0]
|
||||
litpath = opts.output
|
||||
if litpath is None:
|
||||
litpath = os.path.basename(opfpath)
|
||||
litpath = os.path.splitext(litpath)[0] + '.lit'
|
||||
lit = LitWriter(Oeb(opfpath))
|
||||
with open(litpath, 'wb') as f:
|
||||
lit.dump(f)
|
||||
print _('LIT ebook created at'), litpath
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -55,6 +55,7 @@ entry_points = {
|
||||
'mobi2oeb = calibre.ebooks.mobi.reader:main',
|
||||
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
|
||||
'lit2oeb = calibre.ebooks.lit.reader:main',
|
||||
'oeb2lit = calibre.ebooks.lit.writer:main',
|
||||
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
|
||||
'comic2epub = calibre.ebooks.epub.from_comic:main',
|
||||
'calibre-debug = calibre.debug:main',
|
||||
|