Adding initial LitWriter and oeb2lit code.

2025-07-09 03:04:10 -04:00 · 2008-12-07 23:53:14 -05:00 · 2008-12-07 23:53:14 -05:00 · f740d20f32
commit f740d20f32
parent 039572d937
8 changed files with 2527 additions and 0 deletions
--- a/setup.py
+++ b/setup.py
@ -146,6 +146,7 @@ if __name__ == '__main__':
            metadata_sqlite = 'library/metadata_sqlite.sql',
            jquery          = 'gui2/viewer/jquery.js',
            jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',
            html_css        = 'ebooks/lit/html.css',
        )
        DEST = os.path.join('src', APPNAME, 'resources.py')
--- a/src/calibre/ebooks/lit/html.css
+++ b/src/calibre/ebooks/lit/html.css
@ -0,0 +1,420 @@
 /* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Blake Ross <BlakeR1234@aol.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
@namespace url(http://www.w3.org/1999/xhtml); /* set default namespace to HTML */
 /* blocks */
 html, div, map, dt, isindex, form {
  display: block;
 }
 body {
  display: block;
  margin: 8px;
 }
 p, dl, multicol {
  display: block;
  margin: 1em 0;
 }
 dd {
  display: block;
 }
 blockquote {
  display: block;
  margin: 1em 40px;
 }
 address {
  display: block;
  font-style: italic;
 }
 center {
  display: block;
  text-align: center;
 }
 blockquote[type=cite] {
  display: block;
  margin: 1em 0px;
  border-color: blue;
  border-width: thin;
 }
 span[_moz_quote=true] {
  color: blue;
 }
 pre[_moz_quote=true] {
  color: blue;
 }
 h1 {
  display: block;
  font-size: 2em;
  font-weight: bold;
  margin: .67em 0;
 }
 h2 {
  display: block;
  font-size: 1.5em;
  font-weight: bold;
  margin: .83em 0;
 }
 h3 {
  display: block;
  font-size: 1.17em;
  font-weight: bold;
  margin: 1em 0;
 }
 h4 {
  display: block;
  font-weight: bold;
  margin: 1.33em 0;
 }
 h5 {
  display: block;
  font-size: 0.83em;
  font-weight: bold;
  margin: 1.67em 0;
 }
 h6 {
  display: block;
  font-size: 0.67em;
  font-weight: bold;
  margin: 2.33em 0;
 }
 listing {
  display: block;
  font-family: monospace;
  font-size: medium;
  white-space: pre;
  margin: 1em 0;
 }
 xmp, pre, plaintext {
  display: block;
  font-family: monospace;
  white-space: pre;
  margin: 1em 0;
 }
 /* tables */
 table {
  display: table;
  border-spacing: 2px;
  border-collapse: separate;
  margin-top: 0;
  margin-bottom: 0;
  text-indent: 0;
 }
 table[align="left"] {
  float: left;
 }
 table[align="right"] {
  float: right;
 }
 table[rules]:not([rules="none"]) {
  border-collapse: collapse;
 }
 /* caption inherits from table not table-outer */  
 caption {
  display: table-caption;
  text-align: center;
 }
 table[align="center"] > caption {
  margin-left: auto;
  margin-right: auto;
 }
 table[align="center"] > caption[align="left"] {
  margin-right: 0;
 }
 table[align="center"] > caption[align="right"] {
  margin-left: 0;
 }
 tr {
  display: table-row;
  vertical-align: inherit;
 }
 col {
  display: table-column;
 }
 colgroup {
  display: table-column-group;
 }
 tbody {
  display: table-row-group;
  vertical-align: middle;
 }
 thead {
  display: table-header-group;
  vertical-align: middle;
 }
 tfoot {
  display: table-footer-group;
  vertical-align: middle;
 }
 /* for XHTML tables without tbody */
 table > tr {
  vertical-align: middle;
 }
 td { 
  display: table-cell;
  vertical-align: inherit;
  text-align: inherit; 
  padding: 1px;
 }
 th {
  display: table-cell;
  vertical-align: inherit;
  font-weight: bold;
  padding: 1px;
 }
 /* inlines */
 q:before {
  content: open-quote;
 }
 q:after {
  content: close-quote;
 }
 b, strong {
  font-weight: bolder;
 }
 i, cite, em, var, dfn {
  font-style: italic;
 }
 tt, code, kbd, samp {
  font-family: monospace;
 }
 u, ins {
  text-decoration: underline;
 }
 s, strike, del {
  text-decoration: line-through;
 }
 blink {
  text-decoration: blink;
 }
 big {
  font-size: larger;
 }
 small {
  font-size: smaller;
 }
 sub {
  vertical-align: sub;
  font-size: smaller;
  line-height: normal;
 }
 sup {
  vertical-align: super;
  font-size: smaller;
  line-height: normal;
 }
 nobr {
  white-space: nowrap;
 }
 /* titles */
 abbr[title], acronym[title] {
  border-bottom: dotted 1px;
 }
 /* lists */
 ul, menu, dir {
  display: block;
  list-style-type: disc;
  margin: 1em 0;
 }
 ol {
  display: block;
  list-style-type: decimal;
  margin: 1em 0;
 }
 li {
  display: list-item;
 }
 /* nested lists have no top/bottom margins */
 ul ul,   ul ol,   ul dir,   ul menu,   ul dl,
 ol ul,   ol ol,   ol dir,   ol menu,   ol dl,
 dir ul,  dir ol,  dir dir,  dir menu,  dir dl,
 menu ul, menu ol, menu dir, menu menu, menu dl,
 dl ul,   dl ol,   dl dir,   dl menu,   dl dl {
  margin-top: 0;
  margin-bottom: 0;
 }
 /* 2 deep unordered lists use a circle */
 ol ul,   ul ul,   menu ul,   dir ul,
 ol menu, ul menu, menu menu, dir menu,
 ol dir,  ul dir,  menu dir,  dir dir {
  list-style-type: circle;
 }
 /* 3 deep (or more) unordered lists use a square */
 ol ol ul,     ol ul ul,     ol menu ul,     ol dir ul,
 ol ol menu,   ol ul menu,   ol menu menu,   ol dir menu,
 ol ol dir,    ol ul dir,    ol menu dir,    ol dir dir,
 ul ol ul,     ul ul ul,     ul menu ul,     ul dir ul,
 ul ol menu,   ul ul menu,   ul menu menu,   ul dir menu,
 ul ol dir,    ul ul dir,    ul menu dir,    ul dir dir,
 menu ol ul,   menu ul ul,   menu menu ul,   menu dir ul,
 menu ol menu, menu ul menu, menu menu menu, menu dir menu,
 menu ol dir,  menu ul dir,  menu menu dir,  menu dir dir,
 dir ol ul,    dir ul ul,    dir menu ul,    dir dir ul,
 dir ol menu,  dir ul menu,  dir menu menu,  dir dir menu,
 dir ol dir,   dir ul dir,   dir menu dir,   dir dir dir {
  list-style-type: square;
 }
 /* leafs */
 /* <hr> noshade and color attributes are handled completely by
 * the nsHTMLHRElement attribute mapping code
 */
 hr {
  display: block;
  height: 2px;
  border: 1px inset;
  margin: 0.5em auto 0.5em auto;
  color: gray;
 }
 hr[size="1"] {
  border-style: solid none none none;
 }
 img[usemap], object[usemap] {
  color: blue;
 }
 frameset {
  display: block ! important;
  position: static ! important;
  float: none ! important;
  border: none ! important;
 }
 frame {
  border: none ! important;
 }
 iframe {
  border: 2px inset;
 }
 noframes {
  display: none;
 }
 spacer {
  position: static ! important;
  float: none ! important;
 }
 /* focusable content: anything w/ tabindex >=0 is focusable */
 abbr:focus, acronym:focus, address:focus, applet:focus, b:focus, 
 base:focus, big:focus, blockquote:focus, br:focus, canvas:focus, caption:focus,
 center:focus, cite:focus, code:focus, col:focus, colgroup:focus, dd:focus,
 del:focus, dfn:focus, dir:focus, div:focus, dl:focus, dt:focus, em:focus,
 fieldset:focus, font:focus, form:focus, h1:focus, h2:focus, h3:focus, h4:focus,
 h5:focus, h6:focus, hr:focus, i:focus, img:focus, ins:focus, 
 kbd:focus, label:focus, legend:focus, li:focus, link:focus, menu:focus, 
 object:focus, ol:focus, p:focus, pre:focus, q:focus, s:focus, samp:focus, 
 small:focus, span:focus, strike:focus, strong:focus, sub:focus, sup:focus, 
 table:focus, tbody:focus, td:focus, tfoot:focus, th:focus, thead:focus, 
 tr:focus, tt:focus, u:focus, ul:focus, var:focus {
  /* Don't specify the outline-color, we should always use initial value. */
   outline: 1px dotted;
 }
 /* hidden elements */
 area, base, basefont, head, meta, script, style, title,
 noembed, param {
   display: none;
 }
 /* Page breaks at body tags, to help out with LIT-generation */
 body {
  page-break-before: always;
 }
--- a/src/calibre/ebooks/lit/lzxcomp.py
+++ b/src/calibre/ebooks/lit/lzxcomp.py
@ -0,0 +1,176 @@
 from __future__ import with_statement
 import sys
 import os
 from cStringIO import StringIO
 from ctypes import *
 __all__ = ['Compressor']
 # Load the LZX compression shared library.  This runs at import time, so
 # importing this module requires liblzxcomp.so to be loadable.
 liblzxcomp = cdll.LoadLibrary('liblzxcomp.so')
 # Compressor state as seen from Python: no fields are declared, so the
 # structure is only ever handled through pointers returned by the library.
 class lzx_data(Structure):
    pass
 # Callback prototypes handed to lzx_init().  With CFUNCTYPE the first
 # argument is the return type and the rest are the parameter types; every
 # callback takes a void* as its first parameter.
 lzx_get_bytes_t = CFUNCTYPE(c_int, c_voidp, c_int, c_voidp)
 lzx_put_bytes_t = CFUNCTYPE(c_int, c_voidp, c_int, c_voidp)
 lzx_mark_frame_t = CFUNCTYPE(None, c_voidp, c_uint32, c_uint32)
 lzx_at_eof_t = CFUNCTYPE(c_int, c_voidp)
 # Result structure filled in by lzx_finish().
 class lzx_results(Structure):
    _fields_ = [('len_compressed_output', c_long),
                ('len_uncompressed_input', c_long)]
 # C prototype:
 # int lzx_init(struct lzx_data **lzxdp, int wsize_code,
 #              lzx_get_bytes_t get_bytes, void *get_bytes_arg,
 #              lzx_at_eof_t at_eof,
 #              lzx_put_bytes_t put_bytes, void *put_bytes_arg,
 #              lzx_mark_frame_t mark_frame, void *mark_frame_arg);
 lzx_init = liblzxcomp.lzx_init
 lzx_init.restype = c_int
 lzx_init.argtypes = [POINTER(POINTER(lzx_data)), c_int,
                     lzx_get_bytes_t, c_voidp,
                     lzx_at_eof_t,
                     lzx_put_bytes_t, c_voidp,
                     lzx_mark_frame_t, c_voidp]
 # C prototype:
 # void  lzx_reset(lzx_data *lzxd);
 lzx_reset = liblzxcomp.lzx_reset
 lzx_reset.restype = None
 lzx_reset.argtypes = [POINTER(lzx_data)]
 # C prototype:
 # int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide);
 lzx_compress_block = liblzxcomp.lzx_compress_block
 lzx_compress_block.restype = c_int
 lzx_compress_block.argtypes = [POINTER(lzx_data), c_int, c_int]
 # C prototype:
 # int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr);
 lzx_finish = liblzxcomp.lzx_finish
 lzx_finish.restype = c_int
 lzx_finish.argtypes = [POINTER(lzx_data), POINTER(lzx_results)]
 class LzxError(Exception):
    """Raised when a liblzxcomp call reports a non-zero status code."""
 class Compressor(object):
    """Incremental LZX compressor built on the liblzxcomp bindings.

    Input is buffered in memory and handed to the library one block of
    ``1 << wbits`` bytes at a time through ctypes callbacks.  Instances can
    be used as context managers; ``close()`` releases the native state.
    """

    def __init__(self, wbits, reset=True):
        """Create the native compressor.

        wbits -- window-size code passed to lzx_init(); the block size used
                 by this class is ``1 << wbits`` bytes.
        reset -- when true, lzx_reset() is called after every block.

        Raises LzxError if lzx_init() returns a non-zero status.
        """
        self._reset = reset
        self._blocksize = 1 << wbits
        self._buffered = 0          # bytes written to _input but not yet read
        self._input = StringIO()
        self._output = StringIO()
        self._flushing = False
        self._rtable = []           # (uncomp, comp) pairs from _mark_frame
        # Wrap the bound methods as C callbacks.  The wrappers are stored on
        # the instance (replacing the method names) so that they remain
        # referenced for as long as the library may call them.
        self._get_bytes = lzx_get_bytes_t(self._get_bytes)
        self._at_eof = lzx_at_eof_t(self._at_eof)
        self._put_bytes = lzx_put_bytes_t(self._put_bytes)
        self._mark_frame = lzx_mark_frame_t(self._mark_frame)
        self._lzx = POINTER(lzx_data)()
        self._results = lzx_results()
        rv = lzx_init(self._lzx, wbits, self._get_bytes, c_voidp(),
                      self._at_eof, self._put_bytes, c_voidp(),
                      self._mark_frame, c_voidp())
        if rv != 0:
            raise LzxError("lzx_init() failed with %d" % rv)

    def _add_input(self, data):
        """Append *data* to the input buffer and rewind it for reading."""
        self._input.seek(0, 2)
        self._input.write(data)
        # Unread data always starts at offset 0 (see _reset_input()).
        self._input.seek(0)
        self._buffered += len(data)

    def _reset_input(self):
        """Drop consumed input, keeping only the unread tail at offset 0."""
        data = self._input.read()
        self._input.seek(0)
        self._input.truncate()
        self._input.write(data)
        self._input.seek(0)

    def _reset_output(self):
        """Return everything written to the output buffer and empty it."""
        data = self._output.getvalue()
        self._output.seek(0)
        self._output.truncate()
        return data

    def _reset_rtable(self):
        """Return a copy of the recorded frame marks and clear the list."""
        rtable = list(self._rtable)
        del self._rtable[:]
        return rtable

    def _get_bytes(self, arg, n, buf):
        """Callback: copy up to *n* buffered input bytes into *buf*.

        Returns the number of bytes actually copied.
        """
        data = self._input.read(n)
        memmove(buf, data, len(data))
        self._buffered -= len(data)
        return len(data)

    def _put_bytes(self, arg, n, buf):
        """Callback: append the *n* bytes at *buf* to the output buffer."""
        self._output.write(string_at(buf, n))
        return n

    def _at_eof(self, arg):
        """Callback: return 1 once flushing and no input remains, else 0."""
        if self._flushing and self._buffered == 0:
            return 1
        return 0

    def _mark_frame(self, arg, uncomp, comp):
        """Callback: record the (uncomp, comp) pair reported by the library."""
        self._rtable.append((uncomp, comp))

    def _compress_block(self):
        """Compress one block; raise LzxError on a non-zero status."""
        rv = lzx_compress_block(self._lzx, self._blocksize, 1)
        if rv != 0:
            raise LzxError("lzx_compress_block() failed with %d" % rv)
        if self._reset:
            lzx_reset(self._lzx)

    def compress(self, data, flush=False):
        """Feed *data* to the compressor.

        Every complete block that is buffered is compressed; with *flush*
        true a trailing partial block is compressed as well.  Returns a
        ``(data, rtable)`` tuple: the compressed bytes produced by this call
        and the list of frame marks recorded while producing them.
        """
        self._add_input(data)
        self._flushing = flush
        while self._buffered >= self._blocksize:
            self._compress_block()
        if self._buffered > 0 and flush:
            self._compress_block()
        self._reset_input()
        data = self._reset_output()
        rtable = self._reset_rtable()
        return (data, rtable)

    def flush(self):
        """Compress any buffered partial block; return ``(data, rtable)``."""
        self._flushing = True
        if self._buffered > 0:
            self._compress_block()
            self._reset_input()
        data = self._reset_output()
        rtable = self._reset_rtable()
        return (data, rtable)

    def close(self):
        """Release the native compressor state; safe to call repeatedly."""
        # close() is also invoked from __del__, which runs even when
        # __init__ did not get as far as creating the handle, so the
        # attribute may legitimately be missing.
        lzx = getattr(self, '_lzx', None)
        if lzx:
            lzx_finish(lzx, self._results)
            self._lzx = None

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    def __del__(self):
        self.close()
 def main(argv=sys.argv):
    """Compress one file from the command line.

    Expects exactly three arguments after the program name: the window
    bits, the input path and the output path.  Prints the frame table and
    returns 0.
    """
    wbits, source_path, target_path = argv[1:]
    with open(source_path, 'rb') as source:
        raw = source.read()
    with Compressor(int(wbits)) as lzx:
        compressed, rtable = lzx.compress(raw, flush=True)
    print(rtable)
    with open(target_path, 'wb') as target:
        target.write(compressed)
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/lit/oeb.py
+++ b/src/calibre/ebooks/lit/oeb.py
@ -0,0 +1,690 @@
 from __future__ import with_statement
 import os
 import sys
 from collections import defaultdict
 from types import StringTypes
 from itertools import izip, count
 from urlparse import urldefrag
 from lxml import etree
 # Shared lxml parser for every XML/XHTML document read by this module:
 # blank text between elements is dropped, malformed input is parsed in
 # recovery mode, and entity references are left unresolved.
 XML_PARSER = etree.XMLParser(
    remove_blank_text=True, recover=True, resolve_entities=False)
 # XML namespace URIs used by the package formats handled here.
 XHTML_NS = 'http://www.w3.org/1999/xhtml'
 OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
 OPF2_NS = 'http://www.idpf.org/2007/opf'
 DC09_NS = 'http://purl.org/metadata/dublin_core'
 DC10_NS = 'http://purl.org/dc/elements/1.0/'
 DC11_NS = 'http://purl.org/dc/elements/1.1/'
 XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance'
 DCTERMS_NS = 'http://purl.org/dc/terms/'
 NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'
 # Prefix -> namespace map passed to every XPath query made via xpath().
 XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS,
           'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
           'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS}
 def XHTML(name):
    """Return *name* in ``{namespace}name`` form for the XHTML namespace."""
    return '{%s}%s' % (XHTML_NS, name)
 def OPF(name):
    """Return *name* in ``{namespace}name`` form for the OPF 2 namespace."""
    return '{%s}%s' % (OPF2_NS, name)
 def DC(name):
    """Return *name* in ``{namespace}name`` form for Dublin Core 1.1."""
    return '{%s}%s' % (DC11_NS, name)
 def NCX(name):
    """Return *name* in ``{namespace}name`` form for the NCX namespace."""
    return '{%s}%s' % (NCX_NS, name)
 # Media types this module recognises.
 XHTML_MIME = 'application/xhtml+xml'
 CSS_MIME = 'text/css'
 NCX_MIME = 'application/x-dtbncx+xml'
 OPF_MIME = 'application/oebps-package+xml'
 # Media types treated as stylesheets; OPF 1.x manifests have these
 # rewritten to CSS_MIME during conversion.
 OEB_STYLES = set([CSS_MIME, 'text/x-oeb1-css', 'text/x-oeb-css'])
 # Media types treated as content documents; OPF 1.x manifests have these
 # rewritten to XHTML_MIME during conversion.
 OEB_DOCS = set([XHTML_MIME, 'text/html', 'text/x-oeb1-document',
                'text/x-oeb-document'])
 def element(parent, *args, **kwargs):
    """Create an lxml element, attached to *parent* unless it is None.

    Remaining arguments are forwarded unchanged to ``etree.Element`` (no
    parent) or ``etree.SubElement`` (with a parent).
    """
    if parent is None:
        return etree.Element(*args, **kwargs)
    return etree.SubElement(parent, *args, **kwargs)
 def namespace(name):
    """Return the namespace URI of a ``{uri}local`` name, or '' if none."""
    head, brace, _ = name.partition('}')
    if not brace:
        return ''
    # Drop the leading '{' of the Clark-notation prefix.
    return head[1:]
 def barename(name):
    """Return the local part of a ``{uri}local`` name; other names as-is."""
    _, brace, local = name.partition('}')
    return local if brace else name
 def xpath(elem, expr):
    """Evaluate XPath *expr* on *elem* with the module's prefix map."""
    result = elem.xpath(expr, namespaces=XPNSMAP)
    return result
 class AbstractContainer(object):
    """Base for containers; subclasses supply ``read(path)``."""

    def read_xml(self, path):
        """Read *path* via ``self.read`` and parse it with XML_PARSER.

        The directory part of *path* is passed to lxml as the base URL.
        """
        parse = etree.fromstring
        raw = self.read(path)
        base = os.path.dirname(path)
        return parse(raw, parser=XML_PARSER, base_url=base)
 class DirContainer(AbstractContainer):
    """Container whose files live under a directory on disk."""

    def __init__(self, rootdir):
        self.rootdir = rootdir

    def read(self, path):
        """Return the raw bytes of *path*, taken relative to the root."""
        fullpath = os.path.join(self.rootdir, path)
        with open(fullpath, 'rb') as source:
            return source.read()

    def write(self, path, data):
        """Write *data* to *path*, taken relative to the root."""
        fullpath = os.path.join(self.rootdir, path)
        with open(fullpath, 'wb') as target:
            return target.write(data)
 class Metadata(object):
    """Package metadata: lists of Item objects grouped by bare term name."""

    # Dublin Core element names; a bare term found here is qualified with
    # the DC 1.1 namespace.
    TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
                 'format', 'identifier', 'language', 'publisher', 'relation',
                 'rights', 'source', 'subject', 'title', 'type'])
    OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
    OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
                  'xsi': XSI_NS}

    class Item(object):
        """A single metadata entry: term, text value and attributes."""

        def __init__(self, term, value, fq_attrib=None):
            """Build an item.

            term      -- element name; an OPF ``meta`` element without text
                         takes its term/value from the ``name``/``content``
                         attributes instead.
            value     -- text value of the entry.
            fq_attrib -- mapping of (possibly namespace-qualified) attribute
                         names to values; it is copied, never modified.
            """
            # Work on a private copy so neither the caller's mapping nor a
            # shared default is ever mutated.
            fq_attrib = dict(fq_attrib or {})
            if term == OPF('meta') and not value:
                term = fq_attrib.pop('name')
                value = fq_attrib.pop('content')
            elif term in Metadata.TERMS and not namespace(term):
                term = DC(term)
            self.term = term
            self.value = value
            self.fq_attrib = fq_attrib
            # Same attributes keyed by bare (namespace-less) name.
            self.attrib = dict(
                (barename(name), val) for name, val in fq_attrib.items())

        def __getattr__(self, name):
            """Expose attributes as Python attributes ('_' stands for '-')."""
            # Only called when normal lookup fails; if ``attrib`` itself is
            # missing, looking it up below would recurse forever.
            if name == 'attrib':
                raise AttributeError(name)
            name = name.replace('_', '-')
            try:
                return self.attrib[name]
            except KeyError:
                raise AttributeError(
                    '%r object has no attribute %r' \
                        % (self.__class__.__name__, name))

        def __repr__(self):
            return 'Item(term=%r, value=%r, attrib=%r)' \
                % (barename(self.term), self.value, self.attrib)

        def __str__(self):
            return str(self.value)

        def __unicode__(self):
            return unicode(self.value)

        def to_opf1(self, dcmeta=None, xmeta=None):
            """Serialise as OPF 1.x: DC terms under *dcmeta*, rest as meta."""
            if namespace(self.term) == DC11_NS:
                name = DC(barename(self.term).title())
                elem = element(dcmeta, name, attrib=self.attrib)
                elem.text = self.value
            else:
                elem = element(xmeta, 'meta', attrib=self.attrib)
                elem.attrib['name'] = self.term
                elem.attrib['content'] = self.value
            return elem

        def to_opf2(self, parent=None):
            """Serialise as OPF 2: DC terms as elements, rest as opf:meta."""
            if namespace(self.term) == DC11_NS:
                elem = element(parent, self.term, attrib=self.fq_attrib)
                elem.text = self.value
            else:
                elem = element(parent, OPF('meta'), attrib=self.fq_attrib)
                elem.attrib['name'] = self.term
                elem.attrib['content'] = self.value
            return elem

    def __init__(self, oeb):
        self.oeb = oeb
        self.items = defaultdict(list)

    def add(self, term, value, attrib):
        """Create an Item and file it under its bare term name."""
        item = self.Item(term, value, attrib)
        # Key on the item's resolved term: an OPF <meta name=... content=...>
        # is filed under its ``name``, not under 'meta', which is what the
        # 'ms-chaptertour' lookup in to_opf1() relies on.
        items = self.items[barename(item.term)]
        items.append(item)
        return item

    def iterkeys(self):
        for key in self.items:
            yield key
    __iter__ = iterkeys

    def __getitem__(self, key):
        return self.items[key]

    def __contains__(self, key):
        return key in self.items

    def __getattr__(self, term):
        """Return the list of items stored for *term*."""
        # Only called when normal lookup fails.  Refuse ``items`` (it would
        # recurse if not yet set) and dunder names (protocol probes such as
        # copy/pickle must see a real AttributeError).
        if term == 'items' or term.startswith('__'):
            raise AttributeError(term)
        return self.items[term]

    def to_opf1(self, parent=None):
        """Serialise all items as an OPF 1.x ``metadata`` element."""
        elem = element(parent, 'metadata')
        dcmeta = element(elem, 'dc-metadata', nsmap=self.OPF1_NSMAP)
        xmeta = element(elem, 'x-metadata')
        for term in self.items:
            for item in self.items[term]:
                item.to_opf1(dcmeta, xmeta)
        if 'ms-chaptertour' not in self.items:
            chaptertour = self.Item('ms-chaptertour', 'chaptertour')
            chaptertour.to_opf1(dcmeta, xmeta)
        return elem

    def to_opf2(self, parent=None):
        """Serialise all items as an OPF 2 ``metadata`` element."""
        elem = element(parent, OPF('metadata'), nsmap=self.OPF2_NSMAP)
        for term in self.items:
            for item in self.items[term]:
                item.to_opf2(elem)
        return elem
 class Manifest(object):
    """Collection of manifest items, indexed by id and by href."""

    class Item(object):
        """One manifest entry whose content is loaded on demand."""

        def __init__(self, id, href, media_type, loader=str):
            """Build an item.

            id, href, media_type -- values of the manifest entry; '%20' in
                                    *href* is replaced by a space.
            loader               -- callable taking the href and returning
                                    the raw content.
            """
            self.id = id
            self.href = self.path = href.replace('%20', ' ')
            self.media_type = media_type
            self.spine_position = None
            self.linear = True
            self._loader = loader
            self._data = None

        def __repr__(self):
            return 'Item(id=%r, href=%r, media_type=%r)' \
                % (self.id, self.href, self.media_type)

        def data():
            def fget(self):
                # Compare against None explicitly: an assigned value may be
                # falsy (empty string, lxml element without children) and
                # must still be returned instead of reloading.
                if self._data is not None:
                    return self._data
                # NOTE(review): the loaded value is not stored in _data, so
                # every access reloads and reparses - confirm intended.
                data = self._loader(self.href)
                if self.media_type == XHTML_MIME:
                    data = etree.fromstring(data, parser=XML_PARSER)
                    if namespace(data.tag) != XHTML_NS:
                        # Force the XHTML namespace by adding an xmlns
                        # attribute and reparsing the serialised tree.
                        data.attrib['xmlns'] = XHTML_NS
                        data = etree.tostring(data)
                        data = etree.fromstring(data, parser=XML_PARSER)
                elif self.media_type.startswith('application/') \
                     and self.media_type.endswith('+xml'):
                    data = etree.fromstring(data, parser=XML_PARSER)
                return data
            def fset(self, value):
                self._data = value
            def fdel(self):
                self._data = None
            return property(fget, fset, fdel)
        data = data()

        def __cmp__(self, other):
            """Order by spine position, then by id."""
            result = cmp(self.spine_position, other.spine_position)
            if result != 0:
                return result
            return cmp(self.id, other.id)

    def __init__(self, oeb):
        self.oeb = oeb
        self.items = {}     # id -> Item
        self.hrefs = {}     # href as given to add() -> Item

    def add(self, id, href, media_type):
        """Create an Item read through the container and register it."""
        item = self.Item(id, href, media_type, self.oeb.container.read)
        self.items[id] = item
        self.hrefs[href] = item
        return item

    def remove(self, id):
        """Remove the item with *id*; raises KeyError if it is unknown."""
        href = self.items[id].href
        del self.items[id]
        del self.hrefs[href]

    def __iter__(self):
        for id in self.items:
            yield id

    def __getitem__(self, id):
        return self.items[id]

    def values(self):
        for item in self.items.values():
            yield item

    def iteritems(self):
        """Yield ``(id, item)`` pairs for every manifest entry."""
        for id, item in self.items.items():
            yield id, item

    # NOTE: __init__ binds ``self.items`` to the id -> Item dict, which
    # shadows this method on instances; it is only reachable as
    # ``Manifest.items(manifest)``.  Use iteritems() on instances.
    def items(self):
        return self.iteritems()

    def __contains__(self, key):
        return key in self.items

    def to_opf1(self, parent=None):
        """Serialise as an OPF 1.x ``manifest`` element."""
        elem = element(parent, 'manifest')
        for item in self.items.values():
            attrib = {'id': item.id, 'href': item.href,
                      'media-type': item.media_type}
            element(elem, 'item', attrib=attrib)
        return elem

    def to_opf2(self, parent=None):
        """Serialise as an OPF 2 ``manifest`` element."""
        elem = element(parent, OPF('manifest'))
        for item in self.items.values():
            attrib = {'id': item.id, 'href': item.href,
                      'media-type': item.media_type}
            element(elem, OPF('item'), attrib=attrib)
        return elem
 class Spine(object):
    """Ordered list of manifest items making up the reading sequence."""

    def __init__(self, oeb):
        self.oeb = oeb
        self.items = []

    def add(self, item, linear):
        """Append *item*, recording its position and linear flag.

        *linear* may be None, a boolean or a string; None, 'yes' and 'true'
        become True, 'no' and 'false' become False (strings are compared
        case-insensitively).  Any other value is stored unchanged.
        """
        flag = linear.lower() if isinstance(linear, StringTypes) else linear
        if flag is None or flag in ('yes', 'true'):
            flag = True
        elif flag in ('no', 'false'):
            flag = False
        item.linear = flag
        item.spine_position = len(self.items)
        self.items.append(item)
        return item

    def __iter__(self):
        for entry in self.items:
            yield entry

    def __getitem__(self, index):
        return self.items[index]

    def __len__(self):
        return len(self.items)

    def __contains__(self, item):
        return item in self.items

    def to_opf1(self, parent=None):
        """Serialise as an OPF 1.x ``spine``; non-linear items are omitted."""
        spine = element(parent, 'spine')
        for entry in self.items:
            if not entry.linear:
                continue
            element(spine, 'itemref', attrib={'idref': entry.id})
        return spine

    def to_opf2(self, parent=None):
        """Serialise as an OPF 2 ``spine``; non-linear items get linear=no."""
        spine = element(parent, OPF('spine'))
        for entry in self.items:
            attrs = {'idref': entry.id}
            if not entry.linear:
                attrs['linear'] = 'no'
            element(spine, OPF('itemref'), attrib=attrs)
        return spine
 class Guide(object):
    """Guide references of a package, keyed by reference type."""

    class Reference(object):
        """A single guide reference (type, title, href)."""

        def __init__(self, type, title, href):
            self.type = type
            self.title = title
            self.href = href

        def __repr__(self):
            return 'Reference(type=%r, title=%r, href=%r)' \
                % (self.type, self.title, self.href)

    def __init__(self, oeb):
        self.oeb = oeb
        self.refs = {}      # type -> Reference

    def add(self, type, title, href):
        """Store a reference; an existing one of the same type is replaced."""
        ref = self.Reference(type, title, href)
        self.refs[type] = ref
        return ref

    def by_type(self, type):
        """Return the reference of *type*; raises KeyError if absent."""
        return self.refs[type]

    def iterkeys(self):
        for type in self.refs:
            yield type
    __iter__ = iterkeys

    def values(self):
        for ref in self.refs.values():
            yield ref

    def items(self):
        for type, ref in self.refs.items():
            yield type, ref

    def __getitem__(self, index):
        return self.refs[index]

    def __contains__(self, key):
        return key in self.refs

    def to_opf1(self, parent=None):
        """Serialise as an OPF 1.x ``guide`` element."""
        elem = element(parent, 'guide')
        for ref in self.refs.values():
            attrib = {'type': ref.type, 'href': ref.href}
            if ref.title:
                attrib['title'] = ref.title
            element(elem, 'reference', attrib=attrib)
        return elem

    def to_opf2(self, parent=None):
        """Serialise as an OPF 2 ``guide`` element."""
        elem = element(parent, OPF('guide'))
        for ref in self.refs.values():
            attrib = {'type': ref.type, 'href': ref.href}
            if ref.title:
                attrib['title'] = ref.title
            element(elem, OPF('reference'), attrib=attrib)
        return elem
 class Toc(object):
    """Table-of-contents node; the root node holds the top-level entries."""

    def __init__(self, title=None, href=None, klass=None, id=None):
        self.title = title
        self.href = href
        self.klass = klass
        self.id = id
        self.nodes = []

    def add(self, title, href, klass=None, id=None):
        """Append and return a new child node."""
        node = Toc(title, href, klass, id)
        self.nodes.append(node)
        return node

    def __iter__(self):
        for node in self.nodes:
            yield node

    def __getitem__(self, index):
        return self.nodes[index]

    def depth(self, level=0):
        """Return the nesting depth, following the first child at each level."""
        if self.nodes:
            return self.nodes[0].depth(level+1)
        return level

    def to_opf1(self, tour):
        """Add one ``site`` element per node to *tour*, flattening nesting."""
        for node in self.nodes:
            element(tour, 'site',
                attrib={'title': node.title, 'href': node.href})
            node.to_opf1(tour)
        return tour

    def to_ncx(self, parent, playorder=None, depth=1):
        """Serialise the child nodes as nested NCX ``navPoint`` elements.

        parent    -- element that receives the navPoints.
        playorder -- one-element list used as a shared counter across the
                     recursion; created when not supplied.
        depth     -- current nesting level; at the top level the fragment
                     part of each href is dropped.
        """
        if not playorder: playorder = [0]
        for node in self.nodes:
            playorder[0] += 1
            point = etree.SubElement(parent,
                NCX('navPoint'), attrib={'playOrder': str(playorder[0])})
            # The navPoint describes *node*, so its class/id come from the
            # node itself, not from the parent doing the serialising.
            if node.klass:
                point.attrib['class'] = node.klass
            if node.id:
                point.attrib['id'] = node.id
            label = etree.SubElement(point, NCX('navLabel'))
            etree.SubElement(label, NCX('text')).text = node.title
            href = node.href if depth > 1 else node.href.split('#', 1)[0]
            etree.SubElement(point, NCX('content'), attrib={'src': href})
            node.to_ncx(point, playorder, depth+1)
        return parent
 class Oeb(object):
    def __init__(self, opfpath, container=None):
        """Load the package described by the OPF file at *opfpath*.

        Without a *container*, a DirContainer rooted at the OPF file's
        directory is created and *opfpath* is reduced to its file name.
        """
        if not container:
            rootdir = os.path.dirname(opfpath)
            opfpath = os.path.basename(opfpath)
            container = DirContainer(rootdir)
        self.container = container
        self._all_from_opf(self._read_opf(opfpath))
    def _convert_opf1(self, opf):
        """Return an OPF 2 package tree built from the OPF 1.x tree *opf*.

        Elements are moved out of *opf* into the new tree, so *opf* is
        consumed by the conversion.
        """
        # Copy the root attributes and force version 2.0.  Passing them via
        # ``attrib`` (instead of keyword arguments) avoids a TypeError when
        # the source package already carries a ``version`` attribute.
        attrib = dict(opf.attrib)
        attrib['version'] = '2.0'
        nroot = etree.Element(OPF('package'), attrib=attrib,
            nsmap={None: OPF2_NS})
        metadata = etree.SubElement(nroot, OPF('metadata'),
            nsmap={'opf': OPF2_NS, 'dc': DC11_NS,
                   'xsi': XSI_NS, 'dcterms': DCTERMS_NS})
        # Use the first Dublin Core namespace version that yields elements.
        for prefix in ('d11', 'd10', 'd09'):
            dcelems = xpath(opf, 'metadata/dc-metadata/%s:*' % prefix)
            if dcelems: break
        for dcelem in dcelems:
            if not dcelem.text: continue
            # Normalise to lower-case DC 1.1 element names.
            tag = barename(dcelem.tag).lower()
            dcelem.tag = '{%s}%s' % (DC11_NS, tag)
            for name in dcelem.attrib:
                if name in ('role', 'file-as', 'scheme'):
                    # These attributes live in the OPF namespace in OPF 2.
                    nsname = '{%s}%s' % (OPF2_NS, name)
                    dcelem.attrib[nsname] = dcelem.attrib[name]
                    del dcelem.attrib[name]
            metadata.append(dcelem)
        for meta in opf.xpath('metadata/x-metadata/meta'):
            metadata.append(meta)
        # Map the legacy OEB media types onto their modern equivalents.
        for item in opf.xpath('manifest/item'):
            media_type = item.attrib['media-type']
            if media_type in OEB_DOCS:
                media_type = XHTML_MIME
            elif media_type in OEB_STYLES:
                media_type = CSS_MIME
            item.attrib['media-type'] = media_type
        for tag in ('manifest', 'spine', 'tours', 'guide'):
            for section in opf.xpath(tag):
                nroot.append(section)
        # Serialise and reparse so the moved elements pick up the new
        # root's default namespace.
        return etree.fromstring(etree.tostring(nroot), parser=XML_PARSER)
    def _read_opf(self, opfpath):
        opf = self.container.read_xml(opfpath)
        version = float(opf.get('version', 1.0))
        if version < 2.0:
            opf = self._convert_opf1(opf)
        return opf
    def _metadata_from_opf(self, opf):
        """Populate ``self.metadata`` and ``self.uid`` from the OPF tree.

        ``self.uid`` is the identifier item whose ``id`` attribute matches
        the package's ``unique-identifier``, or None when there is none.
        Raises KeyError if the package has no ``unique-identifier``.
        """
        uid = opf.attrib['unique-identifier']
        self.metadata = metadata = Metadata(self)
        for elem in xpath(opf, '/o2:package/o2:metadata/*'):
            metadata.add(elem.tag, elem.text, elem.attrib)
        self.uid = None
        for item in metadata.identifier:
            # Identifiers are not required to carry an id attribute, so
            # look it up without raising.
            if item.attrib.get('id') == uid:
                self.uid = item
                break
    def _manifest_from_opf(self, opf):
        """Populate ``self.manifest`` from the OPF ``manifest/item`` elements."""
        self.manifest = Manifest(self)
        register = self.manifest.add
        for entry in xpath(opf, '/o2:package/o2:manifest/o2:item'):
            register(entry.get('id'), entry.get('href'),
                     entry.get('media-type'))
    def _spine_from_opf(self, opf):
        """Populate ``self.spine`` from the OPF ``spine/itemref`` elements.

        XHTML manifest items not referenced by the spine are appended
        afterwards, in sorted order, as non-linear entries.
        """
        spine = Spine(self)
        self.spine = spine
        for ref in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
            spine.add(self.manifest[ref.get('idref')], ref.get('linear'))
        leftovers = []
        for candidate in self.manifest.values():
            if candidate.media_type != XHTML_MIME:
                continue
            if candidate in spine:
                continue
            leftovers.append(candidate)
        leftovers.sort()
        for candidate in leftovers:
            spine.add(candidate, False)
    def _guide_from_opf(self, opf):
        """Populate ``self.guide`` from the OPF ``guide/reference`` elements."""
        self.guide = Guide(self)
        for ref in xpath(opf, '/o2:package/o2:guide/o2:reference'):
            self.guide.add(ref.get('type'), ref.get('title'), ref.get('href'))
    def _toc_from_navpoint(self, toc, navpoint):
        """Recursively add the navPoint children of *navpoint* to *toc*.

        Each child must have a label text and a content ``src``; a missing
        one raises IndexError.
        """
        for point in xpath(navpoint, 'ncx:navPoint'):
            label = xpath(point, 'ncx:navLabel/ncx:text/text()')[0]
            target = xpath(point, 'ncx:content/@src')[0]
            point_id = point.get('id')
            point_class = point.get('class')
            child = toc.add(label, target, id=point_id, klass=point_class)
            self._toc_from_navpoint(child, point)
    def _toc_from_ncx(self, opf):
        """Build ``self.toc`` from the NCX named by the spine's ``toc`` attribute.

        The NCX item is removed from the manifest once its data has been
        fetched.  Returns ``False`` (leaving ``self.toc`` untouched) when the
        spine names no NCX, ``True`` otherwise.
        """
        spine_toc = xpath(opf, '/o2:package/o2:spine/@toc')
        if not spine_toc:
            return False
        ncx_id = spine_toc[0]
        # Fetch the parsed NCX before dropping its manifest entry
        ncx = self.manifest[ncx_id].data
        self.manifest.remove(ncx_id)
        doc_title = xpath(ncx, 'ncx:docTitle/ncx:text/text()')[0]
        root = Toc(doc_title)
        self.toc = root
        for navmap in xpath(ncx, 'ncx:navMap'):
            self._toc_from_navpoint(root, navmap)
        return True
    def _toc_from_tour(self, opf):
        """Build ``self.toc`` from the first ``tour`` of the package.

        Every ``site`` of that tour becomes one entry.  Returns ``False``
        when the package has no tour, ``True`` otherwise.
        """
        tours = xpath(opf, '/o2:package/o2:tours/o2:tour')
        if not tours:
            return False
        first = tours[0]
        contents = Toc(first.get('title'))
        self.toc = contents
        for stop in xpath(first, 'o2:site'):
            contents.add(stop.get('title'), stop.get('href'))
        return True
    def _toc_from_html(self, opf):
        """Build ``self.toc`` from the links of the guide's ``toc`` document.

        When the guide reference carries a fragment and an element with that
        id exists, only links below that element are used.  Links sharing a
        target are merged into one entry whose title joins their texts;
        entries keep first-seen order.  Returns ``False`` when the guide has
        no ``toc`` reference, ``True`` otherwise.
        """
        if 'toc' not in self.guide:
            return False
        contents = Toc()
        self.toc = contents
        itempath, fragment = urldefrag(self.guide['toc'].href)
        scope = self.manifest.hrefs[itempath].data
        if fragment:
            anchored = xpath(scope, './/*[@id="%s"]' % fragment)
            if anchored:
                scope = anchored[0]
        labels = defaultdict(list)
        sequence = []
        for link in xpath(scope, './/h:a[@href]'):
            target = link.attrib['href']
            path, fragment = urldefrag(target)
            if not path:
                # Fragment-only link: it points into the TOC document itself
                target = '#'.join((itempath, fragment))
            if target not in labels:
                sequence.append(target)
            labels[target].append(' '.join(xpath(link, './/text()')))
        for target in sequence:
            contents.add(' '.join(labels[target]), target)
        return True
    def _toc_from_spine(self, opf):
        """Synthesize a flat table of contents from the linear spine items.

        Each linear item gets one entry pointing at its ``href``.  The
        entry is labelled with the document's ``<title>`` when every linear
        document has one and all of them are distinct.  Otherwise the label
        is the text of the first ``h1``..``h5`` or ``strong`` found (tried in
        that order), or a placeholder when there is none.

        Args:
            opf: unused; accepted for symmetry with the other TOC builders.

        Returns:
            Always ``True``.
        """
        self.toc = toc = Toc()
        # Labels are paired with exactly this list, so non-linear items can
        # never shift a label onto the wrong document.
        linear = [item for item in self.spine if item.linear]
        titles = []
        headers = []
        for item in linear:
            html = item.data
            title = xpath(html, '/h:html/h:head/h:title/text()')
            # Keep a slot for untitled documents so that both label lists
            # stay index-aligned with ``linear``.
            titles.append(title[0] if title else None)
            header = '(unlabled)'
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()' % (tag,)
                found = xpath(html, expr)
                if found:
                    header = found[0]
                    break
            headers.append(header)
        use = titles
        if None in titles or len(titles) > len(set(titles)):
            use = headers
        for title, item in zip(use, linear):
            toc.add(title, item.href)
        return True
    def _toc_from_opf(self, opf):
        if self._toc_from_ncx(opf): return
        if self._toc_from_tour(opf): return
        if self._toc_from_html(opf): return
        self._toc_from_spine(opf)
    def _all_from_opf(self, opf):
        self._metadata_from_opf(opf)
        self._manifest_from_opf(opf)
        self._spine_from_opf(opf)
        self._guide_from_opf(opf)
        self._toc_from_opf(opf)
    def to_opf1(self):
        """Serialize the book as an OPF 1 package.

        The table of contents is emitted as a single tour with id
        ``chaptertour``.

        Returns:
            A dict mapping ``OPF_MIME`` to ``('content.opf', package)``.
        """
        package = etree.Element('package',
            attrib={'unique-identifier': self.uid.id})
        # Children are appended in this order: metadata, manifest, spine,
        # tours, guide.
        self.metadata.to_opf1(package)
        self.manifest.to_opf1(package)
        self.spine.to_opf1(package)
        tour = element(
            element(package, 'tours'), 'tour',
            attrib={'id': 'chaptertour', 'title': 'Chapter Tour'})
        self.toc.to_opf1(tour)
        self.guide.to_opf1(package)
        return {OPF_MIME: ('content.opf', package)}
    def _generate_ncx_item(self):
        id = 'ncx'
        index = 0
        while id in self.manifest:
            id = 'ncx' + str(index)
            index = index + 1
        href = 'toc'
        index = 0
        while (href + '.ncx') in self.manifest.hrefs:
            href = 'toc' + str(index)
        href += '.ncx'
        return (id, href)
    def _to_ncx(self):
        """Build and return the NCX document element for ``self.toc``.

        The head carries the ``dtb:uid``, ``dtb:depth``,
        ``dtb:totalPageCount`` and ``dtb:maxPageNumber`` meta entries; the
        document title is the book's first title.
        """
        root = etree.Element(NCX('ncx'), attrib={'version': '2005-1'},
                             nsmap={None: NCX_NS})
        head = etree.SubElement(root, NCX('head'))
        head_entries = (
            ('dtb:uid', unicode(self.uid)),
            ('dtb:depth', str(self.toc.depth())),
            ('dtb:totalPageCount', '0'),
            ('dtb:maxPageNumber', '0'),
        )
        for name, content in head_entries:
            etree.SubElement(head, NCX('meta'),
                attrib={'name': name, 'content': content})
        doc_title = etree.SubElement(root, NCX('docTitle'))
        label = etree.SubElement(doc_title, NCX('text'))
        label.text = unicode(self.metadata.title[0])
        self.toc.to_ncx(etree.SubElement(root, NCX('navMap')))
        return root
    def to_opf2(self):
        """Serialize the book as an OPF 2 package plus its NCX.

        An NCX item with a freshly generated id/href is added to the
        manifest and referenced from the spine's ``toc`` attribute.

        Returns:
            A dict mapping ``OPF_MIME`` to ``('content.opf', package)`` and
            ``NCX_MIME`` to ``(ncx href, ncx element)``.
        """
        package = etree.Element(OPF('package'),
            attrib={'version': '2.0', 'unique-identifier': self.uid.id},
            nsmap={None: OPF2_NS})
        self.metadata.to_opf2(package)
        manifest = self.manifest.to_opf2(package)
        ncx_id, ncx_href = self._generate_ncx_item()
        etree.SubElement(manifest, OPF('item'),
            attrib={'id': ncx_id, 'href': ncx_href, 'media-type': NCX_MIME})
        spine = self.spine.to_opf2(package)
        spine.attrib['toc'] = ncx_id
        self.guide.to_opf2(package)
        documents = {OPF_MIME: ('content.opf', package)}
        documents[NCX_MIME] = (ncx_href, self._to_ncx())
        return documents
 def main(argv=sys.argv):
    """Parse each OEB path in ``argv[1:]`` and print its OPF 2 documents.

    Args:
        argv: argument vector; element 0 is ignored.

    Returns:
        0, for use as the process exit status.
    """
    for arg in argv[1:]:
        oeb = Oeb(arg)
        # to_opf2() maps MIME type -> (href, root element); only the element
        # can be handed to etree.tostring().
        for href, root in oeb.to_opf2().values():
            print(etree.tostring(root, pretty_print=True))
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/lit/split.py
+++ b/src/calibre/ebooks/lit/split.py
@ -0,0 +1,149 @@
 #! /usr/bin/python
 from __future__ import with_statement
 import sys
 import os
 import re
 import types
 import copy
 import itertools
 from collections import defaultdict
 from lxml import etree
 from stylizer import Page, Stylizer, Style
 # Namespace URI of XHTML elements
 XHTML_NS = 'http://www.w3.org/1999/xhtml'
 # Prefix map passed as ``namespaces=`` to XPath calls so that queries can use
 # the ``h:`` prefix for XHTML elements
 XPNSMAP = {'h': XHTML_NS,}
 class Splitter(object):
    """Split a single XHTML file into several documents at CSS page breaks.

    The source tree is walked depth-first and copied element by element into
    the current output document.  Whenever an element's computed
    ``page-break-before`` / ``page-break-after`` is ``always``, ``odd`` or
    ``even`` and content has been emitted since the last break, a new output
    document is started.  Afterwards, links into the original document are
    re-pointed at the output file that received their target.
    """
    XML_PARSER = etree.XMLParser(remove_blank_text=True)
    # Runs of spaces and line breaks are counted as one character of text
    COLLAPSE = re.compile(r'[ \n\r]+')
    # Tags that count as page content even without text; the loop below adds
    # the XHTML-namespaced spelling of each bare name.
    CONTENT_TAGS = set(['img', 'object', 'embed'])
    for tag in list(CONTENT_TAGS):
        CONTENT_TAGS.add('{%s}%s' % (XHTML_NS, tag))
    def __init__(self, path):
        """Parse the XHTML file at ``path`` and set up empty split state."""
        with open(path, 'rb') as f:
            self.tree = etree.parse(f, parser=self.XML_PARSER)
        # NOTE(review): verify this call against the current signature of
        # Stylizer.__init__ in stylizer.py.
        self.stylizer = Stylizer(self.tree, path)
        self.path = path
        self.basename = os.path.splitext(
            os.path.basename(path))[0].lower()
        # Root elements of the output documents and, in parallel, their
        # file names
        self.splits = []
        self.names = []
        # id / name attribute value -> output file name holding that element
        self.idmap = {}
        # font size -> number of text characters seen at that size
        self.fonts = defaultdict(int)
        # True once the current output document has received any content
        self.content = False
    def split(self):
        """Split the parsed document.

        The ``html`` root is looked up first without a namespace and then in
        the XHTML namespace.

        Returns:
            The zipped ``(file name, root element)`` pairs, one per output
            document, in the order the documents were started.

        Raises:
            ValueError: if the document does not have exactly one ``html``
                root, ``head`` and ``body``.
        """
        tree = self.tree
        for prefix in ('', 'h:'):
            d = {'h': prefix}
            roots = tree.xpath('/%(h)shtml' % d, namespaces=XPNSMAP)
            if roots: break
        self.root, = roots
        self.head, = tree.xpath('/%(h)shtml/%(h)shead' % d, namespaces=XPNSMAP)
        body, = tree.xpath('/%(h)shtml/%(h)sbody' % d, namespaces=XPNSMAP)
        # 9.0 is the size stylizer.FONT_SIZE_LIST assigns to 'medium'
        self._split(body, [self.new_root(str(self.basename))], 9.0)
        results = zip(self.names, self.splits)
        self.post_process_links(results, d)
        return results
    def new_root(self, name):
        """Start a new output document called ``name + '.html'``.

        The new root is a childless copy of the source root holding a deep
        copy of the source ``head``.  Returns the new root element.
        """
        nroot = self.dup(self.root)
        nroot.append(copy.deepcopy(self.head))
        self.splits.append(nroot)
        self.names.append(name + '.html')
        return nroot
    def dup(self, e):
        """Return a childless copy of ``e`` (tag, nsmap, attributes, text, tail)."""
        # Attributes go through ``attrib=`` rather than ``**kwargs`` so that
        # an attribute named ``nsmap`` or ``attrib`` cannot clash with the
        # factory's own keyword arguments.
        new = etree.Element(e.tag, attrib=dict(e.attrib), nsmap=e.nsmap)
        new.text = e.text
        new.tail = e.tail
        return new
    def dupsub(self, p, e):
        """Append a childless copy of ``e`` to ``p`` and return the copy."""
        new = etree.SubElement(p, e.tag, attrib=dict(e.attrib), nsmap=e.nsmap)
        new.text = e.text
        new.tail = e.tail
        return new
    def _split(self, src, dstq, psize):
        """Copy ``src`` and its subtree into the current output document.

        Args:
            src: source element to copy.
            dstq: stack of destination elements, outermost first; the copy
                of ``src`` is appended to ``dstq[-1]``.  ``new_split``
                replaces its entries when a page break starts a new
                document.
            psize: font size of the parent element; the tail text of ``src``
                is counted at this size.
        """
        style = self.stylizer.style(src)
        if self.new_page(style, 'before'):
            self.new_split(src, dstq)
        attrib = src.attrib
        name = self.names[-1]
        for aname in ('id', 'name'):
            if aname in attrib:
                self.idmap[attrib[aname]] = name
        text = self.COLLAPSE.sub(' ', src.text or '')
        # The tail is the text that follows the element's end tag
        tail = self.COLLAPSE.sub(' ', src.tail or '')
        if text or tail or src.tag.lower() in self.CONTENT_TAGS:
            self.content = True
        size = style['font-size']
        self.fonts[size] += len(text)
        self.fonts[psize] += len(tail)
        new = self.dupsub(dstq[-1], src)
        if len(src) > 0:
            dstq.append(new)
            for child in src:
                self._split(child, dstq, size)
            dstq.pop()
        if self.new_page(style, 'after'):
            self.new_split(src, dstq)
    def new_page(self, style, when):
        """Tell whether ``style`` forces a page break ``when`` the element.

        Args:
            style: computed style of the element.
            when: ``'before'`` or ``'after'``.

        Returns:
            True only if the current document already has content and the
            matching ``page-break-*`` value is ``always``, ``odd`` or
            ``even``.
        """
        if not self.content:
            return False
        return style['page-break-%s' % when] in ('always', 'odd', 'even')
    def new_split(self, src, dstq):
        """Start a new output document and rebuild ``dstq`` inside it.

        The document name is derived from the first class name and the id of
        ``src`` (falling back to the source base name), lower-cased with
        underscores turned into dashes.  A name that is already taken gets
        the current document count appended.
        """
        name = self.basename
        attrib = src.attrib
        if 'class' in attrib:
            # Only the first of several class names is used
            name = attrib['class'].split(' ', 1)[0]
        if 'id' in attrib:
            name = '%s-%s' % (name, attrib['id'])
        name = name.lower().replace('_', '-')
        if (name + '.html') in self.names:
            name = '%s-%02d' % (name, len(self.names))
        # Re-create the chain of open ancestors in the new document.
        # NOTE(review): dupsub() also copies each ancestor's text and tail,
        # so that text appears again in the new document -- confirm intended.
        prev = None
        for i, old in enumerate(dstq):
            if prev is None:
                new = self.new_root(name)
            else:
                new = self.dupsub(prev, old)
            prev = dstq[i] = new
        self.content = False
    def post_process_links(self, results, prefixes):
        """Re-point same-document fragment links at the right output file.

        Args:
            results: iterable of ``(file name, root element)`` pairs.
            prefixes: dict whose ``'h'`` entry is the XPath prefix (``''``
                or ``'h:'``) to use for XHTML element names.

        Links to other files, links without a fragment, and links whose
        fragment was never seen as an ``id``/``name`` are left untouched.
        """
        basename = os.path.basename(self.path)
        query = '//%(h)sa[@href]' % prefixes
        for name, root in results:
            for element in root.xpath(query, namespaces=XPNSMAP):
                href = element.attrib['href']
                if '#' not in href: continue
                # Split on the first '#' only; later ones stay in the fragment
                fname, frag = href.split('#', 1)
                if fname not in ('', basename):
                    continue
                target = self.idmap.get(frag)
                if target is None:
                    # Dangling fragment: nothing sensible to point it at
                    continue
                element.attrib['href'] = '#'.join((target, frag))
 def main():
    """Split the XHTML file ``sys.argv[1]`` into the directory ``sys.argv[2]``.

    The name of every output file is printed as it is written.  Returns 0.
    """
    source, dest = sys.argv[1], sys.argv[2]
    def serialize(root):
        # UTF-8 bytes with an XML declaration, indented for readability
        return etree.tostring(root, pretty_print=True,
                              encoding='utf-8', xml_declaration=True)
    for name, root in Splitter(source).split():
        print(name)
        target = os.path.join(dest, name)
        with open(target, 'wb') as f:
            f.write(serialize(root))
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/lit/stylizer.py
+++ b/src/calibre/ebooks/lit/stylizer.py
@ -0,0 +1,435 @@
 #! /usr/bin/python2.5
 # -*- encoding: utf-8 -*-
 from __future__ import with_statement
 import sys
 import os
 import locale
 import codecs
 import itertools
 import types
 import re
 import copy
 import cssutils
 from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
    CSSValueList, cssproperties
 from lxml import etree
 from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES, barename
 from calibre.resources import html_css
 # Stylesheet parsed once, at import time, from the ``html_css`` resource;
 # Stylizer puts it first in every document's stylesheet list
 HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
 # Text Stylizer prepends to the data of linked stylesheets before parsing
 XHTML_CSS_NAMESPACE = "@namespace url(http://www.w3.org/1999/xhtml);\n"
 # Properties that Style._get() takes from the parent element's style when the
 # element itself does not set them
 INHERITED = set(['azimuth', 'border-collapse', 'border-spacing',
                 'caption-side', 'color', 'cursor', 'direction', 'elevation',
                 'empty-cells', 'font-family', 'font-size', 'font-style',
                 'font-variant', 'font-weight', 'letter-spacing',
                 'line-height', 'list-style-image', 'list-style-position',
                 'list-style-type', 'orphans', 'page-break-inside',
                 'pitch-range', 'pitch', 'quotes', 'richness', 'speak-header',
                 'speak-numeral', 'speak-punctuation', 'speak', 'speech-rate',
                 'stress', 'text-align', 'text-indent', 'text-transform',
                 'visibility', 'voice-family', 'volume', 'white-space',
                 'widows', 'word-spacing'])
 # Fallback value per property, used by Style._get() when neither the element's
 # own rules nor inheritance supply one.
 # NOTE(review): the ':color' entries look like a placeholder meaning "use the
 # element's 'color' value"; nothing in this module resolves it -- confirm.
 DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll',
            'background-color': 'transparent', 'background-image': 'none',
            'background-position': '0% 0%', 'background-repeat': 'repeat',
            'border-bottom-color': ':color', 'border-bottom-style': 'none',
            'border-bottom-width': 'medium', 'border-collapse': 'separate',
            'border-left-color': ':color', 'border-left-style': 'none',
            'border-left-width': 'medium', 'border-right-color': ':color',
            'border-right-style': 'none', 'border-right-width': 'medium',
            'border-spacing': 0, 'border-top-color': ':color',
            'border-top-style': 'none', 'border-top-width': 'medium', 'bottom':
            'auto', 'caption-side': 'top', 'clear': 'none', 'clip': 'auto',
            'color': 'black', 'content': 'normal', 'counter-increment': 'none',
            'counter-reset': 'none', 'cue-after': 'none', 'cue-before': 'none',
            'cursor': 'auto', 'direction': 'ltr', 'display': 'inline',
            'elevation': 'level', 'empty-cells': 'show', 'float': 'none',
            'font-family': 'serif', 'font-size': 'medium', 'font-style':
            'normal', 'font-variant': 'normal', 'font-weight': 'normal',
            'height': 'auto', 'left': 'auto', 'letter-spacing': 'normal',
            'line-height': 'normal', 'list-style-image': 'none',
            'list-style-position': 'outside', 'list-style-type': 'disc',
            'margin-bottom': 0, 'margin-left': 0, 'margin-right': 0,
            'margin-top': 0, 'max-height': 'none', 'max-width': 'none',
            'min-height': 0, 'min-width': 0, 'orphans': '2',
            'outline-color': 'invert', 'outline-style': 'none',
            'outline-width': 'medium', 'overflow': 'visible', 'padding-bottom':
            0, 'padding-left': 0, 'padding-right': 0, 'padding-top': 0,
            'page-break-after': 'auto', 'page-break-before': 'auto',
            'page-break-inside': 'auto', 'pause-after': 0, 'pause-before':
            0, 'pitch': 'medium', 'pitch-range': '50', 'play-during': 'auto',
            'position': 'static', 'quotes': u"'“' '”' '‘' '’'", 'richness':
            '50', 'right': 'auto', 'speak': 'normal', 'speak-header': 'once',
            'speak-numeral': 'continuous', 'speak-punctuation': 'none',
            'speech-rate': 'medium', 'stress': '50', 'table-layout': 'auto',
            'text-align': 'left', 'text-decoration': 'none', 'text-indent':
            0, 'text-transform': 'none', 'top': 'auto', 'unicode-bidi':
            'normal', 'vertical-align': 'baseline', 'visibility': 'visible',
            'voice-family': 'default', 'volume': 'medium', 'white-space':
            'normal', 'widows': '2', 'width': 'auto', 'word-spacing': 'normal',
            'z-index': 'auto'}
 # Keyword font sizes recognized by Stylizer.flatten_style() and Style.fontSize
 FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
                       'x-large', 'xx-large'])
 # Rows of (keyword, number, size), smallest first.  The size is in points:
 # flatten_style() formats it with a 'pt' suffix.
 # NOTE(review): the number column presumably mirrors HTML <font size="N">
 # values -- confirm.
 FONT_SIZE_LIST = [('xx-small', 1,     6.),
                  ('x-small',  None,  7.),
                  ('small',    2,     8.),
                  ('medium',   3,     9.),
                  ('large',    4,    11.),
                  ('x-large',  5,    13.),
                  ('xx-large', 6,    15.),
                  (None,       7,    17.)]
 # Lookup tables derived from FONT_SIZE_LIST.  Rows without a keyword or a
 # number also leave a ``None`` key behind in the respective table.
 FONT_SIZE_BY_NAME = {}
 FONT_SIZE_BY_NUM = {}
 for name, num, size in FONT_SIZE_LIST:
    FONT_SIZE_BY_NAME[name] = size
    FONT_SIZE_BY_NUM[num] = size
 # Prefix map used by xpath() below so queries can address XHTML as ``h:``
 XPNSMAP = {'h': XHTML_NS,}
 def xpath(elem, expr):
    """Evaluate the XPath ``expr`` on ``elem`` with the ``XPNSMAP`` prefixes.

    Returns whatever ``elem.xpath`` returns for the expression.
    """
    evaluate = elem.xpath
    return evaluate(expr, namespaces=XPNSMAP)
 class Page(object):
    """Page geometry: ``width``, ``height`` and ``dpi``, all stored as floats."""
    def __init__(self, width, height, dpi):
        """Coerce the three measurements to ``float`` and store them."""
        self.width, self.height, self.dpi = \
            float(width), float(height), float(dpi)
 class Profiles(object):
    """Named ``Page`` presets; ``PRS505`` is the very same object as ``PRS500``."""
    PRS505 = PRS500 = Page(584, 754, 168.451)
 class Stylizer(object):
    """Gather and flatten the CSS rules that apply to one XHTML document.

    The rule list is built from the built-in HTML stylesheet, the
    document's ``<style>`` elements and its linked stylesheets, then ordered
    by specificity.  ``style(element)`` returns the computed ``Style`` of an
    element, and ``stylesheet(name)`` re-serializes the rules of one source
    stylesheet.
    """
    # Parsed linked stylesheets keyed by normalized path; shared by all
    # instances.
    STYLESHEETS = {}
    def __init__(self, tree, path, oeb=None, page=Profiles.PRS505):
        """Collect the rules for ``tree``.

        Args:
            tree: parsed XHTML document; must contain ``/h:html/h:head``.
            path: path of the document.  Linked stylesheet hrefs are
                resolved against its directory, and its base name (with a
                ``.css`` suffix) names the embedded ``<style>`` sheets.
            oeb: object whose ``manifest.hrefs[path].data`` supplies the
                text of linked stylesheets.  When omitted, linked
                stylesheets are read from the filesystem instead.
            page: ``Page`` profile that ``Style`` uses for unit conversion.
        """
        self.page = page
        base = os.path.dirname(path)
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [HTML_CSS_STYLESHEET]
        head = xpath(tree, '/h:html/h:head')[0]
        for elem in head:
            if not isinstance(elem.tag, basestring):
                # lxml gives comments and processing instructions a
                # non-string tag; they carry no styles.
                continue
            tag = barename(elem.tag)
            if tag == 'style':
                # An empty <style/> has no text at all
                text = elem.text or ''
                stylesheet = cssutils.parseString(text, href=cssname)
                stylesheets.append(stylesheet)
            elif tag == 'link' \
                 and elem.get('rel', 'stylesheet') == 'stylesheet' \
                 and elem.get('type', CSS_MIME) in OEB_STYLES:
                href = elem.attrib['href']
                css_path = os.path.join(base, href)
                css_path = os.path.normpath(css_path).replace('\\', '/')
                if css_path in self.STYLESHEETS:
                    stylesheet = self.STYLESHEETS[css_path]
                else:
                    data = XHTML_CSS_NAMESPACE
                    data += self._stylesheet_data(css_path, oeb)
                    stylesheet = cssutils.parseString(data, href=css_path)
                    self.STYLESHEETS[css_path] = stylesheet
                stylesheets.append(stylesheet)
        rules = []
        index = 0
        self.stylesheets = set()
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                rules.extend(self.flatten_rule(rule, href, index))
                index = index + 1
        # Order by specificity only.  The sort is stable, so rules of equal
        # specificity keep their source order, and selector objects and
        # style dicts are never compared with each other.
        rules.sort(key=lambda rule: rule[0])
        self.rules = rules
        # element -> Style cache, filled in by Style.__init__
        self._styles = {}
    def _stylesheet_data(self, path, oeb):
        """Return the text of the linked stylesheet at ``path``."""
        if oeb is not None:
            return oeb.manifest.hrefs[path].data
        with open(path, 'rb') as f:
            return f.read()
    def flatten_rule(self, rule, href, index):
        """Expand one CSS rule into flat rule tuples.

        Args:
            rule: a cssutils rule object.
            href: href of the stylesheet the rule came from.
            index: running rule number, appended to the specificity so that
                later rules win among equals.

        Returns:
            A list of ``(specificity, selector items, style dict, selector
            text, href)`` tuples: one per selector of a style rule, one with
            an empty selector for an ``@page`` rule, none for other rules.
        """
        results = []
        if isinstance(rule, CSSStyleRule):
            style = self.flatten_style(rule.style)
            for selector in rule.selectorList:
                specificity = selector.specificity + (index,)
                text = selector.selectorText
                selector = list(selector.seq)
                results.append((specificity, selector, style, text, href))
        elif isinstance(rule, CSSPageRule):
            style = self.flatten_style(rule.style)
            results.append(((0, 0, 0, 0), [], style, '@page', href))
        return results
    def flatten_style(self, cssstyle):
        """Turn a CSS declaration block into a ``{property: value}`` dict.

        The ``margin``, ``padding`` and ``font`` shorthands are expanded into
        their longhand properties, and a keyword ``font-size`` is replaced
        by its point size from ``FONT_SIZE_BY_NAME``.
        """
        style = {}
        for prop in cssstyle:
            name = prop.name
            if name in ('margin', 'padding'):
                style.update(self._normalize_edge(prop.cssValue, name))
            elif name == 'font':
                style.update(self._normalize_font(prop.cssValue))
            else:
                style[name] = prop.value
        if 'font-size' in style:
            size = style['font-size']
            if size == 'normal': size = 'medium'
            if size in FONT_SIZE_NAMES:
                style['font-size'] = "%dpt" % FONT_SIZE_BY_NAME[size]
        return style
    def _primitives(self, cssvalue):
        """Return the component value texts of ``cssvalue`` as a list."""
        if isinstance(cssvalue, CSSValueList):
            return [v.cssText for v in cssvalue]
        return [cssvalue.cssText]
    def _normalize_edge(self, cssvalue, name):
        """Expand a ``margin``/``padding`` shorthand into its four edges.

        One to four component values are distributed over top, right,
        bottom and left following the CSS shorthand rules; values beyond the
        fourth are ignored.
        """
        style = {}
        primitives = self._primitives(cssvalue)
        if len(primitives) == 1:
            value, = primitives
            values = [value, value, value, value]
        elif len(primitives) == 2:
            vert, horiz = primitives
            values = [vert, horiz, vert, horiz]
        elif len(primitives) == 3:
            top, horiz, bottom = primitives
            values = [top, horiz, bottom, horiz]
        else:
            values = primitives[:4]
        edges = ('top', 'right', 'bottom', 'left')
        for edge, value in zip(edges, values):
            style["%s-%s" % (name, edge)] = value
        return style
    def _normalize_font(self, cssvalue):
        """Expand a ``font`` shorthand into its six longhand properties.

        Component values are matched, in order, against the longhands in
        ``composition`` order; every longhand left unmatched receives its
        value from ``DEFAULTS``.
        """
        composition = ('font-style', 'font-variant', 'font-weight',
                       'font-size', 'line-height', 'font-family')
        style = {}
        if cssvalue.cssText == 'inherit':
            for key in composition:
                style[key] = 'inherit'
            return style
        # Reversed so that pop() hands out the values left to right
        primitives = self._primitives(cssvalue)
        primitives.reverse()
        if primitives:
            value = primitives.pop()
            for key in composition:
                if cssproperties.cssvalues[key](value):
                    style[key] = value
                    if not primitives: break
                    value = primitives.pop()
        for key in composition:
            if key not in style:
                style[key] = DEFAULTS[key]
        return style
    def style(self, element):
        """Return the ``Style`` of ``element``, computing it on first use."""
        try:
            return self._styles[element]
        except KeyError:
            # Style.__init__ registers the new object in self._styles
            return Style(element, self)
    def stylesheet(self, name, font_scale=None):
        """Serialize the flattened rules that came from stylesheet ``name``.

        Args:
            name: stylesheet href to select rules by.
            font_scale: optional factor applied to every ``font-size`` given
                in points.

        Returns:
            The CSS text, one rule block per selector.
        """
        rules = []
        for _, _, style, selector, href in self.rules:
            if href != name: continue
            if font_scale and 'font-size' in style and \
                    style['font-size'].endswith('pt'):
                # Scale a copy; the dict is shared by all of the rule's
                # selectors
                style = copy.copy(style)
                size = float(style['font-size'][:-2])
                style['font-size'] = "%.2fpt" % (size * font_scale)
            style = ';\n    '.join(': '.join(item) for item in style.items())
            rules.append('%s {\n    %s;\n}' % (selector, style))
        return '\n'.join(rules)
 class Style(object):
    """Computed CSS style of a single element.

    ``style['property-name']`` returns the computed value, with lengths
    converted to points where a conversion is known.  ``fontSize`` and
    ``width`` are resolved against the parent element's style.  Inheritance
    only works when the parent element has already been styled by the same
    stylizer.
    """
    try:
        _NUMBER_TYPES = (int, long, float)
    except NameError:
        # Interpreters without a separate ``long`` type
        _NUMBER_TYPES = (int, float)
    # A number (optionally negative) followed by a supported unit
    _LENGTH = re.compile(
        r"^(-?(?:[0-9]+\.?[0-9]*|\.[0-9]+))\s*(%|em|px|mm|cm|in|pt|pc)$")
    def __init__(self, element, stylizer):
        """Compute the style of ``element`` and register it with ``stylizer``."""
        self._element = element
        self._page = stylizer.page
        self._stylizer = stylizer
        self._style = self._assemble_style(element, stylizer)
        stylizer._styles[element] = self
    def _assemble_style(self, element, stylizer):
        """Merge every matching rule, then the inline ``style`` attribute.

        Rules are applied in ``stylizer.rules`` order, so later (more
        specific) rules override earlier ones; inline declarations override
        all of them.
        """
        result = {}
        rules = stylizer.rules
        for _, selector, style, _, _ in rules:
            # @page rules are stored with an empty selector; they describe
            # the page box and must not be applied to elements.
            if selector and self._selects_element(element, selector):
                result.update(style)
        inline = element.attrib.get('style')
        if inline is not None:
            style = CSSStyleDeclaration(inline)
            result.update(stylizer.flatten_style(style))
        return result
    def _selects_element(self, element, selector):
        """Tell whether the flattened ``selector`` matches ``element``.

        The selector items are checked right to left.  Supported items are
        the universal, type, id and class selectors, the child and
        descendant combinators and the ``:first-child`` pseudo-class; any
        other item makes the selector fail to match.
        """
        def _selects_element(element, items, index):
            if index == -1:
                return True
            item = items[index]
            if item.type == 'universal':
                pass
            elif item.type == 'type-selector':
                name1 = ("{%s}%s" % item.value).lower()
                name2 = element.tag.lower()
                if name1 != name2:
                    return False
            elif item.type == 'id':
                # item.value includes the leading '#'
                name1 = item.value[1:].lower()
                name2 = element.attrib.get('id', '').lower()
                if name1 != name2:
                    return False
            elif item.type == 'class':
                # item.value includes the leading '.'
                name = item.value[1:].lower()
                classes = element.attrib.get('class', '').lower().split()
                if name not in classes:
                    return False
            elif item.type == 'child':
                parent = element.getparent()
                if parent is None:
                    return False
                element = parent
            elif item.type == 'descendant':
                # Any ancestor may satisfy the rest of the selector
                element = element.getparent()
                while element is not None:
                    if _selects_element(element, items, index - 1):
                        return True
                    element = element.getparent()
                return False
            elif item.type == 'pseudo-class':
                if item.value == ':first-child':
                    e = element.getprevious()
                    if e is not None:
                        return False
                else:
                    return False
            elif item.type == 'pseudo-element':
                return False
            else:
                return False
            return _selects_element(element, items, index - 1)
        return _selects_element(element, selector, len(selector) - 1)
    def _has_parent(self):
        """True if the element has a parent that has already been styled."""
        parent = self._element.getparent()
        return (parent is not None) \
            and (parent in self._stylizer._styles)
    def __getitem__(self, name):
        """Return the computed value of the CSS property ``name``.

        Properties with a dedicated attribute on this class (``font-size``
        and ``width``) are served by it; all others are looked up with
        inheritance and defaults and run through unit conversion.
        """
        domname = cssproperties._toDOMname(name)
        if hasattr(self, domname):
            return getattr(self, domname)
        return self._unit_convert(self._get(name))
    def _get(self, name):
        """Return the raw value of ``name``: own, inherited, or default.

        Raises:
            KeyError: if the property is unset and has no entry in
                ``DEFAULTS``.
        """
        result = self._style.get(name)
        if result == 'inherit' or (result is None and name in INHERITED):
            if self._has_parent():
                parent = self._element.getparent()
                return self._stylizer._styles[parent]._get(name)
            # Nothing to inherit from: use the initial value
            result = None
        if result is None:
            result = DEFAULTS[name]
        return result
    def _unit_convert(self, value, base=None, font=None):
        """Convert a CSS length to points.

        Args:
            value: a number (returned unchanged) or a CSS value string.
            base: reference length for percentages; defaults to
                ``self.width``.
            font: font size for ``em`` units; defaults to ``self.fontSize``.

        Returns:
            The length in points as a float, or ``value`` itself when it is
            not a length in a supported unit.
        """
        if isinstance(value, self._NUMBER_TYPES):
            return value
        try:
            if float(value) == 0:
                return 0.0
        except (TypeError, ValueError):
            pass
        m = self._LENGTH.search(value)
        if m is None:
            return value
        number = float(m.group(1))
        unit = m.group(2)
        if unit == '%':
            if base is None:
                base = self.width
            return (number / 100.0) * base
        if unit == 'em':
            if font is None:
                font = self.fontSize
            return number * font
        if unit == 'px':
            return number * 72.0 / self._page.dpi
        if unit == 'in':
            return number * 72.0
        if unit == 'pt':
            return number
        if unit == 'pc':
            return number * 12.0
        if unit == 'mm':
            # 25.4 mm per inch, 72 points per inch
            return number * 72.0 / 25.4
        # Only 'cm' remains: 2.54 cm per inch
        return number * 72.0 / 2.54
    @property
    def fontSize(self):
        """Font size of the element in points.

        Relative values (``smaller``, ``larger``, ``em``, ``%``) are resolved
        against the parent's font size, or against the default size when the
        element has no styled parent.
        """
        # ``property`` takes precedence over the instance dict, so the
        # cached value has to be fetched explicitly.
        if 'fontSize' in self.__dict__:
            return self.__dict__['fontSize']
        def normalize_fontsize(value, base=None):
            result = None
            factor = None
            if value == 'inherit':
                if base is not None:
                    return base
                value = 'medium'
            if value in FONT_SIZE_NAMES:
                result = FONT_SIZE_BY_NAME[value]
            elif value == 'smaller':
                # Largest table size below ``base``; scale down instead when
                # ``base`` is already at or below the smallest entry.
                factor = 1.0/1.2
                for _, _, size in FONT_SIZE_LIST:
                    if base <= size: break
                    factor = None
                    result = size
            elif value == 'larger':
                # Smallest table size above ``base``; scale up instead when
                # ``base`` is already at or above the largest entry.
                factor = 1.2
                for _, _, size in reversed(FONT_SIZE_LIST):
                    if base >= size: break
                    factor = None
                    result = size
            else:
                result = self._unit_convert(value, base=base, font=base)
                if not isinstance(result, self._NUMBER_TYPES):
                    # Unrecognized value: keep the size being inherited
                    result = base
                elif result < 0:
                    result = normalize_fontsize("smaller", base)
            if factor:
                result = factor * base
            return result
        result = None
        if self._has_parent():
            styles = self._stylizer._styles
            base = styles[self._element.getparent()].fontSize
        else:
            base = normalize_fontsize(DEFAULTS['font-size'])
        if 'font-size' in self._style:
            size = self._style['font-size']
            result = normalize_fontsize(size, base)
        else:
            result = base
        self.__dict__['fontSize'] = result
        return result
    @property
    def width(self):
        """Width available to the element, in points.

        Defaults to the parent's width, or to the page width when the
        element has no styled parent; an explicit non-``auto`` ``width`` is
        converted relative to that base.
        """
        # See fontSize for why the cache is read explicitly
        if 'width' in self.__dict__:
            return self.__dict__['width']
        result = None
        base = None
        if self._has_parent():
            styles = self._stylizer._styles
            base = styles[self._element.getparent()].width
        else:
            base = self._page.width
        if 'width' in self._style:
            width = self._style['width']
            if width == 'auto':
                result = base
            else:
                result = self._unit_convert(width, base=base)
        else:
            result = base
        self.__dict__['width'] = result
        return result
    def __str__(self):
        """Return the element's own declarations as ``name: value; ...``."""
        items = self._style.items()
        return '; '.join("%s: %s" % (key, val) for key, val in items)
--- a/src/calibre/ebooks/lit/writer.py
+++ b/src/calibre/ebooks/lit/writer.py
@ -0,0 +1,655 @@
 from __future__ import with_statement
 import sys
 import os
 from cStringIO import StringIO
 from struct import pack, unpack
 from itertools import izip, count
 import time
 import random
 import re
 import copy
 import uuid
 import functools
 from lxml import etree
 from calibre.ebooks.lit.reader import msguid, DirectoryEntry
 import calibre.ebooks.lit.maps as maps
 from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME
 from calibre.ebooks.lit.oeb import Oeb, namespace, barename
 from calibre.ebooks.lit.stylizer import Stylizer
 from calibre.ebooks.lit.lzxcomp import Compressor
 import calibre
 from calibre import plugins
 msdes, msdeserror = plugins['msdes']
 import calibre.ebooks.lit.mssha1 as mssha1
 __all__ = ['LitWriter']
 def invert_tag_map(tag_map):
    """Invert a (tags, default-attrs, per-tag-attrs) decoding table.

    tag_map is a 3-tuple:
      tags   -- sequence mapping tag index -> tag name
      dattrs -- dict mapping attribute code -> name, shared by all tags
      tattrs -- sequence (indexed like tags) of per-tag dicts mapping
                attribute code -> name; entries may be None/empty

    Returns (tags, tattrs) where tags maps tag name -> index and
    tattrs[i] maps attribute name -> code for tag i.  Non-empty per-tag maps
    are merged with the shared attributes (the shared code wins when a name
    appears in both); slot 0 is replaced by the shared map itself.
    """
    tags, dattrs, tattrs = tag_map
    tags = dict((name, index) for index, name in enumerate(tags))
    dattrs = dict((name, code) for code, name in dattrs.items())
    tattrs = [dict((name, code) for code, name in (attrs or {}).items())
              for attrs in tattrs]
    for attrs in tattrs:
        if attrs:
            attrs.update(dattrs)
    # Slot 0 is the fallback table used for tags without their own entries.
    tattrs[0] = dattrs
    return tags, tattrs
 # Name -> code lookup tables for OPF and HTML content, obtained by inverting
 # the tables in calibre.ebooks.lit.maps.
 OPF_MAP = invert_tag_map(maps.OPF_MAP)
 HTML_MAP = invert_tag_map(maps.HTML_MAP)
 # Signature written at the very start of the output stream by _dump().
 LIT_MAGIC = 'ITOLITLS'
 # GUIDs written (packed) into the primary header and header pieces #3/#4.
 LITFILE_GUID = "{0A9007C1-4076-11D3-8789-0000F8105754}"
 PIECE3_GUID = "{0A9007C3-4076-11D3-8789-0000F8105754}"
 PIECE4_GUID = "{0A9007C4-4076-11D3-8789-0000F8105754}"
 # GUIDs identifying the DES and LZX transforms in the ::DataSpace/Storage
 # and ::Transform directory entries.
 DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}"
 LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}"
 def packguid(guid):
    """Pack a '{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}' GUID string to 16 bytes.

    The first three groups are stored little-endian (uint32, uint16,
    uint16); the remaining sixteen hex digits are stored as eight single
    bytes in the order they appear.
    """
    # (start, stop) positions of each hex field within the braced string.
    spans = ((1, 9), (10, 14), (15, 19),
             (20, 22), (22, 24), (25, 27), (27, 29),
             (29, 31), (31, 33), (33, 35), (35, 37))
    fields = [int(guid[start:stop], 16) for start, stop in spans]
    return pack("<LHHBBBBBBBB", *fields)
 # Bit flags combined into the flags value that ReBinary.tree_to_binary()
 # writes ahead of every tag.  FLAG_ATOM is not referenced in this module.
 FLAG_OPENING = (1 << 0)
 FLAG_CLOSING = (1 << 1)
 FLAG_BLOCK = (1 << 2)
 FLAG_HEAD = (1 << 3)
 FLAG_ATOM = (1 << 4)
 # Written in place of a tag/attribute code when the name is not in the
 # lookup table; followed by len(name)+1 and the name itself.
 FLAG_CUSTOM  = (1 << 15)
 # Marks an attribute value stored as an integer (value + 1), not a string.
 ATTR_NUMBER  = 0xffff
 # Spacing of the five header "piece" records; each is later filled with a
 # '<QQ' (offset, length) pair, i.e. 16 bytes.
 PIECE_SIZE = 16
 # Header sizes written into the primary header by _dump().
 PRIMARY_SIZE = 40
 SECONDARY_SIZE = 232
 # Fixed sizes of a directory chunk and of the count chunk.
 DCHUNK_SIZE = 0x2000
 CCHUNK_SIZE = 0x0200
 # All-ones 64-bit value, written where no chunk id applies (e.g. missing
 # previous/next links).
 ULL_NEG1 = 0xffffffffffffffff
 # Offset/size recorded for the root '/' directory entry.
 # NOTE(review): the meaning of these two values is not evident from this
 # file.
 ROOT_OFFSET = 1284508585713721976
 ROOT_SIZE = 4165955342166943123
 # Fixed header block written verbatim by _dump(); starts with ASCII 'CAOL'.
 BLOCK_CAOL = \
    "\x43\x41\x4f\x4c\x02\x00\x00\x00" \
    "\x50\x00\x00\x00\x37\x13\x03\x00" \
    "\x00\x00\x00\x00\x00\x20\x00\x00" \
    "\x00\x02\x00\x00\x00\x00\x10\x00" \
    "\x00\x00\x02\x00\x00\x00\x00\x00" \
    "\x00\x00\x00\x00\x00\x00\x00\x00"
 # Fixed header block written right after BLOCK_CAOL; starts with 'ITSF'.
 BLOCK_ITSF = \
    "\x49\x54\x53\x46\x04\x00\x00\x00" \
    "\x20\x00\x00\x00\x01\x00\x00\x00"
 # ControlData payload recorded for the DES transform in _build_storage().
 MSDES_CONTROL = \
    "\x03\x00\x00\x00\x29\x17\x00\x00" \
    "\x01\x00\x00\x00\xa5\xa5\x00\x00"
 # ControlData payload recorded for the LZX transform (contains 'LZXC').
 LZXC_CONTROL = \
    "\x07\x00\x00\x00\x4c\x5a\x58\x43" \
    "\x03\x00\x00\x00\x04\x00\x00\x00" \
    "\x04\x00\x00\x00\x02\x00\x00\x00" \
    "\x00\x00\x00\x00\x00\x00\x00\x00"
 # Runs of space, CR, LF or vertical tab; replaced by a single space when
 # whitespace is not preserved.
 # NOTE(review): horizontal tab is not in the class -- confirm whether '\v'
 # was meant to be '\t'.
 COLLAPSE = re.compile(r'[ \r\n\v]+')
 def prefixname(name, nsrmap):
    """Return name as 'prefix:local', or just 'local' for an empty prefix.

    nsrmap maps namespace URI -> prefix; a KeyError is raised when the
    namespace of name has no entry.
    """
    prefix = nsrmap[namespace(name)]
    local = barename(name)
    if prefix:
        return ':'.join((prefix, local))
    return local
 def decint(value):
    """Encode a non-negative integer as a big-endian base-128 string.

    The value is split into 7-bit groups, most significant group first;
    every byte except the last has its high bit (0x80) set.  Zero encodes
    as a single NUL byte.

    Raises ValueError for negative values, which have no such encoding
    (right-shifting a negative number never reaches zero).
    """
    if value < 0:
        raise ValueError(
            'decint() cannot encode negative value %r' % (value,))
    # Least significant group first; it is the only one without the
    # continuation bit.
    septets = [chr(value & 0x7f)]
    value >>= 7
    while value:
        septets.append(chr((value & 0x7f) | 0x80))
        value >>= 7
    return ''.join(reversed(septets))
 def randbytes(n):
    """Return a string of n characters, each chr() of a random 0..255 value.

    Values are drawn with random.randint from the random module.
    """
    pieces = []
    for unused in xrange(n):
        pieces.append(chr(random.randint(0, 255)))
    return ''.join(pieces)
 class ReBinary(object):
    def __init__(self, root, path, oeb, map=HTML_MAP):
        """Convert the tree rooted at root to binary form immediately.

        root -- root node of the document tree to serialise
        path -- path of the document; its directory is used to resolve
                relative href/src values
        oeb  -- object whose ``manifest`` is consulted for link targets
        map  -- (tags, tattrs) lookup tables; a Stylizer is only created
                when this is HTML_MAP

        After construction ``content``, ``ahc`` and ``aht`` hold the
        results; ``anchors`` and ``page_breaks`` hold what was collected.
        """
        is_html = map is HTML_MAP
        self.dir = os.path.dirname(path)
        self.manifest = oeb.manifest
        self.tags, self.tattrs = map
        self.buf = StringIO()
        self.anchors = []
        self.page_breaks = []
        self.is_html = is_html
        if is_html:
            self.stylizer = Stylizer(root, path, oeb)
        else:
            self.stylizer = None
        self.tree_to_binary(root)
        self.content = self.buf.getvalue()
        self.ahc = self.build_ahc()
        self.aht = self.build_aht()
    def write(self, *values):
        """Append each value to self.buf, UTF-8 encoded.

        Integers are first turned into the character with that code point
        (unichr); everything else is encoded as given.
        """
        emit = self.buf.write
        for value in values:
            text = unichr(value) if isinstance(value, (int, long)) else value
            emit(text.encode('utf-8'))
    def tree_to_binary(self, elem, nsrmap=None, parents=None,
                       inhead=False, preserve=False):
        """Serialise elem and its subtree into self.buf.

        elem     -- node to write; nodes whose tag is not a string are
                    written as their etree.tostring() form
        nsrmap   -- namespace URI -> prefix map inherited from the parent
                    (defaults to {'': None}); a copy is extended, the
                    argument itself is never modified
        parents  -- buffer offsets of the enclosing tags (defaults to a new
                    empty list); pushed/popped around the children
        inhead   -- True when inside a 'head' element
        preserve -- True when whitespace in text must not be collapsed

        Side effects: appends (name, offset) pairs to self.anchors for
        id/name attributes and (offset, parents) pairs to self.page_breaks
        for elements whose page-break-before/-after style is neither
        'avoid' nor 'auto'.
        """
        if nsrmap is None:
            nsrmap = {'': None}
        if parents is None:
            # A fresh list per top-level call: a shared default list would
            # keep stale offsets if an exception escaped mid-recursion.
            parents = []
        if not isinstance(elem.tag, basestring):
            self.write(etree.tostring(elem))
            return
        nsrmap = copy.copy(nsrmap)
        attrib = dict(elem.attrib)
        style = self.stylizer.style(elem) if self.stylizer else None
        # Emit xmlns declarations for namespaces that are new, or bound to
        # a different prefix, relative to the parent's map.
        for key, value in elem.nsmap.items():
            if value not in nsrmap or nsrmap[value] != key:
                xmlns = ('xmlns:' + key) if key else 'xmlns'
                attrib[xmlns] = value
            nsrmap[value] = key
        tag = prefixname(elem.tag, nsrmap)
        tag_offset = self.buf.tell()
        if tag == 'head':
            inhead = True
        flags = FLAG_OPENING
        if not elem.text and len(elem) == 0:
            # No text and no children: opening and closing in one record.
            flags |= FLAG_CLOSING
        if inhead:
            flags |= FLAG_HEAD
        if style and style['display'] in ('block', 'table'):
            flags |= FLAG_BLOCK
        self.write(0, flags)
        tattrs = self.tattrs[0]
        if tag in self.tags:
            index = self.tags[tag]
            self.write(index)
            if self.tattrs[index]:
                tattrs = self.tattrs[index]
        else:
            self.write(FLAG_CUSTOM, len(tag)+1, tag)
        last_break = self.page_breaks[-1][0] if self.page_breaks else None
        if style and last_break != tag_offset \
           and style['page-break-before'] not in ('avoid', 'auto'):
            self.page_breaks.append((tag_offset, list(parents)))
        for attr, value in attrib.items():
            attr = prefixname(attr, nsrmap)
            if attr in ('href', 'src'):
                path, hash, frag = value.partition('#')
                path = os.path.join(self.dir, path)
                path = os.path.normpath(path)
                path = path.replace('\\', '/')
                # Prefix chr(2) plus the manifest id for targets found in
                # the manifest, chr(3) plus the original value otherwise.
                prefix = unichr(3)
                if path in self.manifest.hrefs:
                    prefix = unichr(2)
                    value = self.manifest.hrefs[path].id
                    if hash and frag:
                        value = '#'.join((value, frag))
                value = prefix + value
            elif attr in ('id', 'name'):
                self.anchors.append((value, tag_offset))
            elif attr.startswith('ms--'):
                attr = '%' + attr[4:]
            if attr in tattrs:
                self.write(tattrs[attr])
            else:
                self.write(FLAG_CUSTOM, len(attr)+1, attr)
            # Convert the number BEFORE writing anything: if it cannot be
            # represented as a single character, nothing must have reached
            # the buffer when we fall back to the string form.
            try:
                number = unichr(int(value) + 1)
            except (ValueError, OverflowError):
                self.write(len(value)+1, value)
            else:
                self.write(ATTR_NUMBER, number)
        self.write(0)
        if elem.text:
            text = elem.text
            if style and style['white-space'] == 'pre':
                preserve = True
            # NOTE(review): lxml normally exposes this attribute under its
            # namespaced ({uri}space) name, so this lookup may never match
            # -- confirm.
            if elem.get('xml:space') == 'preserve':
                preserve = True
            if not preserve:
                text = COLLAPSE.sub(' ', text)
            self.write(text)
        parents.append(tag_offset)
        for child in elem:
            self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
        parents.pop()
        if not flags & FLAG_CLOSING:
            self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
        if elem.tail:
            tail = elem.tail
            if tag != 'pre':
                tail = COLLAPSE.sub(' ', tail)
            self.write(tail)
        if style and style['page-break-after'] not in ('avoid', 'auto'):
            self.page_breaks.append((self.buf.tell(), list(parents)))
    def build_ahc(self):
        """Return the 'ahc' stream built from the collected anchors.

        Layout: the anchor count as one UTF-8 encoded character, then per
        anchor its length (one UTF-8 encoded character), the anchor name and
        its buffer offset as a little-endian uint32.
        """
        out = StringIO()
        put = out.write
        put(unichr(len(self.anchors)).encode('utf-8'))
        for name, position in self.anchors:
            put(unichr(len(name)).encode('utf-8'))
            put(name)
            put(pack('<I', position))
        return out.getvalue()
    def build_aht(self):
        """Return the 'aht' stream: a single little-endian uint32 zero."""
        empty_table = pack('<I', 0)
        return empty_table
 def preserve(function):
    """Decorate a method so that self._stream's position survives the call.

    The position reported by self._stream.tell() before the call is
    restored with seek() afterwards, whether the wrapped method returns or
    raises.  The wrapper keeps the wrapped function's metadata.
    """
    @functools.wraps(function)
    def wrapper(self, *args, **kwargs):
        saved = self._stream.tell()
        try:
            result = function(self, *args, **kwargs)
        finally:
            self._stream.seek(saved)
        return result
    return wrapper
 class LitWriter(object):
    def __init__(self, oeb):
        """Remember the book object (oeb) that dump() will serialise."""
        self._oeb = oeb
    def dump(self, stream):
        """Write the book to stream as a LIT file.

        Resets the per-dump state (four empty section buffers, an empty
        directory, no metadata) and then runs _dump().  The stream must
        support write(), seek() and tell().
        """
        self._stream = stream
        sections = []
        for unused in xrange(4):
            sections.append(StringIO())
        self._sections = sections
        self._directory = []
        self._meta = None
        self._dump()
    def _write(self, *data):
        """Write every argument, in order, at the current stream position."""
        stream = self._stream
        for chunk in data:
            stream.write(chunk)
    @preserve
    def _writeat(self, pos, *data):
        """Write data at absolute offset pos.

        The @preserve decorator puts the stream position back afterwards.
        """
        stream = self._stream
        stream.seek(pos)
        self._write(*data)
    def _tell(self):
        """Return the current position of the output stream."""
        position = self._stream.tell()
        return position
    def _dump(self):
        """Build all content and write the complete file to self._stream.

        Order of output: magic and primary header, five zero-filled piece
        records (back-patched below via _writeat), the secondary header with
        the CAOL/ITSF blocks, then pieces #0-#4 and finally the bytes of
        section 0.  The content offset and total file size are also
        back-patched once they are known.
        """
        # Build content sections
        self._build_sections()
        # Build directory chunks
        dcounts, dchunks, ichunk = self._build_dchunks()
        # Write headers
        self._write(LIT_MAGIC)
        self._write(pack('<IIII',
            1, PRIMARY_SIZE, 5, SECONDARY_SIZE))
        self._write(packguid(LITFILE_GUID))
        offset = self._tell()
        # Stream offsets of the five piece records, filled in later.
        pieces = list(xrange(offset, offset + (PIECE_SIZE * 5), PIECE_SIZE))
        self._write((5 * PIECE_SIZE) * '\0')
        # With an index chunk present it is stored after the directory
        # chunks, so its chunk number equals len(dchunks).
        aoli1 = len(dchunks) if ichunk else ULL_NEG1
        last = len(dchunks) - 1
        ddepth = 2 if ichunk else 1
        self._write(pack('<IIQQQQIIIIQIIQQQQIIIIQIIIIQ',
            2, 0x98, aoli1, 0, last, 0, DCHUNK_SIZE, 2, 0, ddepth, 0,
            len(self._directory), 0, ULL_NEG1, 0, 0, 0, CCHUNK_SIZE, 2,
            0, 1, 0, len(dcounts), 0, 0x100000, 0x20000, 0))
        self._write(BLOCK_CAOL)
        self._write(BLOCK_ITSF)
        # Placeholder for the content offset; patched near the end.
        conoff_offset = self._tell()
        timestamp = int(time.time())
        self._write(pack('<QII', 0, timestamp, 0x409))
        # Piece #0
        piece0_offset = self._tell()
        self._write(pack('<II', 0x1fe, 0))
        # Placeholder for the total file size; patched at the very end.
        filesz_offset = self._tell()
        self._write(pack('<QQ', 0, 0))
        self._writeat(pieces[0], pack('<QQ',
            piece0_offset, self._tell() - piece0_offset))
        # Piece #1: Directory chunks
        piece1_offset = self._tell()
        # Chunk total includes the index chunk when there is one.
        number = len(dchunks) + ((ichunk and 1) or 0)
        self._write('IFCM', pack('<IIIQQ',
            1, DCHUNK_SIZE, 0x100000, ULL_NEG1, number))
        for dchunk in dchunks:
            self._write(dchunk)
        if ichunk:
            self._write(ichunk)
        self._writeat(pieces[1], pack('<QQ',
            piece1_offset, self._tell() - piece1_offset))
        # Piece #2: Count chunks
        piece2_offset = self._tell()
        self._write('IFCM', pack('<IIIQQ',
            1, CCHUNK_SIZE, 0x20000, ULL_NEG1, 1))
        # One record per directory chunk: the previous chunk's entry count,
        # this chunk's entry count and the chunk number.
        cchunk = StringIO()
        last = 0
        for i, dcount in izip(count(), dcounts):
            cchunk.write(decint(last))
            cchunk.write(decint(dcount))
            cchunk.write(decint(i))
            last = dcount
        cchunk = cchunk.getvalue()
        # 50 = 48-byte 'AOLL' header + 2-byte trailing count; the remainder
        # is zero filler so the chunk is exactly CCHUNK_SIZE bytes.
        rem = CCHUNK_SIZE - (len(cchunk) + 50)
        self._write('AOLL', pack('<IQQQQQ',
            rem, 0, ULL_NEG1, ULL_NEG1, 0, 1))
        filler = '\0' * rem
        self._write(cchunk, filler, pack('<H', len(dcounts)))
        self._writeat(pieces[2], pack('<QQ',
            piece2_offset, self._tell() - piece2_offset))
        # Piece #3: GUID3
        piece3_offset = self._tell()
        self._write(packguid(PIECE3_GUID))
        self._writeat(pieces[3], pack('<QQ',
            piece3_offset, self._tell() - piece3_offset))
        # Piece #4: GUID4
        piece4_offset = self._tell()
        self._write(packguid(PIECE4_GUID))
        self._writeat(pieces[4], pack('<QQ',
            piece4_offset, self._tell() - piece4_offset))
        # The actual section content
        # Only section 0 is written here; sections 1-3 were copied into it
        # as ::DataSpace/Storage/.../Content files by _build_storage().
        content_offset = self._tell()
        self._writeat(conoff_offset, pack('<Q', content_offset))
        self._write(self._sections[0].getvalue())
        self._writeat(filesz_offset, pack('<Q', self._tell()))
    def _add_file(self, name, data, secnum=0):
        """Append data to section secnum and record a directory entry.

        The entry stores the offset at which the data starts within the
        section and its length; empty data is recorded at offset 0 without
        touching the section.
        """
        size = len(data)
        offset = 0
        if size > 0:
            section = self._sections[secnum]
            offset = section.tell()
            section.write(data)
        entry = DirectoryEntry(name, secnum, offset, size)
        self._directory.append(entry)
    def _add_folder(self, name, offset=0, size=0):
        """Record a directory entry for a folder in section 0.

        A trailing '/' is appended to name when it is missing.
        """
        folder = name if name.endswith('/') else name + '/'
        entry = DirectoryEntry(folder, 0, offset, size)
        self._directory.append(entry)
    def _djoin(self, *names):
        """Join directory-entry name components with '/'."""
        separator = '/'
        return separator.join(names)
    def _build_sections(self):
        """Populate the section buffers and the directory.

        Adds the root folder entry and then runs every builder in a fixed
        order; later builders rely on state left by earlier ones (for
        example sizes and page breaks set by _build_data, and self._meta set
        by _build_meta).
        """
        self._add_folder('/', ROOT_OFFSET, ROOT_SIZE)
        steps = (
            self._build_data,
            self._build_manifest,
            self._build_page_breaks,
            self._build_meta,
            self._build_drm_storage,
            self._build_version,
            self._build_namelist,
            self._build_storage,
            self._build_transforms,
        )
        for step in steps:
            step()
    def _build_data(self):
        """Add every manifest item under '/data/<id>'.

        Items whose data is already a string are stored as a single file in
        section 0.  Any other item is treated as a tree: it gets a folder
        with 'ahc' and 'aht' files in section 0 and its ReBinary 'content'
        in section 1, and its page breaks are kept on the item.  In both
        cases item.size is set to the length of the stored data.
        """
        self._add_folder('/data')
        for item in self._oeb.manifest.values():
            base = '/data/' + item.id
            payload = item.data
            if isinstance(payload, basestring):
                target, secnum = base, 0
            else:
                self._add_folder(base)
                rebin = ReBinary(payload, item.href, self._oeb)
                self._add_file(base + '/ahc', rebin.ahc, 0)
                self._add_file(base + '/aht', rebin.aht, 0)
                item.page_breaks = rebin.page_breaks
                payload = rebin.content
                target, secnum = base + '/content', 1
            self._add_file(target, payload, secnum)
            item.size = len(payload)
    def _build_manifest(self):
        """Write the '/manifest' file and assign item.offset values.

        Items are grouped as linear spine, non-linear spine, CSS and
        everything else ('images'); each group is sorted and written as a
        uint32 count followed by one record per item (uint32 offset, then
        length-prefixed id, href and media type, then a NUL byte).  A
        running total of item sizes is kept across all groups, but only
        spine items receive it as their offset; the others get 0.
        """
        states = ['linear', 'nonlinear', 'css', 'images']
        manifest = dict((state, []) for state in states)
        for item in self._oeb.manifest.values():
            if item.spine_position is not None:
                bucket = 'linear' if item.linear else 'nonlinear'
            elif item.media_type == CSS_MIME:
                bucket = 'css'
            else:
                bucket = 'images'
            manifest[bucket].append(item)
        out = StringIO()
        out.write(pack('<Bc', 1, '\\'))
        running = 0
        for state in states:
            group = manifest[state]
            group.sort()
            out.write(pack('<I', len(group)))
            in_spine = state in ('linear', 'nonlinear')
            for item in group:
                fields = (item.id, item.href, item.media_type)
                item.offset = running if in_spine else 0
                out.write(pack('<I', item.offset))
                record = []
                for field in fields:
                    record.append(unichr(len(field)))
                    record.append(unicode(field))
                for piece in record:
                    out.write(piece.encode('utf-8'))
                out.write('\0')
                running += item.size
        self._add_file('/manifest', out.getvalue())
    def _build_page_breaks(self):
        """Write the '/pb1', '/pb2' and '/pb3' page-break files.

        For every page break of every spine item:
          pb1 gets (break offset + item.offset, current length of pb2),
          pb2 gets the number of parents followed by each parent offset,
          pb3 gets two bits: the low bit always set, the high bit set when
          the break has more than one parent.  Four breaks are packed per
          byte, first break in the most significant bits.
        """
        pb1 = StringIO()
        pb2 = StringIO()
        pb3 = StringIO()
        pb3cur = 0
        bits = 0
        for item in self._oeb.spine:
            # Work on a copy so the item's own list is left untouched.
            page_breaks = copy.copy(item.page_breaks)
            if not item.linear:
                # Non-linear items get an extra break at their very start.
                page_breaks.insert(0, (0, []))
            for pbreak, parents in page_breaks:
                pb3cur = (pb3cur << 2) | 1
                if len(parents) > 1:
                    pb3cur |= 0x2
                bits += 2
                if bits >= 8:
                    pb3.write(pack('<B', pb3cur))
                    pb3cur = 0
                    bits = 0
                pbreak += item.offset
                # NOTE(review): the break offset is shifted by item.offset
                # but the parent offsets below are written unshifted --
                # confirm this is intended.
                pb1.write(pack('<II', pbreak, pb2.tell()))
                pb2.write(pack('<I', len(parents)))
                for parent in parents:
                    pb2.write(pack('<I', parent))
        if bits != 0:
            # Left-align the final, partially filled byte.
            pb3cur <<= (8 - bits)
            pb3.write(pack('<B', pb3cur))
        self._add_file('/pb1', pb1.getvalue(), 0)
        self._add_file('/pb2', pb2.getvalue(), 0)
        self._add_file('/pb3', pb3.getvalue(), 0)
    def _build_meta(self):
        """Create the binary '/meta' file from the book's OPF package.

        A 'calibre-oeb2lit-version' meta element is added under the single
        x-metadata element, three 'ms--' attributes (including a freshly
        generated GUID) are set on the package, and the tree is serialised
        with the OPF lookup tables.  The result is kept in self._meta.
        """
        unused, package = self._oeb.to_opf1()[OPF_MIME]
        [xmetadata] = package.xpath('/package/metadata/x-metadata')
        version_attrs = {
            'name': 'calibre-oeb2lit-version',
            'content': calibre.__version__}
        etree.SubElement(xmetadata, 'meta', attrib=version_attrs)
        guid = '{%s}' % str(uuid.uuid4()).upper()
        extras = (('ms--minimum_level', '0'),
                  ('ms--attr5', '1'),
                  ('ms--guid', guid))
        for key, value in extras:
            package.attrib[key] = value
        binary = ReBinary(package, 'content.opf', self._oeb, OPF_MAP).content
        self._meta = binary
        self._add_file('/meta', binary)
    def _build_drm_storage(self):
        """Add the '/DRMStorage/*' entries and set the book key.

        DRMSource is a fixed UTF-16-LE string; DRMSealed is sixteen NUL
        bytes DES-encrypted with a key derived from the metadata and the
        DRM source.  The book key itself is eight NUL bytes, and the
        ValidationStream goes to section 3.  Requires self._meta, so
        _build_meta() must have run first.
        """
        source = u'Fuck Microsoft\0'.encode('utf-16-le')
        self._add_file('/DRMStorage/DRMSource', source)
        tempkey = self._calculate_deskey([self._meta, source])
        msdes.deskey(tempkey, msdes.EN0)
        sealed = msdes.des("\0" * 16)
        self._add_file('/DRMStorage/DRMSealed', sealed)
        self._bookkey = '\0' * 8
        self._add_file('/DRMStorage/ValidationStream', 'MSReader', 3)
    def _build_version(self):
        """Add the '/Version' file: two little-endian uint16 values 8, 1."""
        payload = pack('<HH', 8, 1)
        self._add_file('/Version', payload)
    def _build_namelist(self):
        """Add '::DataSpace/NameList', naming the four sections.

        Layout: uint16 0x3c, uint16 section count, then for each name its
        character count (uint16), the name in UTF-16-LE and two NUL bytes.
        """
        names = ['Uncompressed', 'MSCompressed', 'EbEncryptDS',
                 'EbEncryptOnlyDS']
        parts = [pack('<HH', 0x3c, len(self._sections))]
        for name in names:
            parts.append(pack('<H', len(name)))
            parts.append(name.encode('utf-16-le'))
            parts.append('\0\0')
        self._add_file('::DataSpace/NameList', ''.join(parts))
    def _build_storage(self):
        """Add the '::DataSpace/Storage/<name>/...' entries for sections 1-3.

        For each section the transforms listed in ``mapping`` are applied in
        order to the bytes accumulated in that section, and these entries
        are added (all to section 0): Content (the transformed bytes),
        ControlData, SpanInfo, Transform/List, one
        Transform/<guid>/InstanceData folder per transform and, for the LZX
        transform, an InstanceData/ResetTable file.
        """
        mapping = [(1, 'MSCompressed', (LZXCOMPRESS_GUID,)),
                   (2, 'EbEncryptDS', (LZXCOMPRESS_GUID, DESENCRYPT_GUID)),
                   (3, 'EbEncryptOnlyDS', (DESENCRYPT_GUID,)),]
        for secnum, name, transforms in mapping:
            root = '::DataSpace/Storage/' + name
            data = self._sections[secnum].getvalue()
            cdata, sdata, tdata, rdata = '', '', '', ''
            for guid in transforms:
                # Transform GUIDs and control data are prepended, so the
                # transform applied last comes first; span sizes are
                # appended and record the length before this transform.
                tdata = packguid(guid) + tdata
                sdata = sdata + pack('<Q', len(data))
                if guid == DESENCRYPT_GUID:
                    cdata = MSDES_CONTROL + cdata
                    # Nothing to encrypt for an empty section.
                    if not data: continue
                    msdes.deskey(self._bookkey, msdes.EN0)
                    # NUL-pad to a multiple of 8 bytes before encrypting.
                    pad = 8 - (len(data) & 0x7)
                    if pad != 8:
                        data = data + ('\0' * pad)
                    data = msdes.des(data)
                elif guid == LZXCOMPRESS_GUID:
                    cdata = LZXC_CONTROL + cdata
                    # Nothing to compress; rdata stays empty.
                    if not data: continue
                    unlen = len(data)
                    with Compressor(17) as lzx:
                        data, rtable = lzx.compress(data, flush=True)
                    # Reset table: fixed header, then the second value of
                    # every rtable pair except the last.
                    rdata = StringIO()
                    rdata.write(pack('<IIIIQQQQ',
                        3, len(rtable), 8, 0x28, unlen, len(data), 0x8000, 0))
                    for uncomp, comp in rtable[:-1]:
                        rdata.write(pack('<Q', comp))
                    rdata = rdata.getvalue()
            self._add_file(root + '/Content', data)
            self._add_file(root + '/ControlData', cdata)
            self._add_file(root + '/SpanInfo', sdata)
            self._add_file(root + '/Transform/List', tdata)
            troot = root + '/Transform'
            for guid in transforms:
                dname = self._djoin(troot, guid, 'InstanceData')
                self._add_folder(dname)
                if guid == LZXCOMPRESS_GUID:
                    dname += '/ResetTable'
                    self._add_file(dname, rdata)
    def _build_transforms(self):
        """Add a '::Transform/<guid>' folder for the LZX and DES transforms."""
        folders = ['::Transform/' + guid
                   for guid in (LZXCOMPRESS_GUID, DESENCRYPT_GUID)]
        for folder in folders:
            self._add_folder(folder)
    def _calculate_deskey(self, hashdata):
        """Derive an 8-byte DES key from a sequence of byte strings.

        The first string is prefixed with two NUL bytes; every string is
        NUL-padded to a multiple of 64 bytes and fed to an mssha1 hash.  The
        digest is folded to eight bytes by XOR-ing byte i into key position
        i % 8.
        """
        hasher = mssha1.new()
        for index, data in enumerate(hashdata):
            if index == 0:
                data = ("\000" * 2) + data
            remainder = len(data) % 64
            if remainder:
                data = data + ("\000" * (64 - remainder))
            hasher.update(data)
        digest = hasher.digest()
        key = [0] * 8
        for position, char in enumerate(digest):
            key[position % 8] ^= ord(char)
        return ''.join(map(chr, key))
    def _build_dchunks(self):
        """Pack the directory into fixed-size chunks.

        Entries are sorted by lower-cased name, encoded, and distributed
        over 'AOLL' chunks of DCHUNK_SIZE bytes, each ending with a quickref
        table and its entry count.  When more than one chunk is needed an
        'AOLI' index chunk listing the first name of every chunk is built
        as well.

        Returns (dcounts, dchunks, ichunk): the per-chunk entry counts, the
        chunk strings, and the index chunk string or None.
        """
        ddata = []
        directory = list(self._directory)
        directory.sort(cmp=lambda x, y: \
            cmp(x.name.lower(), y.name.lower()))
        # A quickref offset is recorded for every qrn-th (5th) entry.
        qrn = 1 + (1 << 2)
        dchunk = StringIO()
        dcount = 0
        quickref = []
        # First entry name of the chunk being filled (used for the index).
        name = directory[0].name
        for entry in directory:
            next = ''.join([decint(len(entry.name)), entry.name,
                decint(entry.section), decint(entry.offset),
                decint(entry.size)])
            # Projected chunk size with this entry added.
            # NOTE(review): the header plus trailing count written below
            # total 50 bytes; the extra 2 presumably leave room for a
            # quickref slot this entry may need -- confirm.
            usedlen = dchunk.tell() + len(next) + (len(quickref) * 2) + 52
            if usedlen >= DCHUNK_SIZE:
                # Chunk is full: close it and start a new one with entry.
                ddata.append((dchunk.getvalue(), quickref, dcount, name))
                dchunk = StringIO()
                dcount = 0
                quickref = []
                name = entry.name
            if (dcount % qrn) == 0:
                quickref.append(dchunk.tell())
            dchunk.write(next)
            dcount = dcount + 1
        ddata.append((dchunk.getvalue(), quickref, dcount, name))
        cidmax = len(ddata) - 1
        # Number of entries stored in all earlier chunks.
        rdcount = 0
        dchunks = []
        dcounts = []
        ichunk = None
        if len(ddata) > 1:
            ichunk = StringIO()
        for cid, (content, quickref, dcount, name) in izip(count(), ddata):
            dchunk = StringIO()
            prev = cid - 1 if cid > 0 else ULL_NEG1
            next = cid + 1 if cid < cidmax else ULL_NEG1
            # 50 = 48-byte 'AOLL' header + 2-byte trailing entry count.
            rem = DCHUNK_SIZE - (len(content) + 50)
            pad = rem - (len(quickref) * 2)
            dchunk.write('AOLL')
            dchunk.write(pack('<IQQQQQ', rem, cid, prev, next, rdcount, 1))
            dchunk.write(content)
            dchunk.write('\0' * pad)
            # Quickref offsets are stored last-to-first at the chunk's end.
            for ref in reversed(quickref):
                dchunk.write(pack('<H', ref))
            dchunk.write(pack('<H', dcount))
            rdcount = rdcount + dcount
            dchunks.append(dchunk.getvalue())
            dcounts.append(dcount)
            # NOTE(review): relies on the StringIO object itself being
            # truthy (it is either None or a buffer here) -- confirm.
            if ichunk:
                ichunk.write(decint(len(name)))
                ichunk.write(name)
                ichunk.write(decint(cid))
        if ichunk:
            # 16 = 'AOLI' (4) + '<IQ' header (12); the last 2 bytes of the
            # remainder hold the chunk count.
            rem = DCHUNK_SIZE - (ichunk.tell() + 16)
            pad = rem - 2
            ichunk = ''.join(['AOLI', pack('<IQ', rem, len(dchunks)),
                ichunk.getvalue(), ('\0' * pad), pack('<H', len(dchunks))])
        return dcounts, dchunks, ichunk
 def option_parser():
    """Return the command-line option parser for the oeb2lit tool.

    Accepts a single -o/--output option naming the file to write.
    """
    from calibre.utils.config import OptionParser
    usage = _('%prog [options] OPFFILE')
    output_help = _('Output file. Default is derived from input filename.')
    parser = OptionParser(usage=usage)
    parser.add_option('-o', '--output', default=None, help=output_help)
    return parser
 def main(argv=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(argv[1:])
    if len(args) != 1:
        parser.print_help()
        return 1
    opfpath = args[0]
    litpath = opts.output
    if litpath is None:
        litpath = os.path.basename(opfpath)
        litpath = os.path.splitext(litpath)[0] + '.lit'
    lit = LitWriter(Oeb(opfpath))
    with open(litpath, 'wb') as f:
        lit.dump(f)
    print _('LIT ebook created at'), litpath
    return 0
 # Run as a script: use main()'s return value as the process exit status.
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -55,6 +55,7 @@ entry_points = {
                             'mobi2oeb  = calibre.ebooks.mobi.reader:main',
                             'lrf2html  = calibre.ebooks.lrf.html.convert_to:main',
                             'lit2oeb   = calibre.ebooks.lit.reader:main',
                             'oeb2lit   = calibre.ebooks.lit.writer:main',
                             'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
                             'comic2epub = calibre.ebooks.epub.from_comic:main',
                             'calibre-debug      = calibre.debug:main',