Merge upstream changes

This commit is contained in:
Marshall T. Vandegrift 2008-12-16 17:57:19 -05:00
commit 0d7a9d95d7
23 changed files with 4674 additions and 47 deletions

View File

@ -146,6 +146,7 @@ if __name__ == '__main__':
metadata_sqlite = 'library/metadata_sqlite.sql',
jquery = 'gui2/viewer/jquery.js',
jquery_scrollTo = 'gui2/viewer/jquery_scrollTo.js',
html_css = 'ebooks/lit/html.css',
)
DEST = os.path.join('src', APPNAME, 'resources.py')
@ -373,7 +374,10 @@ if __name__ == '__main__':
ext_modules = [
Extension('calibre.plugins.lzx',
sources=['src/calibre/utils/lzx/lzxmodule.c',
'src/calibre/utils/lzx/lzxd.c'],
'src/calibre/utils/lzx/compressor.c',
'src/calibre/utils/lzx/lzxd.c',
'src/calibre/utils/lzx/lzc.c',
'src/calibre/utils/lzx/lzxc.c'],
include_dirs=['src/calibre/utils/lzx']),
Extension('calibre.plugins.msdes',

View File

@ -88,10 +88,10 @@ def initialize_container(path_to_container, opf_name='metadata.opf'):
zf.writestr('META-INF/container.xml', CONTAINER)
return zf
def config(defaults=None):
def config(defaults=None, name='epub'):
desc = _('Options to control the conversion to EPUB')
if defaults is None:
c = Config('epub', desc)
c = Config(name, desc)
else:
c = StringConfig(defaults, desc)

View File

@ -148,14 +148,14 @@ def config(defaults=None):
def formats():
return ['html', 'rar', 'zip', 'oebzip']+list(MAP.keys())
def option_parser():
return config().option_parser(usage=_('''\
USAGE = _('''\
%%prog [options] filename
Convert any of a large number of ebook formats to an epub file. Supported formats are: %s
''')%formats()
)
Convert any of a large number of ebook formats to a %s file. Supported formats are: %s
''')
def option_parser(usage=USAGE):
return config().option_parser(usage=usage%('EPUB', formats()))
def main(args=sys.argv):
parser = option_parser()

View File

@ -64,7 +64,8 @@ def check(opf_path, pretty_print):
'''
Find a remove all invalid links in the HTML files
'''
print '\tChecking files for bad links...'
logger = logging.getLogger('html2epub')
logger.info('\tChecking files for bad links...')
pathtoopf = os.path.abspath(opf_path)
with CurrentDir(os.path.dirname(pathtoopf)):
opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))

View File

@ -0,0 +1,59 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Convert any ebook format to LIT.
'''
import sys, os, glob, logging
from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
from calibre.ebooks.epub import config as common_config
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.lit.writer import oeb2lit
def config(defaults=None):
    """Build the LIT-conversion Config: the shared EPUB options under the
    'lit' configuration namespace."""
    return common_config(defaults=defaults, name='lit')
def option_parser(usage=USAGE):
    """Return the command-line OptionParser for any-to-LIT conversion."""
    filled = usage % ('LIT', formats())
    return config().option_parser(usage=filled)
def any2lit(opts, path):
    """Convert the ebook at *path* to a LIT file per *opts*.

    The input is first run through the EPUB pipeline to obtain an
    intermediate OEB tree, which is then serialized to LIT.  ``opts.output``
    is temporarily redirected while the intermediate EPUB is produced and
    restored before the LIT writer runs.
    """
    ext = os.path.splitext(path)[1]
    if not ext:
        raise ValueError('Unknown file type: '+path)
    ext = ext.lower()[1:]
    if opts.output is None:
        stem = os.path.splitext(os.path.basename(path))[0]
        opts.output = stem + '.lit'
    opts.output = os.path.abspath(opts.output)
    orig_output = opts.output
    with TemporaryDirectory('_any2lit') as tdir:
        oebdir = os.path.join(tdir, 'oeb')
        os.mkdir(oebdir)
        # Point the EPUB pipeline at a throwaway file but keep the
        # extracted OEB tree, which is what the LIT writer consumes.
        opts.output = os.path.join(tdir, 'dummy.epub')
        opts.extract_to = oebdir
        any2epub(opts, path)
        opf = glob.glob(os.path.join(oebdir, '*.opf'))[0]
        opts.output = orig_output
        logging.getLogger('html2epub').info(_('Creating LIT file from EPUB...'))
        oeb2lit(opts, opf)
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) < 2:
parser.print_help()
print 'No input file specified.'
return 1
any2lit(opts, args[1])
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,426 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Blake Ross <BlakeR1234@aol.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
@namespace url(http://www.w3.org/1999/xhtml); /* set default namespace to HTML */
/* blocks */
html, div, map, dt, isindex, form {
display: block;
}
body {
display: block;
margin: 8px;
}
p, dl, multicol {
display: block;
margin: 1em 0;
}
dd {
display: block;
}
blockquote {
display: block;
margin: 1em 40px;
}
address {
display: block;
font-style: italic;
}
center {
display: block;
text-align: center;
}
blockquote[type=cite] {
display: block;
margin: 1em 0px;
border-color: blue;
border-width: thin;
}
span[_moz_quote=true] {
color: blue;
}
pre[_moz_quote=true] {
color: blue;
}
h1 {
display: block;
font-size: 2em;
font-weight: bold;
margin: .67em 0;
}
h2 {
display: block;
font-size: 1.5em;
font-weight: bold;
margin: .83em 0;
}
h3 {
display: block;
font-size: 1.17em;
font-weight: bold;
margin: 1em 0;
}
h4 {
display: block;
font-weight: bold;
margin: 1.33em 0;
}
h5 {
display: block;
font-size: 0.83em;
font-weight: bold;
margin: 1.67em 0;
}
h6 {
display: block;
font-size: 0.67em;
font-weight: bold;
margin: 2.33em 0;
}
listing {
display: block;
font-family: monospace;
font-size: medium;
white-space: pre;
margin: 1em 0;
}
xmp, pre, plaintext {
display: block;
font-family: monospace;
white-space: pre;
margin: 1em 0;
}
/* tables */
table {
display: table;
border-spacing: 2px;
border-collapse: separate;
margin-top: 0;
margin-bottom: 0;
text-indent: 0;
}
table[align="left"] {
float: left;
}
table[align="right"] {
float: right;
}
table[rules]:not([rules="none"]) {
border-collapse: collapse;
}
/* caption inherits from table not table-outer */
caption {
display: table-caption;
text-align: center;
}
table[align="center"] > caption {
margin-left: auto;
margin-right: auto;
}
table[align="center"] > caption[align="left"] {
margin-right: 0;
}
table[align="center"] > caption[align="right"] {
margin-left: 0;
}
tr {
display: table-row;
vertical-align: inherit;
}
col {
display: table-column;
}
colgroup {
display: table-column-group;
}
tbody {
display: table-row-group;
vertical-align: middle;
}
thead {
display: table-header-group;
vertical-align: middle;
}
tfoot {
display: table-footer-group;
vertical-align: middle;
}
/* for XHTML tables without tbody */
table > tr {
vertical-align: middle;
}
td {
display: table-cell;
vertical-align: inherit;
text-align: inherit;
padding: 1px;
}
th {
display: table-cell;
vertical-align: inherit;
font-weight: bold;
padding: 1px;
}
/* inlines */
q:before {
content: open-quote;
}
q:after {
content: close-quote;
}
b, strong {
font-weight: bolder;
}
i, cite, em, var, dfn {
font-style: italic;
}
tt, code, kbd, samp {
font-family: monospace;
}
u, ins {
text-decoration: underline;
}
s, strike, del {
text-decoration: line-through;
}
blink {
text-decoration: blink;
}
big {
font-size: larger;
}
small {
font-size: smaller;
}
sub {
vertical-align: sub;
font-size: smaller;
line-height: normal;
}
sup {
vertical-align: super;
font-size: smaller;
line-height: normal;
}
nobr {
white-space: nowrap;
}
/* titles */
abbr[title], acronym[title] {
border-bottom: dotted 1px;
}
/* lists */
ul, menu, dir {
display: block;
list-style-type: disc;
margin: 1em 0;
}
ol {
display: block;
list-style-type: decimal;
margin: 1em 0;
}
li {
display: list-item;
}
/* nested lists have no top/bottom margins */
ul ul, ul ol, ul dir, ul menu, ul dl,
ol ul, ol ol, ol dir, ol menu, ol dl,
dir ul, dir ol, dir dir, dir menu, dir dl,
menu ul, menu ol, menu dir, menu menu, menu dl,
dl ul, dl ol, dl dir, dl menu, dl dl {
margin-top: 0;
margin-bottom: 0;
}
/* 2 deep unordered lists use a circle */
ol ul, ul ul, menu ul, dir ul,
ol menu, ul menu, menu menu, dir menu,
ol dir, ul dir, menu dir, dir dir {
list-style-type: circle;
}
/* 3 deep (or more) unordered lists use a square */
ol ol ul, ol ul ul, ol menu ul, ol dir ul,
ol ol menu, ol ul menu, ol menu menu, ol dir menu,
ol ol dir, ol ul dir, ol menu dir, ol dir dir,
ul ol ul, ul ul ul, ul menu ul, ul dir ul,
ul ol menu, ul ul menu, ul menu menu, ul dir menu,
ul ol dir, ul ul dir, ul menu dir, ul dir dir,
menu ol ul, menu ul ul, menu menu ul, menu dir ul,
menu ol menu, menu ul menu, menu menu menu, menu dir menu,
menu ol dir, menu ul dir, menu menu dir, menu dir dir,
dir ol ul, dir ul ul, dir menu ul, dir dir ul,
dir ol menu, dir ul menu, dir menu menu, dir dir menu,
dir ol dir, dir ul dir, dir menu dir, dir dir dir {
list-style-type: square;
}
/* leafs */
/* <hr> noshade and color attributes are handled completely by
* the nsHTMLHRElement attribute mapping code
*/
hr {
display: block;
height: 2px;
border: 1px inset;
margin: 0.5em auto 0.5em auto;
color: gray;
}
hr[size="1"] {
border-style: solid none none none;
}
img[usemap], object[usemap] {
color: blue;
}
frameset {
display: block ! important;
position: static ! important;
float: none ! important;
border: none ! important;
}
frame {
border: none ! important;
}
iframe {
border: 2px inset;
}
noframes {
display: none;
}
spacer {
position: static ! important;
float: none ! important;
}
/* focusable content: anything w/ tabindex >=0 is focusable */
abbr:focus, acronym:focus, address:focus, applet:focus, b:focus,
base:focus, big:focus, blockquote:focus, br:focus, canvas:focus, caption:focus,
center:focus, cite:focus, code:focus, col:focus, colgroup:focus, dd:focus,
del:focus, dfn:focus, dir:focus, div:focus, dl:focus, dt:focus, em:focus,
fieldset:focus, font:focus, form:focus, h1:focus, h2:focus, h3:focus, h4:focus,
h5:focus, h6:focus, hr:focus, i:focus, img:focus, ins:focus,
kbd:focus, label:focus, legend:focus, li:focus, link:focus, menu:focus,
object:focus, ol:focus, p:focus, pre:focus, q:focus, s:focus, samp:focus,
small:focus, span:focus, strike:focus, strong:focus, sub:focus, sup:focus,
table:focus, tbody:focus, td:focus, tfoot:focus, th:focus, thead:focus,
tr:focus, tt:focus, u:focus, ul:focus, var:focus {
/* Don't specify the outline-color, we should always use initial value. */
outline: 1px dotted;
}
/* hidden elements */
area, base, basefont, head, meta, script, style, title,
noembed, param, link {
display: none;
}
/* Page breaks at body tags, to help out with LIT-generation */
body {
page-break-before: always;
}
/* Explicit line-breaks are blocks, sure... */
br {
display: block;
}

View File

@ -0,0 +1,27 @@
'''
LZX compression/decompression wrapper.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
from calibre import plugins
_lzx, LZXError = plugins['lzx']
__all__ = ['Compressor', 'Decompressor', 'LZXError']
Compressor = _lzx.Compressor
class Decompressor(object):
    """Thin wrapper over the C LZX decompressor plugin.

    ``wbits`` is the base-2 log of the LZX sliding-window size; the plugin
    is (re)initialized for that window size on construction.
    NOTE(review): the underlying ``_lzx`` module appears to hold global
    state (init/decompress/reset take no handle), so multiple concurrent
    Decompressor instances presumably interfere — confirm.
    """
    def __init__(self, wbits):
        self.wbits = wbits            # log2 of the window size
        self.blocksize = 1 << wbits   # window size in bytes
        _lzx.init(wbits)
    def decompress(self, data, outlen):
        # Delegate directly to the C extension; outlen is the expected
        # size of the decompressed block.
        return _lzx.decompress(data, outlen)
    def reset(self):
        # Reset the decompressor's stream state between LZX resets.
        return _lzx.reset()

View File

@ -4,6 +4,9 @@ Modified version of SHA-1 used in Microsoft LIT files.
Adapted from the PyPy pure-Python SHA-1 implementation.
"""
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import struct, copy
# ======================================================================

View File

@ -0,0 +1,737 @@
'''
Basic support for manipulating OEB 1.x/2.0 content and metadata.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import os
import sys
from collections import defaultdict
from types import StringTypes
from itertools import izip, count
from urlparse import urldefrag, urlparse, urlunparse
from urllib import unquote as urlunquote
from lxml import etree
XML_PARSER = etree.XMLParser(recover=True, resolve_entities=False)
# XML namespace URIs used throughout OEB/OPF/NCX documents.
XML_NS = 'http://www.w3.org/XML/1998/namespace'
XHTML_NS = 'http://www.w3.org/1999/xhtml'
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
OPF2_NS = 'http://www.idpf.org/2007/opf'
DC09_NS = 'http://purl.org/metadata/dublin_core'
DC10_NS = 'http://purl.org/dc/elements/1.0/'
DC11_NS = 'http://purl.org/dc/elements/1.1/'
XSI_NS = 'http://www.w3.org/2001/XMLSchema-instance'
DCTERMS_NS = 'http://purl.org/dc/terms/'
NCX_NS = 'http://www.daisy.org/z3986/2005/ncx/'

# Prefix -> namespace map used by every xpath() call in this module.
XPNSMAP = {'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS,
           'd09': DC09_NS, 'd10': DC10_NS, 'd11': DC11_NS,
           'xsi': XSI_NS, 'dt': DCTERMS_NS, 'ncx': NCX_NS}


def XML(name):
    """Qualify *name* with the XML namespace (Clark notation)."""
    return '{%s}%s' % (XML_NS, name)


def XHTML(name):
    """Qualify *name* with the XHTML namespace (Clark notation)."""
    return '{%s}%s' % (XHTML_NS, name)


def OPF(name):
    """Qualify *name* with the OPF 2.0 namespace (Clark notation)."""
    return '{%s}%s' % (OPF2_NS, name)


def DC(name):
    """Qualify *name* with the Dublin Core 1.1 namespace (Clark notation)."""
    return '{%s}%s' % (DC11_NS, name)


def NCX(name):
    """Qualify *name* with the NCX namespace (Clark notation)."""
    return '{%s}%s' % (NCX_NS, name)


# MIME types for the document kinds this module manipulates.
XHTML_MIME = 'application/xhtml+xml'
CSS_MIME = 'text/css'
NCX_MIME = 'application/x-dtbncx+xml'
OPF_MIME = 'application/oebps-package+xml'
OEB_DOC_MIME = 'text/x-oeb1-document'
OEB_CSS_MIME = 'text/x-oeb1-css'

# Media types treated as stylesheets / content documents respectively.
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME, 'text/x-oeb-document'])
def element(parent, *args, **kwargs):
    """Create an element, attaching it to *parent* unless parent is None."""
    if parent is None:
        return etree.Element(*args, **kwargs)
    return etree.SubElement(parent, *args, **kwargs)
def namespace(name):
    """Return the namespace URI of a Clark-notation *name*, or '' if none."""
    head, sep, _ = name.partition('}')
    return head[1:] if sep else ''
def barename(name):
    """Strip any Clark-notation namespace prefix from *name*."""
    _, sep, tail = name.partition('}')
    return tail if sep else name
def xpath(elem, expr):
    # Evaluate an XPath expression against elem using this module's
    # prefix map (XPNSMAP: h=XHTML, o1/o2=OPF, d09/d10/d11=DC, ncx, ...).
    return elem.xpath(expr, namespaces=XPNSMAP)
# Characters that must be percent-encoded in hrefs.
URL_UNSAFE = r"""`!@#$%^&*[](){}?+=;:'",<>\| """


def urlquote(href):
    """Percent-encode every URL_UNSAFE character in *href* (lowercase hex)."""
    quoted = []
    for ch in href:
        quoted.append("%%%02x" % ord(ch) if ch in URL_UNSAFE else ch)
    return ''.join(quoted)
def urlnormalize(href):
    """Normalize *href*: backslashes become forward slashes and each URL
    component is re-encoded with this module's canonical quoting."""
    normalized = []
    for part in urlparse(href):
        part = part.replace('\\', '/')
        # Unquote first so already-encoded input is not double-encoded.
        normalized.append(urlquote(urlunquote(part)))
    return urlunparse(normalized)
class AbstractContainer(object):
    """Base class for OEB containers: adds XML parsing on top of read()."""

    def read_xml(self, path):
        # The recovering parser tolerates the mildly broken XML common in
        # real-world ebooks.
        raw = self.read(path)
        return etree.fromstring(
            raw, parser=XML_PARSER,
            base_url=os.path.dirname(path))


class DirContainer(AbstractContainer):
    """Container backed by a plain directory on disk."""

    def __init__(self, rootdir):
        self.rootdir = rootdir

    def _fspath(self, path):
        # Hrefs are URL-quoted; decode before touching the filesystem.
        return urlunquote(os.path.join(self.rootdir, path))

    def read(self, path):
        with open(self._fspath(path), 'rb') as stream:
            return stream.read()

    def write(self, path, data):
        with open(self._fspath(path), 'wb') as stream:
            return stream.write(data)
class Metadata(object):
    """OPF metadata section: an ordered multi-map of metadata items,
    serializable to both OPF 1.x and OPF 2.0 form."""

    # Dublin Core terms that may appear without an explicit namespace.
    TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
                 'format', 'identifier', 'language', 'publisher', 'relation',
                 'rights', 'source', 'subject', 'title', 'type'])
    OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
    OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
                  'xsi': XSI_NS}

    class Item(object):
        """One metadata entry: a (term, value) pair plus its attributes."""

        def __init__(self, term, value, fq_attrib={}):
            # Mutable default is tolerable only because it is copied
            # immediately and never mutated in place.
            self.fq_attrib = dict(fq_attrib)
            if term == OPF('meta') and not value:
                # <opf:meta name="..." content="..."/>: promote the
                # name/content attributes to term/value.
                term = self.fq_attrib.pop('name')
                value = self.fq_attrib.pop('content')
            elif term in Metadata.TERMS and not namespace(term):
                # A bare DC term: qualify with the DC 1.1 namespace.
                term = DC(term)
            self.term = term
            self.value = value
            # attrib mirrors the attributes with namespaces stripped from
            # keys.  NOTE(review): it is built from the *argument*, so for
            # the opf:meta form it still contains the popped name/content
            # keys — confirm whether that is intended.
            self.attrib = attrib = {}
            for fq_attr in fq_attrib:
                attr = barename(fq_attr)
                attrib[attr] = fq_attrib[fq_attr]

        def __getattr__(self, name):
            # Expose XML attributes as Python attributes; '_' maps to '-'
            # (e.g. item.file_as -> attrib['file-as']).
            name = name.replace('_', '-')
            try:
                return self.attrib[name]
            except KeyError:
                raise AttributeError(
                    '%r object has no attribute %r' \
                        % (self.__class__.__name__, name))

        def __repr__(self):
            return 'Item(term=%r, value=%r, attrib=%r)' \
                % (barename(self.term), self.value, self.attrib)

        def __str__(self):
            return str(self.value)

        def __unicode__(self):
            return unicode(self.value)

        def to_opf1(self, dcmeta=None, xmeta=None):
            """Serialize into OPF 1.x <dc-metadata>/<x-metadata> form."""
            if namespace(self.term) == DC11_NS:
                # OPF 1.x uses Title-cased DC element names (dc:Title).
                name = DC(barename(self.term).title())
                elem = element(dcmeta, name, attrib=self.attrib)
                elem.text = self.value
            else:
                # Everything non-DC becomes an x-metadata <meta> element.
                elem = element(xmeta, 'meta', attrib=self.attrib)
                elem.attrib['name'] = self.term
                elem.attrib['content'] = self.value
            return elem

        def to_opf2(self, parent=None):
            """Serialize into OPF 2.0 metadata form."""
            if namespace(self.term) == DC11_NS:
                elem = element(parent, self.term, attrib=self.fq_attrib)
                elem.text = self.value
            else:
                elem = element(parent, OPF('meta'), attrib=self.fq_attrib)
                elem.attrib['name'] = self.term
                elem.attrib['content'] = self.value
            return elem

    def __init__(self, oeb):
        self.oeb = oeb
        # bare term name -> list of Items, in insertion order per term.
        self.items = defaultdict(list)

    def add(self, term, value, attrib={}):
        """Create an Item and file it under its bare term name."""
        item = self.Item(term, value, attrib)
        items = self.items[barename(item.term)]
        items.append(item)
        return item

    def iterkeys(self):
        for key in self.items:
            yield key
    __iter__ = iterkeys

    def __getitem__(self, key):
        return self.items[key]

    def __contains__(self, key):
        return key in self.items

    def __getattr__(self, term):
        # metadata.title, metadata.identifier, ... -> list of Items.
        return self.items[term]

    def to_opf1(self, parent=None):
        elem = element(parent, 'metadata')
        dcmeta = element(elem, 'dc-metadata', nsmap=self.OPF1_NSMAP)
        xmeta = element(elem, 'x-metadata')
        for term in self.items:
            for item in self.items[term]:
                item.to_opf1(dcmeta, xmeta)
        # Ensure an ms-chaptertour entry exists — NOTE(review): presumably
        # required by the downstream LIT pipeline; confirm.
        if 'ms-chaptertour' not in self.items:
            chaptertour = self.Item('ms-chaptertour', 'chaptertour')
            chaptertour.to_opf1(dcmeta, xmeta)
        return elem

    def to_opf2(self, parent=None):
        elem = element(parent, OPF('metadata'), nsmap=self.OPF2_NSMAP)
        for term in self.items:
            for item in self.items[term]:
                item.to_opf2(elem)
        return elem
class Manifest(object):
    """OPF manifest: the set of files that make up the book, indexed both
    by id (``items``) and by normalized href (``hrefs``)."""

    class Item(object):
        """A single manifest entry; loads and parses its data lazily."""

        def __init__(self, id, href, media_type, fallback=None, loader=str):
            self.id = id
            self.href = self.path = urlnormalize(href)
            self.media_type = media_type
            self.fallback = fallback
            self.spine_position = None   # set when added to a Spine
            self.linear = True
            self._loader = loader        # callable: href -> raw bytes
            self._data = None            # lazy cache; None = not loaded

        def __repr__(self):
            return 'Item(id=%r, href=%r, media_type=%r)' \
                % (self.id, self.href, self.media_type)

        def data():
            def fget(self):
                # Fixed: compare against None so cached-but-falsy data
                # (e.g. an empty document) is not reloaded on every access.
                if self._data is not None:
                    return self._data
                data = self._loader(self.href)
                if self.media_type == XHTML_MIME:
                    data = etree.fromstring(data, parser=XML_PARSER)
                    if namespace(data.tag) != XHTML_NS:
                        # Force the XHTML namespace and reparse so that
                        # namespaced XPath queries work.
                        data.attrib['xmlns'] = XHTML_NS
                        data = etree.tostring(data)
                        data = etree.fromstring(data, parser=XML_PARSER)
                elif self.media_type.startswith('application/') \
                     and self.media_type.endswith('+xml'):
                    data = etree.fromstring(data, parser=XML_PARSER)
                return data
            def fset(self, value):
                self._data = value
            def fdel(self):
                self._data = None
            return property(fget, fset, fdel)
        data = data()

        def __cmp__(self, other):
            # Order by spine position, then id (Python 2 rich-less compare).
            result = cmp(self.spine_position, other.spine_position)
            if result != 0:
                return result
            return cmp(self.id, other.id)

    def __init__(self, oeb):
        self.oeb = oeb
        # NOTE: this instance attribute shadows any class-level method
        # named ``items``; manifest.items() resolves to dict.items().
        self.items = {}
        self.hrefs = {}

    def add(self, id, href, media_type, fallback=None):
        """Create an Item wired to this book's container and index it."""
        item = self.Item(
            id, href, media_type, fallback, self.oeb.container.read)
        self.items[item.id] = item
        self.hrefs[item.href] = item
        return item

    def remove(self, id):
        """Drop the item with *id* from both indexes."""
        href = self.items[id].href
        del self.items[id]
        del self.hrefs[href]

    def __iter__(self):
        for id in self.items:
            yield id

    def __getitem__(self, id):
        return self.items[id]

    def values(self):
        for item in self.items.values():
            yield item

    # The old class-level ``items()`` generator was dead code: it was
    # shadowed by the instance dict and referenced a nonexistent
    # ``self.refs``.  Removed; manifest.items() is dict.items().

    def __contains__(self, key):
        # Fixed: previously tested the builtin ``id`` function for
        # membership, so ``x in manifest`` always returned False.
        return key in self.items

    def to_opf1(self, parent=None):
        """OPF 1.x manifest; XHTML/CSS types map back to OEB 1.x types."""
        elem = element(parent, 'manifest')
        for item in self.items.values():
            media_type = item.media_type
            if media_type == XHTML_MIME:
                media_type = OEB_DOC_MIME
            elif media_type == CSS_MIME:
                media_type = OEB_CSS_MIME
            attrib = {'id': item.id, 'href': item.href,
                      'media-type': media_type}
            if item.fallback:
                attrib['fallback'] = item.fallback
            element(elem, 'item', attrib=attrib)
        return elem

    def to_opf2(self, parent=None):
        """OPF 2.0 manifest with media types unchanged."""
        elem = element(parent, OPF('manifest'))
        for item in self.items.values():
            attrib = {'id': item.id, 'href': item.href,
                      'media-type': item.media_type}
            if item.fallback:
                attrib['fallback'] = item.fallback
            element(elem, OPF('item'), attrib=attrib)
        return elem
class Spine(object):
    """The linear reading order of the book's content documents."""

    def __init__(self, oeb):
        self.oeb = oeb
        self.items = []

    def add(self, item, linear):
        """Append *item*; ``linear`` may be a bool, None, or an OPF
        'yes'/'no'/'true'/'false' string (None counts as linear)."""
        if isinstance(linear, StringTypes):
            linear = linear.lower()
        if linear is None or linear in ('yes', 'true'):
            linear = True
        elif linear in ('no', 'false'):
            linear = False
        item.linear = linear
        item.spine_position = len(self.items)
        self.items.append(item)
        return item

    def __iter__(self):
        return iter(self.items)

    def __getitem__(self, index):
        return self.items[index]

    def __len__(self):
        return len(self.items)

    def __contains__(self, item):
        return item in self.items

    def to_opf1(self, parent=None):
        """OPF 1.x spine: only linear items are listed."""
        elem = element(parent, 'spine')
        for item in self.items:
            if not item.linear:
                continue
            element(elem, 'itemref', attrib={'idref': item.id})
        return elem

    def to_opf2(self, parent=None):
        """OPF 2.0 spine: non-linear items are flagged linear='no'."""
        elem = element(parent, OPF('spine'))
        for item in self.items:
            attrib = {'idref': item.id}
            if not item.linear:
                attrib['linear'] = 'no'
            element(elem, OPF('itemref'), attrib=attrib)
        return elem
class Guide(object):
    """OPF guide: special-purpose references (cover, toc, ...), keyed by
    reference type.  Adding a second reference of a type replaces the
    first."""

    class Reference(object):
        """A single guide reference (type, optional title, href)."""

        def __init__(self, type, title, href):
            self.type = type
            self.title = title
            self.href = urlnormalize(href)

        def __repr__(self):
            return 'Reference(type=%r, title=%r, href=%r)' \
                % (self.type, self.title, self.href)

    def __init__(self, oeb):
        self.oeb = oeb
        self.refs = {}

    def add(self, type, title, href):
        """Add (or replace) the reference for *type* and return it."""
        ref = self.Reference(type, title, href)
        self.refs[type] = ref
        return ref

    def by_type(self, type):
        """Return the reference of the given type (KeyError if absent)."""
        # Fixed: referenced the nonexistent attribute ``self.ref_types``;
        # references are stored in ``self.refs`` keyed by type.
        return self.refs[type]

    def iterkeys(self):
        for type in self.refs:
            yield type
    __iter__ = iterkeys

    def values(self):
        for ref in self.refs.values():
            yield ref

    def items(self):
        for type, ref in self.refs.items():
            yield type, ref

    def __getitem__(self, index):
        return self.refs[index]

    def __contains__(self, key):
        return key in self.refs

    def to_opf1(self, parent=None):
        elem = element(parent, 'guide')
        for ref in self.refs.values():
            attrib = {'type': ref.type, 'href': ref.href}
            if ref.title:
                attrib['title'] = ref.title
            element(elem, 'reference', attrib=attrib)
        return elem

    def to_opf2(self, parent=None):
        elem = element(parent, OPF('guide'))
        for ref in self.refs.values():
            attrib = {'type': ref.type, 'href': ref.href}
            if ref.title:
                attrib['title'] = ref.title
            element(elem, OPF('reference'), attrib=attrib)
        return elem
class TOC(object):
    """A node in the (recursive) table-of-contents tree.  The root node
    typically has only a title; children carry hrefs into the book."""

    def __init__(self, title=None, href=None, klass=None, id=None):
        self.title = title
        self.href = urlnormalize(href) if href else href
        self.klass = klass
        self.id = id
        self.nodes = []

    def add(self, title, href, klass=None, id=None):
        """Append and return a new child node."""
        node = TOC(title, href, klass, id)
        self.nodes.append(node)
        return node

    def __iter__(self):
        for node in self.nodes:
            yield node

    def __getitem__(self, index):
        return self.nodes[index]

    def depth(self, level=0):
        # Depth of the first branch only — assumes a uniform tree.
        if self.nodes:
            return self.nodes[0].depth(level+1)
        return level

    def to_opf1(self, tour):
        """Flatten into an OPF 1.x <tour> as a list of <site> elements."""
        for node in self.nodes:
            element(tour, 'site', attrib={
                'title': node.title, 'href': node.href})
            node.to_opf1(tour)
        return tour

    def to_ncx(self, parent, playorder=None, depth=1):
        """Serialize this subtree as NCX navPoints under *parent*.

        ``playorder`` is a single-element list used as a mutable counter
        shared across the recursion.
        """
        if not playorder:
            playorder = [0]
        for node in self.nodes:
            playorder[0] += 1
            point = etree.SubElement(parent,
                NCX('navPoint'), attrib={'playOrder': str(playorder[0])})
            # Fixed: emit class/id from the child being serialized, not
            # from self (the parent), which previously attached the wrong
            # node's attributes.
            if node.klass:
                point.attrib['class'] = node.klass
            if node.id:
                point.attrib['id'] = node.id
            label = etree.SubElement(point, NCX('navLabel'))
            etree.SubElement(label, NCX('text')).text = node.title
            # Top-level entries drop their fragment so they target whole
            # documents.
            href = node.href if depth > 1 else urldefrag(node.href)[0]
            child = etree.SubElement(point,
                NCX('content'), attrib={'src': href})
            node.to_ncx(point, playorder, depth+1)
        return parent
class OEBBook(object):
    """An OEB book — metadata, manifest, spine, guide and TOC — read from
    an OPF package.  OPF 1.x packages are converted to OPF 2.0 form before
    parsing; the book can be serialized back to either version."""

    def __init__(self, opfpath, container=None):
        if not container:
            # Default to a directory container rooted at the OPF's dir.
            container = DirContainer(os.path.dirname(opfpath))
            opfpath = os.path.basename(opfpath)
        self.container = container
        opf = self._read_opf(opfpath)
        self._all_from_opf(opf)

    def _convert_opf1(self, opf):
        """Rebuild an OPF 1.x package tree as an OPF 2.0 tree."""
        nroot = etree.Element(OPF('package'),
            nsmap={None: OPF2_NS}, version="2.0", **dict(opf.attrib))
        metadata = etree.SubElement(nroot, OPF('metadata'),
            nsmap={'opf': OPF2_NS, 'dc': DC11_NS,
                   'xsi': XSI_NS, 'dcterms': DCTERMS_NS})
        # Use DC elements from the newest DC namespace that has any.
        for prefix in ('d11', 'd10', 'd09'):
            elements = xpath(opf, 'metadata/dc-metadata/%s:*' % prefix)
            if elements: break
        for element in elements:
            if not element.text: continue
            tag = barename(element.tag).lower()
            element.tag = '{%s}%s' % (DC11_NS, tag)
            # role/file-as/scheme move into the OPF namespace in 2.0.
            for name in element.attrib:
                if name in ('role', 'file-as', 'scheme'):
                    nsname = '{%s}%s' % (OPF2_NS, name)
                    element.attrib[nsname] = element.attrib[name]
                    del element.attrib[name]
            metadata.append(element)
        for element in opf.xpath('metadata/x-metadata/meta'):
            metadata.append(element)
        # Normalize OEB 1.x media types to their 2.0 equivalents.
        for item in opf.xpath('manifest/item'):
            media_type = item.attrib['media-type'].lower()
            if media_type in OEB_DOCS:
                media_type = XHTML_MIME
            elif media_type in OEB_STYLES:
                media_type = CSS_MIME
            item.attrib['media-type'] = media_type
        for tag in ('manifest', 'spine', 'tours', 'guide'):
            for element in opf.xpath(tag):
                nroot.append(element)
        # Round-trip through a serializer so moved elements pick up the
        # new namespace declarations.
        return etree.fromstring(etree.tostring(nroot), parser=XML_PARSER)

    def _read_opf(self, opfpath):
        """Read the OPF, upgrading OPF 1.x packages to 2.0 form."""
        opf = self.container.read_xml(opfpath)
        version = float(opf.get('version', 1.0))
        if version < 2.0:
            opf = self._convert_opf1(opf)
        return opf

    def _metadata_from_opf(self, opf):
        uid = opf.attrib['unique-identifier']
        self.metadata = metadata = Metadata(self)
        for elem in xpath(opf, '/o2:package/o2:metadata/*'):
            if elem.text or elem.attrib:
                metadata.add(elem.tag, elem.text, elem.attrib)
        # NOTE(review): self.uid stays unset if no identifier matches the
        # package's unique-identifier — confirm callers tolerate that.
        for item in metadata.identifier:
            if item.id == uid:
                self.uid = item
                break

    def _manifest_from_opf(self, opf):
        self.manifest = manifest = Manifest(self)
        for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
            manifest.add(elem.get('id'), elem.get('href'),
                         elem.get('media-type'), elem.get('fallback'))

    def _spine_from_opf(self, opf):
        self.spine = spine = Spine(self)
        for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
            item = self.manifest[elem.get('idref')]
            spine.add(item, elem.get('linear'))
        # Append any XHTML manifest items missing from the spine as
        # non-linear entries so no content is lost.
        extras = []
        for item in self.manifest.values():
            if item.media_type == XHTML_MIME \
               and item not in spine:
                extras.append(item)
        extras.sort()
        for item in extras:
            spine.add(item, False)

    def _guide_from_opf(self, opf):
        self.guide = guide = Guide(self)
        for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
            guide.add(elem.get('type'), elem.get('title'), elem.get('href'))

    def _toc_from_navpoint(self, toc, navpoint):
        # Recursively mirror an NCX navPoint subtree into the TOC tree.
        children = xpath(navpoint, 'ncx:navPoint')
        for child in children:
            title = ''.join(xpath(child, 'ncx:navLabel/ncx:text/text()'))
            href = xpath(child, 'ncx:content/@src')[0]
            id = child.get('id')
            klass = child.get('class')
            node = toc.add(title, href, id=id, klass=klass)
            self._toc_from_navpoint(node, child)

    def _toc_from_ncx(self, opf):
        """TOC source 1: the NCX document named by spine/@toc."""
        result = xpath(opf, '/o2:package/o2:spine/@toc')
        if not result:
            return False
        id = result[0]
        ncx = self.manifest[id].data
        # The NCX is consumed here and removed from the manifest.
        self.manifest.remove(id)
        title = xpath(ncx, 'ncx:docTitle/ncx:text/text()')[0]
        self.toc = toc = TOC(title)
        navmaps = xpath(ncx, 'ncx:navMap')
        for navmap in navmaps:
            self._toc_from_navpoint(toc, navmap)
        return True

    def _toc_from_tour(self, opf):
        """TOC source 2: the first OPF 1.x <tour>."""
        result = xpath(opf, '/o2:package/o2:tours/o2:tour')
        if not result:
            return False
        tour = result[0]
        self.toc = toc = TOC(tour.get('title'))
        sites = xpath(tour, 'o2:site')
        for site in sites:
            toc.add(site.get('title'), site.get('href'))
        return True

    def _toc_from_html(self, opf):
        """TOC source 3: anchors scraped from the guide's 'toc' page."""
        if 'toc' not in self.guide:
            return False
        self.toc = toc = TOC()
        itempath, frag = urldefrag(self.guide['toc'].href)
        item = self.manifest.hrefs[itempath]
        html = item.data
        if frag:
            # Narrow to the fragment's element, then widen back out until
            # the subtree actually contains links.
            elems = xpath(html, './/*[@id="%s"]' % frag)
            if not elems:
                elems = xpath(html, './/*[@name="%s"]' % frag)
            elem = elems[0] if elems else html
            while elem != html and not xpath(elem, './/h:a[@href]'):
                elem = elem.getparent()
            html = elem
        titles = defaultdict(list)
        order = []
        for anchor in xpath(html, './/h:a[@href]'):
            href = anchor.attrib['href']
            path, frag = urldefrag(href)
            if not path:
                # Fragment-only link: anchor within the TOC page itself.
                href = '#'.join((itempath, frag))
            title = ' '.join(xpath(anchor, './/text()'))
            href = urlnormalize(href)
            if href not in titles:
                order.append(href)
            # Multiple anchors to one target merge into a single entry.
            titles[href].append(title)
        for href in order:
            toc.add(' '.join(titles[href]), href)
        return True

    def _toc_from_spine(self, opf):
        """TOC source 4 (fallback): one entry per linear spine item."""
        self.toc = toc = TOC()
        titles = []
        headers = []
        for item in self.spine:
            if not item.linear: continue
            html = item.data
            title = xpath(html, '/h:html/h:head/h:title/text()')
            if title: titles.append(title[0])
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()' % (tag,)
                header = xpath(html, expr)
                if header:
                    headers[-1] = header[0]
                    break
        # Prefer <title> text, but fall back to in-page headers when the
        # titles are not unique.
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        for title, item in izip(use, self.spine):
            if not item.linear: continue
            toc.add(title, item.href)
        return True

    def _toc_from_opf(self, opf):
        # Try each TOC source in decreasing order of fidelity.
        if self._toc_from_ncx(opf): return
        if self._toc_from_tour(opf): return
        if self._toc_from_html(opf): return
        self._toc_from_spine(opf)

    def _all_from_opf(self, opf):
        # Order matters: spine needs manifest; TOC needs guide and spine.
        self._metadata_from_opf(opf)
        self._manifest_from_opf(opf)
        self._spine_from_opf(opf)
        self._guide_from_opf(opf)
        self._toc_from_opf(opf)

    def to_opf1(self):
        """Serialize as OPF 1.x; returns {mimetype: (filename, tree)}."""
        package = etree.Element('package',
            attrib={'unique-identifier': self.uid.id})
        metadata = self.metadata.to_opf1(package)
        manifest = self.manifest.to_opf1(package)
        spine = self.spine.to_opf1(package)
        tours = element(package, 'tours')
        tour = element(tours, 'tour',
            attrib={'id': 'chaptertour', 'title': 'Chapter Tour'})
        self.toc.to_opf1(tour)
        guide = self.guide.to_opf1(package)
        return {OPF_MIME: ('content.opf', package)}

    def _generate_ncx_item(self):
        """Pick an (id, href) for the generated NCX that collides with
        nothing already in the manifest."""
        # NOTE(review): relies on Manifest.__contains__, which as written
        # always returns False — confirm once that bug is fixed.
        id = 'ncx'
        index = 0
        while id in self.manifest:
            id = 'ncx' + str(index)
            index = index + 1
        href = 'toc'
        index = 0
        # NOTE(review): index is never incremented here, so if both
        # 'toc.ncx' and 'toc0.ncx' exist this loops forever — confirm.
        while (href + '.ncx') in self.manifest.hrefs:
            href = 'toc' + str(index)
        href += '.ncx'
        return (id, href)

    def _to_ncx(self):
        """Build the NCX document for the current TOC and metadata."""
        ncx = etree.Element(NCX('ncx'), attrib={'version': '2005-1'},
            nsmap={None: NCX_NS})
        head = etree.SubElement(ncx, NCX('head'))
        etree.SubElement(head, NCX('meta'),
            attrib={'name': 'dtb:uid', 'content': unicode(self.uid)})
        etree.SubElement(head, NCX('meta'),
            attrib={'name': 'dtb:depth', 'content': str(self.toc.depth())})
        etree.SubElement(head, NCX('meta'),
            attrib={'name': 'dtb:totalPageCount', 'content': '0'})
        etree.SubElement(head, NCX('meta'),
            attrib={'name': 'dtb:maxPageNumber', 'content': '0'})
        title = etree.SubElement(ncx, NCX('docTitle'))
        text = etree.SubElement(title, NCX('text'))
        text.text = unicode(self.metadata.title[0])
        navmap = etree.SubElement(ncx, NCX('navMap'))
        self.toc.to_ncx(navmap)
        return ncx

    def to_opf2(self):
        """Serialize as OPF 2.0 + NCX; returns {mimetype: (filename, tree)}."""
        package = etree.Element(OPF('package'),
            attrib={'version': '2.0', 'unique-identifier': self.uid.id},
            nsmap={None: OPF2_NS})
        metadata = self.metadata.to_opf2(package)
        manifest = self.manifest.to_opf2(package)
        id, href = self._generate_ncx_item()
        etree.SubElement(manifest, OPF('item'),
            attrib={'id': id, 'href': href, 'media-type': NCX_MIME})
        spine = self.spine.to_opf2(package)
        spine.attrib['toc'] = id
        guide = self.guide.to_opf2(package)
        ncx = self._to_ncx()
        return {OPF_MIME: ('content.opf', package),
                NCX_MIME: (href, ncx)}
def main(argv=sys.argv):
    """Debug driver: pretty-print the OPF 1.x and 2.0 serializations of
    each book named on the command line."""
    for arg in argv[1:]:
        oeb = OEBBook(arg)
        for name, doc in oeb.to_opf1().values():
            print etree.tostring(doc, pretty_print=True)
        for name, doc in oeb.to_opf2().values():
            print etree.tostring(doc, pretty_print=True)
    return 0

if __name__ == '__main__':
    sys.exit(main())

View File

@ -10,10 +10,12 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
import sys, struct, cStringIO, os
import functools
import re
from urlparse import urldefrag
from lxml import etree
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
import calibre.ebooks.lit.mssha1 as mssha1
from calibre.ebooks.lit.oeb import urlnormalize
from calibre.ebooks import DRMError
from calibre import plugins
lzx, lxzerror = plugins['lzx']
@ -110,7 +112,7 @@ class UnBinary(object):
AMPERSAND_RE = re.compile(
r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
OPEN_ANGLE_RE = re.compile(r'<<(?![!]--)')
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>')
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>(?=>>|[^>])')
DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
def __init__(self, bin, path, manifest={}, map=HTML_MAP):
@ -322,12 +324,12 @@ class UnBinary(object):
href += c
count -= 1
if count == 0:
doc, m, frag = href[1:].partition('#')
doc, frag = urldefrag(href[1:])
path = self.item_path(doc)
if m and frag:
path += m + frag
self.buf.write((u'"%s"' % path).encode(
'ascii', 'xmlcharrefreplace'))
if frag:
path = '#'.join((path, frag))
path = urlnormalize(path)
self.buf.write((u'"%s"' % path).encode('utf-8'))
state = 'get attr'
return index
@ -385,7 +387,7 @@ def preserve(function):
class LitReader(object):
PIECE_SIZE = 16
XML_PARSER = etree.XMLParser(
remove_blank_text=True, resolve_entities=False)
recover=True, resolve_entities=False)
def magic():
@preserve
@ -781,7 +783,7 @@ class LitReader(object):
try:
result.append(
lzx.decompress(content[base:size], window_bytes))
except lzx.LzxError:
except lzx.LZXError:
self._warn("LZX decompression error; skipping chunk")
bytes_remaining -= window_bytes
base = size
@ -791,7 +793,7 @@ class LitReader(object):
lzx.reset()
try:
result.append(lzx.decompress(content[base:], bytes_remaining))
except lzx.LzxError:
except lzx.LZXError:
self._warn("LZX decompression error; skipping chunk")
bytes_remaining = 0
if bytes_remaining > 0:

View File

@ -0,0 +1,444 @@
# -*- encoding: utf-8 -*-
'''
CSS property propagation class.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
import locale
import codecs
import itertools
import types
import re
import copy
import cssutils
from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
CSSValueList, cssproperties
from lxml import etree
from calibre.ebooks.lit.oeb import XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.lit.oeb import barename, urlnormalize
from calibre.resources import html_css
HTML_CSS_STYLESHEET = cssutils.parseString(html_css)
XHTML_CSS_NAMESPACE = "@namespace url(http://www.w3.org/1999/xhtml);\n"
INHERITED = set(['azimuth', 'border-collapse', 'border-spacing',
'caption-side', 'color', 'cursor', 'direction', 'elevation',
'empty-cells', 'font-family', 'font-size', 'font-style',
'font-variant', 'font-weight', 'letter-spacing',
'line-height', 'list-style-image', 'list-style-position',
'list-style-type', 'orphans', 'page-break-inside',
'pitch-range', 'pitch', 'quotes', 'richness', 'speak-header',
'speak-numeral', 'speak-punctuation', 'speak', 'speech-rate',
'stress', 'text-align', 'text-indent', 'text-transform',
'visibility', 'voice-family', 'volume', 'white-space',
'widows', 'word-spacing'])
DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll',
'background-color': 'transparent', 'background-image': 'none',
'background-position': '0% 0%', 'background-repeat': 'repeat',
'border-bottom-color': ':color', 'border-bottom-style': 'none',
'border-bottom-width': 'medium', 'border-collapse': 'separate',
'border-left-color': ':color', 'border-left-style': 'none',
'border-left-width': 'medium', 'border-right-color': ':color',
'border-right-style': 'none', 'border-right-width': 'medium',
'border-spacing': 0, 'border-top-color': ':color',
'border-top-style': 'none', 'border-top-width': 'medium', 'bottom':
'auto', 'caption-side': 'top', 'clear': 'none', 'clip': 'auto',
'color': 'black', 'content': 'normal', 'counter-increment': 'none',
'counter-reset': 'none', 'cue-after': 'none', 'cue-before': 'none',
'cursor': 'auto', 'direction': 'ltr', 'display': 'inline',
'elevation': 'level', 'empty-cells': 'show', 'float': 'none',
'font-family': 'serif', 'font-size': 'medium', 'font-style':
'normal', 'font-variant': 'normal', 'font-weight': 'normal',
'height': 'auto', 'left': 'auto', 'letter-spacing': 'normal',
'line-height': 'normal', 'list-style-image': 'none',
'list-style-position': 'outside', 'list-style-type': 'disc',
'margin-bottom': 0, 'margin-left': 0, 'margin-right': 0,
'margin-top': 0, 'max-height': 'none', 'max-width': 'none',
'min-height': 0, 'min-width': 0, 'orphans': '2',
'outline-color': 'invert', 'outline-style': 'none',
'outline-width': 'medium', 'overflow': 'visible', 'padding-bottom':
0, 'padding-left': 0, 'padding-right': 0, 'padding-top': 0,
'page-break-after': 'auto', 'page-break-before': 'auto',
'page-break-inside': 'auto', 'pause-after': 0, 'pause-before':
0, 'pitch': 'medium', 'pitch-range': '50', 'play-during': 'auto',
'position': 'static', 'quotes': u"'' '' '' ''", 'richness':
'50', 'right': 'auto', 'speak': 'normal', 'speak-header': 'once',
'speak-numeral': 'continuous', 'speak-punctuation': 'none',
'speech-rate': 'medium', 'stress': '50', 'table-layout': 'auto',
'text-align': 'left', 'text-decoration': 'none', 'text-indent':
0, 'text-transform': 'none', 'top': 'auto', 'unicode-bidi':
'normal', 'vertical-align': 'baseline', 'visibility': 'visible',
'voice-family': 'default', 'volume': 'medium', 'white-space':
'normal', 'widows': '2', 'width': 'auto', 'word-spacing': 'normal',
'z-index': 'auto'}
# CSS keyword font sizes.
FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
                       'x-large', 'xx-large'])

# (CSS keyword, HTML <font size=N>, point size) equivalences used to
# resolve keyword and relative ('smaller'/'larger') font sizes.
FONT_SIZE_LIST = [('xx-small', 1, 6.),
                  ('x-small', None, 7.),
                  ('small', 2, 8.),
                  ('medium', 3, 9.),
                  ('large', 4, 11.),
                  ('x-large', 5, 13.),
                  ('xx-large', 6, 15.),
                  (None, 7, 17.)]

# Point-size lookups derived from FONT_SIZE_LIST.
FONT_SIZE_BY_NAME = {}
FONT_SIZE_BY_NUM = {}
for name, num, size in FONT_SIZE_LIST:
    FONT_SIZE_BY_NAME[name] = size
    FONT_SIZE_BY_NUM[num] = size

# XPath prefix 'h' is bound to the XHTML namespace.
XPNSMAP = {'h': XHTML_NS,}

def xpath(elem, expr):
    # Convenience wrapper: evaluate expr with the 'h' prefix in scope.
    return elem.xpath(expr, namespaces=XPNSMAP)
class Page(object):
    """Physical rendering surface: page dimensions plus pixel density.

    All three measures are stored as floats so later unit arithmetic
    never truncates.
    """

    def __init__(self, width, height, dpi):
        """Record the page geometry, coercing every measure to float."""
        self.width, self.height, self.dpi = (
            float(width), float(height), float(dpi))
class Profiles(object):
    """Known output-device page profiles."""
    # Sony PRS-500 screen: 584x754 px at 168.451 dpi.
    PRS500 = Page(584, 754, 168.451)
    # The PRS-505 shares the PRS-500's screen geometry.
    PRS505 = PRS500
class Stylizer(object):
    """Resolve the CSS cascade for one XHTML document of an OEB book."""

    # Class-level cache of parsed external stylesheets, keyed by
    # normalized path, shared across Stylizer instances.
    STYLESHEETS = {}

    def __init__(self, tree, path, oeb, page=Profiles.PRS505):
        """Collect and flatten every stylesheet applying to *tree*.

        tree: parsed XHTML document; path: its path within the book;
        oeb: the containing OEBBook; page: device page profile.
        """
        self.page = page
        base = os.path.dirname(path)
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        # The default XHTML user-agent stylesheet always applies first.
        stylesheets = [HTML_CSS_STYLESHEET]
        head = xpath(tree, '/h:html/h:head')[0]
        parser = cssutils.CSSParser()
        # Resolve @import fetches through the book's container.
        # NOTE(review): the lambda parameter shadows the outer 'path'.
        parser.setFetcher(lambda path: ('utf-8', oeb.container.read(path)))
        for elem in head:
            tag = barename(elem.tag)
            if tag == 'style':
                # ''.join is a no-op for a plain string; presumably
                # guards against non-string text values -- confirm.
                text = ''.join(elem.text)
                stylesheet = parser.parseString(text, href=cssname)
                stylesheets.append(stylesheet)
            elif tag == 'link' \
                 and elem.get('rel', 'stylesheet') == 'stylesheet' \
                 and elem.get('type', CSS_MIME) in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = os.path.join(base, href)
                path = os.path.normpath(path).replace('\\', '/')
                if path in self.STYLESHEETS:
                    stylesheet = self.STYLESHEETS[path]
                else:
                    # Prefix the XHTML @namespace rule so type selectors
                    # match namespaced element tags.
                    data = XHTML_CSS_NAMESPACE
                    data += oeb.manifest.hrefs[path].data
                    stylesheet = parser.parseString(data, href=path)
                    self.STYLESHEETS[path] = stylesheet
                stylesheets.append(stylesheet)
        # Flatten all rules into (specificity, selector, style, text,
        # href) tuples; sorting orders them by the cascade.
        rules = []
        index = 0
        self.stylesheets = set()
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                rules.extend(self.flatten_rule(rule, href, index))
                index = index + 1
        rules.sort()
        self.rules = rules
        # Per-element computed Style cache, filled by Style.__init__.
        self._styles = {}
def flatten_rule(self, rule, href, index):
results = []
if isinstance(rule, CSSStyleRule):
style = self.flatten_style(rule.style)
for selector in rule.selectorList:
specificity = selector.specificity + (index,)
text = selector.selectorText
selector = list(selector.seq)
results.append((specificity, selector, style, text, href))
elif isinstance(rule, CSSPageRule):
style = self.flatten_style(rule.style)
results.append(((0, 0, 0, 0), [], style, '@page', href))
return results
def flatten_style(self, cssstyle):
style = {}
for prop in cssstyle:
name = prop.name
if name in ('margin', 'padding'):
style.update(self._normalize_edge(prop.cssValue, name))
elif name == 'font':
style.update(self._normalize_font(prop.cssValue))
else:
style[name] = prop.value
if 'font-size' in style:
size = style['font-size']
if size == 'normal': size = 'medium'
if size in FONT_SIZE_NAMES:
style['font-size'] = "%dpt" % FONT_SIZE_BY_NAME[size]
return style
def _normalize_edge(self, cssvalue, name):
style = {}
if isinstance(cssvalue, CSSValueList):
primitives = [v.cssText for v in cssvalue]
else:
primitives = [cssvalue.cssText]
if len(primitives) == 1:
value, = primitives
values = [value, value, value, value]
elif len(primitives) == 2:
vert, horiz = primitives
values = [vert, horiz, vert, horiz]
elif len(primitives) == 3:
top, horiz, bottom = primitives
values = [top, horiz, bottom, horiz]
else:
values = primitives[:4]
edges = ('top', 'right', 'bottom', 'left')
for edge, value in itertools.izip(edges, values):
style["%s-%s" % (name, edge)] = value
return style
def _normalize_font(self, cssvalue):
composition = ('font-style', 'font-variant', 'font-weight',
'font-size', 'line-height', 'font-family')
style = {}
if cssvalue.cssText == 'inherit':
for key in composition:
style[key] = 'inherit'
else:
primitives = [v.cssText for v in cssvalue]
primitites.reverse()
value = primitives.pop()
for key in composition:
if cssproperties.cssvalues[key](value):
style[key] = value
if not primitives: break
value = primitives.pop()
for key in composition:
if key not in style:
style[key] = DEFAULTS[key]
return style
def style(self, element):
try: return self._styles[element]
except: pass
return Style(element, self)
def stylesheet(self, name, font_scale=None):
rules = []
for _, _, style, selector, href in self.rules:
if href != name: continue
if font_scale and 'font-size' in style and \
style['font-size'].endswith('pt'):
style = copy.copy(style)
size = float(style['font-size'][:-2])
style['font-size'] = "%.2fpt" % (size * font_scale)
style = ';\n '.join(': '.join(item) for item in style.items())
rules.append('%s {\n %s;\n}' % (selector, style))
return '\n'.join(rules)
class Style(object):
    """Computed CSS style for a single element."""

    def __init__(self, element, stylizer):
        self._element = element
        self._page = stylizer.page
        self._stylizer = stylizer
        self._style = self._assemble_style(element, stylizer)
        # Register in the stylizer's cache so Stylizer.style() and
        # parent lookups find this instance.
        stylizer._styles[element] = self
def _assemble_style(self, element, stylizer):
result = {}
rules = stylizer.rules
for _, selector, style, _, _ in rules:
if self._selects_element(element, selector):
result.update(style)
try:
style = CSSStyleDeclaration(element.attrib['style'])
result.update(stylizer.flatten_style(style))
except KeyError:
pass
return result
    def _selects_element(self, element, selector):
        """True when the parsed *selector* (a cssutils seq list) matches
        *element*.  Walks the selector items right-to-left, moving up
        the element tree for combinators."""
        def _selects_element(element, items, index):
            # All items consumed: the selector matches.
            if index == -1:
                return True
            item = items[index]
            if item.type == 'universal':
                pass
            elif item.type == 'type-selector':
                # item.value is a (namespace, localname) pair.
                name1 = ("{%s}%s" % item.value).lower()
                name2 = element.tag.lower()
                if name1 != name2:
                    return False
            elif item.type == 'id':
                # Strip the leading '#'.
                name1 = item.value[1:]
                name2 = element.get('id', '')
                if name1 != name2:
                    return False
            elif item.type == 'class':
                # Strip the leading '.'; class attribute is a
                # whitespace-separated list.
                name = item.value[1:].lower()
                classes = element.get('class', '').lower().split()
                if name not in classes:
                    return False
            elif item.type == 'child':
                # '>' combinator: continue matching against the parent.
                parent = element.getparent()
                if parent is None:
                    return False
                element = parent
            elif item.type == 'descendant':
                # ' ' combinator: try every ancestor.
                element = element.getparent()
                while element is not None:
                    if _selects_element(element, items, index - 1):
                        return True
                    element = element.getparent()
                return False
            elif item.type == 'pseudo-class':
                # Only :first-child is supported.
                if item.value == ':first-child':
                    e = element.getprevious()
                    if e is not None:
                        return False
                else:
                    return False
            elif item.type == 'pseudo-element':
                # Pseudo-elements never match a real element.
                return False
            else:
                return False
            return _selects_element(element, items, index - 1)
        return _selects_element(element, selector, len(selector) - 1)
def _has_parent(self):
parent = self._element.getparent()
return (parent is not None) \
and (parent in self._stylizer._styles)
    def __getitem__(self, name):
        """Resolve CSS property *name* to its used value.

        Properties with a specialized implementation (e.g. fontSize,
        width) are delegated to it; everything else goes through the
        inheritance lookup and unit conversion to points.
        """
        domname = cssproperties._toDOMname(name)
        if hasattr(self, domname):
            return getattr(self, domname)
        return self._unit_convert(self._get(name))
    def _get(self, name):
        """Return the specified value of property *name*, applying CSS
        inheritance: an explicit 'inherit', or an unset property that is
        inherited by default, walks up to the parent's value; otherwise
        the CSS initial value from DEFAULTS applies."""
        result = None
        if name in self._style:
            result = self._style[name]
        if (result == 'inherit'
            or (result is None and name in INHERITED
                and self._has_parent())):
            styles = self._stylizer._styles
            result = styles[self._element.getparent()]._get(name)
        if result is None:
            result = DEFAULTS[name]
        return result
def _unit_convert(self, value, base=None, font=None):
if isinstance(value, (int, long, float)):
return value
try:
if float(value) == 0:
return 0.0
except:
pass
result = value
m = re.search(
r"^(-*[0-9]*\.?[0-9]*)\s*(%|em|px|mm|cm|in|pt|pc)$", value)
if m is not None and m.group(1):
value = float(m.group(1))
unit = m.group(2)
if unit == '%':
base = base or self.width
result = (value/100.0) * base
elif unit == 'px':
result = value * 72.0 / self._page.dpi
elif unit == 'in':
result = value * 72.0
elif unit == 'pt':
result = value
elif unit == 'em':
font = font or self.fontSize
result = value * font
elif unit == 'pc':
result = value * 12.0
elif unit == 'mm':
result = value * 0.04
elif unit == 'cm':
result = value * 0.40
return result
    @property
    def fontSize(self):
        """Computed font size of this element, in points.

        Resolves keyword sizes, 'smaller'/'larger' relative to the
        parent, and lengths (em relative to the parent size); the
        parent's computed size -- or the CSS default -- is the base.
        """
        def normalize_fontsize(value, base=None):
            result = None
            factor = None
            if value == 'inherit':
                value = 'medium'
            if value in FONT_SIZE_NAMES:
                result = FONT_SIZE_BY_NAME[value]
            elif value == 'smaller':
                # Step down the size table; fall back to a 1/1.2 factor
                # when base is below the smallest entry.
                factor = 1.0 / 1.2
                for _, _, size in FONT_SIZE_LIST:
                    if base <= size: break
                    factor = None
                    result = size
            elif value == 'larger':
                factor = 1.2
                for _, _, size in reversed(FONT_SIZE_LIST):
                    if base >= size: break
                    factor = None
                    result = size
            else:
                result = self._unit_convert(value, base=base, font=base)
                if result < 0:
                    result = normalize_fontsize("smaller", base)
            if factor:
                result = factor * base
            return result
        result = None
        if self._has_parent():
            styles = self._stylizer._styles
            base = styles[self._element.getparent()].fontSize
        else:
            base = normalize_fontsize(DEFAULTS['font-size'])
        if 'font-size' in self._style:
            size = self._style['font-size']
            result = normalize_fontsize(size, base)
        else:
            result = base
        # NOTE(review): this cache write is ineffective -- property is a
        # data descriptor, so the instance __dict__ entry is never
        # consulted and the value is recomputed on every access.
        self.__dict__['fontSize'] = result
        return result
@property
def width(self):
result = None
base = None
if self._has_parent():
styles = self._stylizer._styles
base = styles[self._element.getparent()].width
else:
base = self._page.width
if 'width' in self._style:
width = self._style['width']
if width == 'auto':
result = base
else:
result = self._unit_convert(width, base=base)
else:
result = base
self.__dict__['width'] = result
return result
def __str__(self):
items = self._style.items()
return '; '.join("%s: %s" % (key, val) for key, val in items)

View File

@ -0,0 +1,742 @@
'''
Basic support for writing LIT files.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
from cStringIO import StringIO
from struct import pack
from itertools import izip, count, chain
import time
import random
import re
import copy
import uuid
import functools
import logging
from urlparse import urldefrag
from urllib import unquote as urlunquote
from lxml import etree
from calibre.ebooks.lit.reader import DirectoryEntry
import calibre.ebooks.lit.maps as maps
from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath
from calibre.ebooks.lit.oeb import OEBBook
from calibre.ebooks.lit.stylizer import Stylizer
from calibre.ebooks.lit.lzx import Compressor
import calibre
from calibre import plugins
msdes, msdeserror = plugins['msdes']
import calibre.ebooks.lit.mssha1 as mssha1
__all__ = ['LitWriter']
LIT_IMAGES = set(['image/png', 'image/jpeg', 'image/gif'])
LIT_MIMES = OEB_DOCS | OEB_STYLES | LIT_IMAGES
MS_COVER_TYPE = 'other.ms-coverimage-standard'
ALL_MS_COVER_TYPES = [
(MS_COVER_TYPE, 'Standard cover image'),
('other.ms-thumbimage-standard', 'Standard thumbnail image'),
('other.ms-coverimage', 'PocketPC cover image'),
('other.ms-thumbimage', 'PocketPC thumbnail image'),
]
def invert_tag_map(tag_map):
    """Invert a (tags, dattrs, tattrs) decoding map for encoding.

    Returns (tags, tattrs) where *tags* maps tag name -> tag code and
    each entry of *tattrs* maps attribute name -> attribute code, with
    the inverted default attribute map merged into every non-empty
    per-tag map and installed as entry 0.

    Idiom fixes: enumerate() instead of xrange(len(...)), and the loop
    variable no longer shadows the builtin map().
    """
    tags, dattrs, tattrs = tag_map
    tags = dict((tag, code) for code, tag in enumerate(tags))
    dattrs = dict((v, k) for k, v in dattrs.items())
    tattrs = [dict((v, k) for k, v in (attrs or {}).items())
              for attrs in tattrs]
    for attrs in tattrs:
        if attrs: attrs.update(dattrs)
    tattrs[0] = dattrs
    return tags, tattrs
OPF_MAP = invert_tag_map(maps.OPF_MAP)
HTML_MAP = invert_tag_map(maps.HTML_MAP)
LIT_MAGIC = 'ITOLITLS'
LITFILE_GUID = "{0A9007C1-4076-11D3-8789-0000F8105754}"
PIECE3_GUID = "{0A9007C3-4076-11D3-8789-0000F8105754}"
PIECE4_GUID = "{0A9007C4-4076-11D3-8789-0000F8105754}"
DESENCRYPT_GUID = "{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}"
LZXCOMPRESS_GUID = "{0A9007C6-4076-11D3-8789-0000F8105754}"
def packguid(guid):
    """Pack a '{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}' GUID string into
    its 16-byte little-endian binary form: one 32-bit field, two 16-bit
    fields, then eight single bytes."""
    spans = ((1, 9), (10, 14), (15, 19), (20, 22), (22, 24), (25, 27),
             (27, 29), (29, 31), (31, 33), (33, 35), (35, 37))
    fields = [int(guid[lo:hi], 16) for lo, hi in spans]
    return pack("<LHHBBBBBBBB", *fields)
FLAG_OPENING = (1 << 0)
FLAG_CLOSING = (1 << 1)
FLAG_BLOCK = (1 << 2)
FLAG_HEAD = (1 << 3)
FLAG_ATOM = (1 << 4)
FLAG_CUSTOM = (1 << 15)
ATTR_NUMBER = 0xffff
PIECE_SIZE = 16
PRIMARY_SIZE = 40
SECONDARY_SIZE = 232
DCHUNK_SIZE = 0x2000
CCHUNK_SIZE = 0x0200
ULL_NEG1 = 0xffffffffffffffff
ROOT_OFFSET = 1284508585713721976
ROOT_SIZE = 4165955342166943123
BLOCK_CAOL = \
"\x43\x41\x4f\x4c\x02\x00\x00\x00" \
"\x50\x00\x00\x00\x37\x13\x03\x00" \
"\x00\x00\x00\x00\x00\x20\x00\x00" \
"\x00\x02\x00\x00\x00\x00\x10\x00" \
"\x00\x00\x02\x00\x00\x00\x00\x00" \
"\x00\x00\x00\x00\x00\x00\x00\x00"
BLOCK_ITSF = \
"\x49\x54\x53\x46\x04\x00\x00\x00" \
"\x20\x00\x00\x00\x01\x00\x00\x00"
MSDES_CONTROL = \
"\x03\x00\x00\x00\x29\x17\x00\x00" \
"\x01\x00\x00\x00\xa5\xa5\x00\x00"
LZXC_CONTROL = \
"\x07\x00\x00\x00\x4c\x5a\x58\x43" \
"\x03\x00\x00\x00\x04\x00\x00\x00" \
"\x04\x00\x00\x00\x02\x00\x00\x00" \
"\x00\x00\x00\x00\x00\x00\x00\x00"
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
def prefixname(name, nsrmap):
    """Convert a Clark-notation '{uri}local' name into 'prefix:local'
    using the namespace-to-prefix map *nsrmap*; a namespace mapped to
    no prefix yields the bare local name."""
    prefix = nsrmap[namespace(name)]
    local = barename(name)
    return ':'.join((prefix, local)) if prefix else local
def decint(value):
    """Encode a non-negative integer as a big-endian base-128 varint:
    the low 7 bits land in the final byte, and every earlier byte has
    its high bit set as a continuation flag."""
    encoded = [chr(value & 0x7f)]
    value >>= 7
    while value:
        encoded.append(chr((value & 0x7f) | 0x80))
        value >>= 7
    encoded.reverse()
    return ''.join(encoded)
def randbytes(n):
    """Return *n* random bytes as a byte string.

    NOTE(review): uses the non-cryptographic random module; fine for
    filler/padding, but confirm no caller relies on this for security.
    """
    return ''.join(chr(random.randint(0, 255)) for x in xrange(n))
def warn(x):
    # Default warning sink: plain print; callers may inject their own
    # logger-backed replacement (see ReBinary's warn parameter).
    print x
class ReBinary(object):
    """Re-binarize an XHTML (or OPF) element tree into the LIT
    tagged-binary stream, collecting anchors and page breaks."""

    # Default namespace-to-prefix map: default namespace unprefixed,
    # XML namespace as 'xml'.
    NSRMAP = {'': None, XML_NS: 'xml'}

    def __init__(self, root, path, oeb, map=HTML_MAP, warn=warn):
        """Binarize *root* immediately; results are exposed as the
        content, ahc, aht and page_breaks attributes."""
        self.path = path
        self.log_warn = warn
        self.dir = os.path.dirname(path)
        self.manifest = oeb.manifest
        self.tags, self.tattrs = map
        self.buf = StringIO()
        self.anchors = []
        self.page_breaks = []
        # Only HTML content gets CSS-driven layout decisions and
        # anchor tables; OPF is binarized without a stylizer.
        self.is_html = is_html = map is HTML_MAP
        self.stylizer = Stylizer(root, path, oeb) if is_html else None
        self.tree_to_binary(root)
        self.content = self.buf.getvalue()
        self.ahc = self.build_ahc() if is_html else None
        self.aht = self.build_aht() if is_html else None
    def write(self, *values):
        """Append values to the buffer: ints are written as UTF-8
        encoded code points, strings UTF-8 encoded as-is."""
        for value in values:
            if isinstance(value, (int, long)):
                value = unichr(value)
            self.buf.write(value.encode('utf-8'))
def is_block(self, style):
return style['display'] not in ('inline', 'inline-block')
def tree_to_binary(self, elem, nsrmap=NSRMAP, parents=[],
inhead=False, preserve=False):
if not isinstance(elem.tag, basestring):
self.write(etree.tostring(elem))
return
nsrmap = copy.copy(nsrmap)
attrib = dict(elem.attrib)
style = self.stylizer.style(elem) if self.stylizer else None
for key, value in elem.nsmap.items():
if value not in nsrmap or nsrmap[value] != key:
xmlns = ('xmlns:' + key) if key else 'xmlns'
attrib[xmlns] = value
nsrmap[value] = key
tag = prefixname(elem.tag, nsrmap)
tag_offset = self.buf.tell()
if tag == 'head':
inhead = True
flags = FLAG_OPENING
if not elem.text and len(elem) == 0:
flags |= FLAG_CLOSING
if inhead:
flags |= FLAG_HEAD
if style and self.is_block(style):
flags |= FLAG_BLOCK
self.write(0, flags)
tattrs = self.tattrs[0]
if tag in self.tags:
index = self.tags[tag]
self.write(index)
if self.tattrs[index]:
tattrs = self.tattrs[index]
else:
self.write(FLAG_CUSTOM, len(tag)+1, tag)
last_break = self.page_breaks[-1][0] if self.page_breaks else None
if style and last_break != tag_offset \
and style['page-break-before'] not in ('avoid', 'auto'):
self.page_breaks.append((tag_offset, list(parents)))
for attr, value in attrib.items():
attr = prefixname(attr, nsrmap)
if attr in ('href', 'src'):
value = urlnormalize(value)
path, frag = urldefrag(value)
prefix = unichr(3)
if path in self.manifest.hrefs:
prefix = unichr(2)
value = self.manifest.hrefs[path].id
if frag:
value = '#'.join((value, frag))
value = prefix + value
elif attr in ('id', 'name'):
self.anchors.append((value, tag_offset))
elif attr.startswith('ms--'):
attr = '%' + attr[4:]
elif tag == 'link' and attr == 'type' and value in OEB_STYLES:
value = OEB_CSS_MIME
if attr in tattrs:
self.write(tattrs[attr])
else:
self.write(FLAG_CUSTOM, len(attr)+1, attr)
try:
self.write(ATTR_NUMBER, int(value)+1)
except ValueError:
self.write(len(value)+1, value)
self.write(0)
old_preserve = preserve
if style:
preserve = (style['white-space'] in ('pre', 'pre-wrap'))
xml_space = elem.get(XML('space'))
if xml_space == 'preserve':
preserve = True
elif xml_space == 'normal':
preserve = False
if elem.text:
if preserve:
self.write(elem.text)
elif len(elem) == 0 or not elem.text.isspace():
self.write(COLLAPSE.sub(' ', elem.text))
# else: de nada
parents.append(tag_offset)
child = cstyle = nstyle = None
for next in chain(elem, [None]):
if self.stylizer:
nstyle = None if next is None else self.stylizer.style(next)
if child is not None:
if not preserve \
and (inhead or not nstyle
or self.is_block(cstyle)
or self.is_block(nstyle)) \
and child.tail and child.tail.isspace():
child.tail = None
self.tree_to_binary(child, nsrmap, parents, inhead, preserve)
child, cstyle = next, nstyle
parents.pop()
preserve = old_preserve
if not flags & FLAG_CLOSING:
self.write(0, (flags & ~FLAG_OPENING) | FLAG_CLOSING, 0)
if elem.tail and tag != 'html':
tail = elem.tail
if not preserve:
tail = COLLAPSE.sub(' ', tail)
self.write(tail)
if style and style['page-break-after'] not in ('avoid', 'auto'):
self.page_breaks.append((self.buf.tell(), list(parents)))
    def build_ahc(self):
        """Build the binary anchor table: a count followed by
        (name, stream offset) records.  Counts and lengths are UTF-8
        encoded code points."""
        # NOTE(review): the six-anchor limit presumably reflects an
        # MS Reader restriction -- confirm against the format docs.
        if len(self.anchors) > 6:
            self.log_warn("More than six anchors in file %r. " \
                "Some links may not work properly." % self.path)
        data = StringIO()
        data.write(unichr(len(self.anchors)).encode('utf-8'))
        for anchor, offset in self.anchors:
            data.write(unichr(len(anchor)).encode('utf-8'))
            data.write(anchor)
            data.write(pack('<I', offset))
        return data.getvalue()
    def build_aht(self):
        """Placeholder anchor hash table: a single zero count."""
        return pack('<I', 0)
def preserve(function):
    """Decorator: invoke *function*, then restore the instance's
    _stream position to where it was before the call."""
    @functools.wraps(function)
    def wrapper(self, *args, **kwargs):
        saved = self._stream.tell()
        try:
            return function(self, *args, **kwargs)
        finally:
            self._stream.seek(saved)
    return wrapper
class LitWriter(object, calibre.LoggingInterface):
    """Serialize an OEBBook into a Microsoft LIT file."""
    # NOTE(review): listing object before LoggingInterface is unusual;
    # confirm LoggingInterface is an old-style class or this MRO fails.

    def __init__(self, oeb, verbose=0):
        calibre.LoggingInterface.__init__(self, logging.getLogger('oeb2lit'))
        self.setup_cli_handler(verbose)
        self._oeb = oeb
        # Massage the book (version stamp, cover/guide entries) into
        # LIT-friendly shape up front.
        self._litize_oeb()
    def _litize_oeb(self):
        """Stamp the converter version and ensure the book has a cover
        image wired into both the metadata and the MS guide entries."""
        oeb = self._oeb
        oeb.metadata.add('calibre-oeb2lit-version', calibre.__version__)
        cover = None
        if oeb.metadata.cover:
            id = str(oeb.metadata.cover[0])
            cover = oeb.manifest[id]
        elif MS_COVER_TYPE in oeb.guide:
            href = oeb.guide[MS_COVER_TYPE].href
            cover = oeb.manifest.hrefs[href]
        else:
            # Fall back to the first <img> of the first spine document.
            html = oeb.spine[0].data
            imgs = xpath(html, '//img[position()=1]')
            href = imgs[0].get('src') if imgs else None
            cover = oeb.manifest.hrefs[href] if href else None
        if cover:
            if not oeb.metadata.cover:
                oeb.metadata.add('cover', cover.id)
            # Advertise the cover under every MS cover guide type.
            for type, title in ALL_MS_COVER_TYPES:
                if type not in oeb.guide:
                    oeb.guide.add(type, title, cover.href)
        else:
            self.log_warn('No suitable cover image found.')
    def dump(self, stream):
        """Write the complete LIT file to *stream*."""
        self._stream = stream
        # Four content sections: Uncompressed, MSCompressed,
        # EbEncryptDS, EbEncryptOnlyDS (see _build_namelist).
        self._sections = [StringIO() for i in xrange(4)]
        self._directory = []
        self._meta = None
        self._dump()
def _write(self, *data):
for datum in data:
self._stream.write(datum)
    @preserve
    def _writeat(self, pos, *data):
        """Write *data* at absolute offset *pos*; @preserve restores
        the previous stream position afterwards."""
        self._stream.seek(pos)
        self._write(*data)
    def _tell(self):
        """Current absolute offset in the output stream."""
        return self._stream.tell()
    def _dump(self):
        """Lay out and write the whole LIT file: ITOLITLS header, five
        header pieces, then the uncompressed section content.  Offsets
        discovered late (content start, file size, piece spans) are
        back-patched with _writeat."""
        # Build content sections
        self._build_sections()
        # Build directory chunks
        dcounts, dchunks, ichunk = self._build_dchunks()
        # Write headers
        self._write(LIT_MAGIC)
        self._write(pack('<IIII',
            1, PRIMARY_SIZE, 5, SECONDARY_SIZE))
        self._write(packguid(LITFILE_GUID))
        # Reserve space for the five (offset, size) piece descriptors;
        # each is filled in once its piece has been written.
        offset = self._tell()
        pieces = list(xrange(offset, offset + (PIECE_SIZE * 5), PIECE_SIZE))
        self._write((5 * PIECE_SIZE) * '\0')
        # An index chunk only exists when the directory spans multiple
        # chunks; it deepens the directory tree by one level.
        aoli1 = len(dchunks) if ichunk else ULL_NEG1
        last = len(dchunks) - 1
        ddepth = 2 if ichunk else 1
        self._write(pack('<IIQQQQIIIIQIIQQQQIIIIQIIIIQ',
            2, 0x98, aoli1, 0, last, 0, DCHUNK_SIZE, 2, 0, ddepth, 0,
            len(self._directory), 0, ULL_NEG1, 0, 0, 0, CCHUNK_SIZE, 2,
            0, 1, 0, len(dcounts), 0, 0x100000, 0x20000, 0))
        self._write(BLOCK_CAOL)
        self._write(BLOCK_ITSF)
        conoff_offset = self._tell()
        timestamp = int(time.time())
        # 0x409 = en-US language id.
        self._write(pack('<QII', 0, timestamp, 0x409))
        # Piece #0
        piece0_offset = self._tell()
        self._write(pack('<II', 0x1fe, 0))
        filesz_offset = self._tell()
        self._write(pack('<QQ', 0, 0))
        self._writeat(pieces[0], pack('<QQ',
            piece0_offset, self._tell() - piece0_offset))
        # Piece #1: Directory chunks
        piece1_offset = self._tell()
        number = len(dchunks) + ((ichunk and 1) or 0)
        self._write('IFCM', pack('<IIIQQ',
            1, DCHUNK_SIZE, 0x100000, ULL_NEG1, number))
        for dchunk in dchunks:
            self._write(dchunk)
        if ichunk:
            self._write(ichunk)
        self._writeat(pieces[1], pack('<QQ',
            piece1_offset, self._tell() - piece1_offset))
        # Piece #2: Count chunks
        piece2_offset = self._tell()
        self._write('IFCM', pack('<IIIQQ',
            1, CCHUNK_SIZE, 0x20000, ULL_NEG1, 1))
        cchunk = StringIO()
        last = 0
        for i, dcount in izip(count(), dcounts):
            cchunk.write(decint(last))
            cchunk.write(decint(dcount))
            cchunk.write(decint(i))
            last = dcount
        cchunk = cchunk.getvalue()
        rem = CCHUNK_SIZE - (len(cchunk) + 50)
        self._write('AOLL', pack('<IQQQQQ',
            rem, 0, ULL_NEG1, ULL_NEG1, 0, 1))
        filler = '\0' * rem
        self._write(cchunk, filler, pack('<H', len(dcounts)))
        self._writeat(pieces[2], pack('<QQ',
            piece2_offset, self._tell() - piece2_offset))
        # Piece #3: GUID3
        piece3_offset = self._tell()
        self._write(packguid(PIECE3_GUID))
        self._writeat(pieces[3], pack('<QQ',
            piece3_offset, self._tell() - piece3_offset))
        # Piece #4: GUID4
        piece4_offset = self._tell()
        self._write(packguid(PIECE4_GUID))
        self._writeat(pieces[4], pack('<QQ',
            piece4_offset, self._tell() - piece4_offset))
        # The actual section content
        content_offset = self._tell()
        self._writeat(conoff_offset, pack('<Q', content_offset))
        self._write(self._sections[0].getvalue())
        self._writeat(filesz_offset, pack('<Q', self._tell()))
def _add_file(self, name, data, secnum=0):
if len(data) > 0:
section = self._sections[secnum]
offset = section.tell()
section.write(data)
else:
offset = 0
self._directory.append(
DirectoryEntry(name, secnum, offset, len(data)))
def _add_folder(self, name, offset=0, size=0):
if not name.endswith('/'):
name += '/'
self._directory.append(
DirectoryEntry(name, 0, offset, size))
    def _djoin(self, *names):
        """Join internal directory path components with '/'."""
        return '/'.join(names)
    def _build_sections(self):
        """Populate all four content sections and the directory, in the
        order the LIT layout expects."""
        self._add_folder('/', ROOT_OFFSET, ROOT_SIZE)
        self._build_data()
        self._build_manifest()
        self._build_page_breaks()
        self._build_meta()
        self._build_drm_storage()
        self._build_version()
        self._build_namelist()
        self._build_storage()
        self._build_transforms()
    def _build_data(self):
        """Store each manifest item under /data/<id>; markup (anything
        whose data is a parsed tree rather than a string) is binarized
        via ReBinary and goes into the compressed section 1."""
        self._add_folder('/data')
        for item in self._oeb.manifest.values():
            if item.media_type not in LIT_MIMES:
                self.log_warn("File %r of unknown media-type %r " \
                    "excluded from output." % (item.href, item.media_type))
                continue
            name = '/data/' + item.id
            data = item.data
            secnum = 0
            if not isinstance(data, basestring):
                self._add_folder(name)
                rebin = ReBinary(data, item.href, self._oeb, warn=self.log_warn)
                self._add_file(name + '/ahc', rebin.ahc, 0)
                self._add_file(name + '/aht', rebin.aht, 0)
                item.page_breaks = rebin.page_breaks
                data = rebin.content
                name = name + '/content'
                secnum = 1
            self._add_file(name, data, secnum)
            # Remember the binary size for manifest offsets.
            item.size = len(data)
    def _build_manifest(self):
        """Write the /manifest listing: items bucketed into linear,
        nonlinear, css and image groups, each record carrying its
        content offset, id, href and media type."""
        states = ['linear', 'nonlinear', 'css', 'images']
        manifest = dict((state, []) for state in states)
        for item in self._oeb.manifest.values():
            if item.spine_position is not None:
                key = 'linear' if item.linear else 'nonlinear'
                manifest[key].append(item)
            elif item.media_type == CSS_MIME:
                manifest['css'].append(item)
            elif item.media_type in LIT_IMAGES:
                manifest['images'].append(item)
        data = StringIO()
        data.write(pack('<Bc', 1, '\\'))
        offset = 0
        for state in states:
            items = manifest[state]
            items.sort()
            data.write(pack('<I', len(items)))
            for item in items:
                id, media_type = item.id, item.media_type
                href = urlunquote(item.href)
                # Only spine content has a meaningful running offset.
                item.offset = offset \
                    if state in ('linear', 'nonlinear') else 0
                data.write(pack('<I', item.offset))
                # Each field is a UTF-8 length code point + text.
                entry = [unichr(len(id)), unicode(id),
                         unichr(len(href)), unicode(href),
                         unichr(len(media_type)), unicode(media_type)]
                for value in entry:
                    data.write(value.encode('utf-8'))
                data.write('\0')
                offset += item.size
        self._add_file('/manifest', data.getvalue())
    def _build_page_breaks(self):
        """Write the /pb1, /pb2 and /pb3 page-break tables: break
        offsets with pointers into the parent table (pb1), parent
        offset lists (pb2), and a 2-bits-per-break flag bitmap (pb3)."""
        pb1 = StringIO()
        pb2 = StringIO()
        pb3 = StringIO()
        pb3cur = 0
        bits = 0
        for item in self._oeb.spine:
            page_breaks = copy.copy(item.page_breaks)
            if not item.linear:
                # Nonlinear items always break at their start.
                page_breaks.insert(0, (0, []))
            for pbreak, parents in page_breaks:
                # Two flag bits per break, packed MSB-first into bytes.
                pb3cur = (pb3cur << 2) | 1
                if len(parents) > 1:
                    pb3cur |= 0x2
                bits += 2
                if bits >= 8:
                    pb3.write(pack('<B', pb3cur))
                    pb3cur = 0
                    bits = 0
                # Convert the item-relative offset to a global one.
                pbreak += item.offset
                pb1.write(pack('<II', pbreak, pb2.tell()))
                pb2.write(pack('<I', len(parents)))
                for parent in parents:
                    pb2.write(pack('<I', parent))
        if bits != 0:
            # Flush the final partially-filled flag byte.
            pb3cur <<= (8 - bits)
            pb3.write(pack('<B', pb3cur))
        self._add_file('/pb1', pb1.getvalue(), 0)
        self._add_file('/pb2', pb2.getvalue(), 0)
        self._add_file('/pb3', pb3.getvalue(), 0)
    def _build_meta(self):
        """Binarize the OPF package (with MS-specific attributes added)
        and store it as /meta; the bytes are kept for DRM key
        derivation."""
        _, meta = self._oeb.to_opf1()[OPF_MIME]
        meta.attrib['ms--minimum_level'] = '0'
        meta.attrib['ms--attr5'] = '1'
        meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
        rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP, warn=self.log_warn)
        meta = rebin.content
        self._meta = meta
        self._add_file('/meta', meta)
    def _build_drm_storage(self):
        """Emit the DRM storage for an unencrypted ('sealed' but
        key-less) book: a dummy DRMSource, a sealed block encrypted
        with a key derived from /meta + DRMSource, and a null book
        key used by _build_storage for the DES sections."""
        drmsource = u'Free as in freedom\0'.encode('utf-16-le')
        self._add_file('/DRMStorage/DRMSource', drmsource)
        tempkey = self._calculate_deskey([self._meta, drmsource])
        msdes.deskey(tempkey, msdes.EN0)
        self._add_file('/DRMStorage/DRMSealed', msdes.des("\0" * 16))
        self._bookkey = '\0' * 8
        self._add_file('/DRMStorage/ValidationStream', 'MSReader', 3)
    def _build_version(self):
        """Store the LIT format version (8.1)."""
        self._add_file('/Version', pack('<HH', 8, 1))
    def _build_namelist(self):
        """Write the ::DataSpace/NameList table naming the four content
        sections, each name UTF-16-LE with a length prefix."""
        data = StringIO()
        data.write(pack('<HH', 0x3c, len(self._sections)))
        names = ['Uncompressed', 'MSCompressed', 'EbEncryptDS',
                 'EbEncryptOnlyDS']
        for name in names:
            data.write(pack('<H', len(name)))
            data.write(name.encode('utf-16-le'))
            data.write('\0\0')
        self._add_file('::DataSpace/NameList', data.getvalue())
def _build_storage(self):
    """Write the compressed/encrypted content sections along with
    their DataSpace control files, span info and transform data.
    """
    mapping = [(1, 'MSCompressed', (LZXCOMPRESS_GUID,)),
        (2, 'EbEncryptDS', (LZXCOMPRESS_GUID, DESENCRYPT_GUID)),
        (3, 'EbEncryptOnlyDS', (DESENCRYPT_GUID,)),]
    for secnum, name, transforms in mapping:
        root = '::DataSpace/Storage/' + name
        data = self._sections[secnum].getvalue()
        cdata, sdata, tdata, rdata = '', '', '', ''
        for guid in transforms:
            # Transform list is built innermost-first; span info gets
            # one length entry per applied transform.
            tdata = packguid(guid) + tdata
            sdata = sdata + pack('<Q', len(data))
            if guid == DESENCRYPT_GUID:
                cdata = MSDES_CONTROL + cdata
                if not data: continue
                msdes.deskey(self._bookkey, msdes.EN0)
                # DES works on 8-byte blocks; zero-pad the tail.
                pad = 8 - (len(data) & 0x7)
                if pad != 8:
                    data = data + ('\0' * pad)
                data = msdes.des(data)
            elif guid == LZXCOMPRESS_GUID:
                cdata = LZXC_CONTROL + cdata
                if not data: continue
                unlen = len(data)
                # 2**17 = 128K LZX window.
                lzx = Compressor(17)
                data, rtable = lzx.compress(data, flush=True)
                rdata = StringIO()
                # Reset-table header; 0x8000 is presumably the bytes
                # per reset interval -- confirm against the LIT spec.
                rdata.write(pack('<IIIIQQQQ',
                    3, len(rtable), 8, 0x28, unlen, len(data), 0x8000, 0))
                for uncomp, comp in rtable[:-1]:
                    rdata.write(pack('<Q', comp))
                rdata = rdata.getvalue()
        self._add_file(root + '/Content', data)
        self._add_file(root + '/ControlData', cdata)
        self._add_file(root + '/SpanInfo', sdata)
        self._add_file(root + '/Transform/List', tdata)
        troot = root + '/Transform'
        for guid in transforms:
            dname = self._djoin(troot, guid, 'InstanceData')
            self._add_folder(dname)
            if guid == LZXCOMPRESS_GUID:
                dname += '/ResetTable'
                self._add_file(dname, rdata)
def _build_transforms(self):
    """Create the storage folders for the two global content
    transforms (LZX compression and DES encryption).
    """
    self._add_folder('::Transform/' + LZXCOMPRESS_GUID)
    self._add_folder('::Transform/' + DESENCRYPT_GUID)
def _calculate_deskey(self, hashdata):
    """Derive an 8-byte DES key by folding the MS-SHA1 digest of the
    given data blocks down to eight bytes with XOR.
    """
    prepad = 2
    hash = mssha1.new()
    for data in hashdata:
        if prepad > 0:
            # Only the very first block is prefixed with two NULs.
            data = ("\000" * prepad) + data
            prepad = 0
        # Zero-pad each block up to a multiple of 64 bytes.
        postpad = 64 - (len(data) % 64)
        if postpad < 64:
            data = data + ("\000" * postpad)
        hash.update(data)
    digest = hash.digest()
    # XOR-fold the 20-byte digest into 8 key bytes.
    key = [0] * 8
    for i in xrange(0, len(digest)):
        key[i % 8] ^= ord(digest[i])
    return ''.join(chr(x) for x in key)
def _build_dchunks(self):
    """Split the directory into fixed-size AOLL chunks plus an
    optional AOLI index chunk.

    Returns (dcounts, dchunks, ichunk): the per-chunk entry counts,
    the serialized directory chunks, and the index chunk (None when a
    single directory chunk suffices).
    """
    ddata = []
    # Entries are sorted case-insensitively by name.
    directory = list(self._directory)
    directory.sort(cmp=lambda x, y: \
        cmp(x.name.lower(), y.name.lower()))
    # Quickref density: one quickref offset every 5 entries.
    qrn = 1 + (1 << 2)
    dchunk = StringIO()
    dcount = 0
    quickref = []
    name = directory[0].name
    for entry in directory:
        next = ''.join([decint(len(entry.name)), entry.name,
            decint(entry.section), decint(entry.offset),
            decint(entry.size)])
        # 52 presumably covers the fixed AOLL header/trailer overhead
        # (cf. the 50 + 2-byte count used below) -- confirm.
        usedlen = dchunk.tell() + len(next) + (len(quickref) * 2) + 52
        if usedlen >= DCHUNK_SIZE:
            # Chunk full: close it out, remembering its first name
            # for the index chunk.
            ddata.append((dchunk.getvalue(), quickref, dcount, name))
            dchunk = StringIO()
            dcount = 0
            quickref = []
            name = entry.name
        if (dcount % qrn) == 0:
            quickref.append(dchunk.tell())
        dchunk.write(next)
        dcount = dcount + 1
    ddata.append((dchunk.getvalue(), quickref, dcount, name))
    cidmax = len(ddata) - 1
    rdcount = 0  # running total of entries in preceding chunks
    dchunks = []
    dcounts = []
    ichunk = None
    if len(ddata) > 1:
        ichunk = StringIO()
    for cid, (content, quickref, dcount, name) in izip(count(), ddata):
        dchunk = StringIO()
        # Chunks are doubly linked; ULL_NEG1 terminates each end.
        prev = cid - 1 if cid > 0 else ULL_NEG1
        next = cid + 1 if cid < cidmax else ULL_NEG1
        rem = DCHUNK_SIZE - (len(content) + 50)
        pad = rem - (len(quickref) * 2)
        dchunk.write('AOLL')
        dchunk.write(pack('<IQQQQQ', rem, cid, prev, next, rdcount, 1))
        dchunk.write(content)
        dchunk.write('\0' * pad)
        # Quickref offsets are written back-to-front at the tail,
        # followed by the entry count.
        for ref in reversed(quickref):
            dchunk.write(pack('<H', ref))
        dchunk.write(pack('<H', dcount))
        rdcount = rdcount + dcount
        dchunks.append(dchunk.getvalue())
        dcounts.append(dcount)
        if ichunk:
            ichunk.write(decint(len(name)))
            ichunk.write(name)
            ichunk.write(decint(cid))
    if ichunk:
        rem = DCHUNK_SIZE - (ichunk.tell() + 16)
        pad = rem - 2
        ichunk = ''.join(['AOLI', pack('<IQ', rem, len(dchunks)),
            ichunk.getvalue(), ('\0' * pad), pack('<H', len(dchunks))])
    return dcounts, dchunks, ichunk
def option_parser():
    """Create the command-line option parser for oeb2lit."""
    from calibre.utils.config import OptionParser
    opt_parser = OptionParser(usage=_('%prog [options] OPFFILE'))
    opt_parser.add_option('-o', '--output', default=None,
        help=_('Output file. Default is derived from input filename.'))
    return opt_parser
def oeb2lit(opts, opfpath):
    """Convert the OEB book described by opfpath to a LIT file.

    The output path comes from opts.output, defaulting to the input
    basename with a '.lit' extension.
    """
    outpath = opts.output
    if outpath is None:
        outpath = os.path.splitext(os.path.basename(opfpath))[0] + '.lit'
    outpath = os.path.abspath(outpath)
    writer = LitWriter(OEBBook(opfpath), opts.verbose)
    with open(outpath, 'wb') as stream:
        writer.dump(stream)
    logging.getLogger('oeb2lit').info(_('Output written to ') + outpath)
def main(argv=sys.argv):
    """Command-line entry point: convert a single OPF file to LIT."""
    parser = option_parser()
    opts, args = parser.parse_args(argv[1:])
    if len(args) != 1:
        # Exactly one OPF file is required.
        parser.print_help()
        return 1
    oeb2lit(opts, args[0])
    return 0

if __name__ == '__main__':
    sys.exit(main())

View File

@ -473,9 +473,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
return current_page
def save_current_position(self):
pos = self.view.bookmark()
bookmark = '%d#%s'%(self.current_index, pos)
self.iterator.add_bookmark(('calibre_current_page_bookmark', bookmark))
try:
pos = self.view.bookmark()
bookmark = '%d#%s'%(self.current_index, pos)
self.iterator.add_bookmark(('calibre_current_page_bookmark', bookmark))
except:
traceback.print_exc()
def load_ebook(self, pathtoebook):
if self.iterator is not None:

View File

@ -47,6 +47,7 @@ entry_points = {
'fb2-meta = calibre.ebooks.metadata.fb2:main',
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
'any2epub = calibre.ebooks.epub.from_any:main',
'any2lit = calibre.ebooks.lit.from_any:main',
'lrf2lrs = calibre.ebooks.lrf.lrfparser:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
'pdfreflow = calibre.ebooks.lrf.pdf.reflow:main',
@ -55,6 +56,7 @@ entry_points = {
'mobi2oeb = calibre.ebooks.mobi.reader:main',
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
'lit2oeb = calibre.ebooks.lit.reader:main',
'oeb2lit = calibre.ebooks.lit.writer:main',
'comic2lrf = calibre.ebooks.lrf.comic.convert_from:main',
'comic2epub = calibre.ebooks.epub.from_comic:main',
'calibre-debug = calibre.debug:main',
@ -183,6 +185,7 @@ def setup_completion(fatal_errors):
from calibre.ebooks.odt.to_oeb import option_parser as odt2oeb
from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
from calibre.ebooks.epub.from_any import option_parser as any2epub
from calibre.ebooks.lit.from_any import option_parser as any2lit
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
from calibre.gui2.main import option_parser as guiop
any_formats = ['epub', 'htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip',
@ -206,7 +209,8 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
f.write(opts_and_exts('any2lrf', htmlop, any_formats))
f.write(opts_and_exts('calibre', guiop, any_formats))
f.write(opts_and_exts('any2lrf', any2epub, any_formats))
f.write(opts_and_exts('any2epub', any2epub, any_formats))
f.write(opts_and_exts('any2lit', any2lit, any_formats))
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))

View File

@ -0,0 +1,375 @@
/* __license__ = 'GPL v3'
* __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
*
* Python/C implementation of an LZX compressor type.
*/
#include <Python.h>
#include <structmember.h>
#include <lzxc.h>
#include <lzxmodule.h>
/* Zero-initialize a buffer_t in place. */
#define BUFFER_INIT(buffer) \
do { \
    (buffer).data = NULL; \
    (buffer).size = 0; \
    (buffer).offset = 0; \
} while (0)

/* Total unconsumed input: carried-over residue plus current input. */
#define COMPRESSOR_REMAINING(compressor) \
    (((compressor)->residue.size - (compressor)->residue.offset) \
     + ((compressor)->input.size - (compressor)->input.offset))

/* Simple sized byte buffer with a read/write cursor. */
typedef struct buffer_t {
    void *data;
    unsigned int size;    /* valid/allocated bytes in data */
    unsigned int offset;  /* current cursor position */
} buffer_t;

/* Python object wrapping an lzxc compression stream. */
typedef struct Compressor {
    PyObject_HEAD
    int reset;                  /* reset LZX state after each block? */
    int wbits;                  /* window size in bits */
    int blocksize;              /* 1 << wbits */
    int flushing;               /* nonzero once a flush was requested */
    struct lzxc_data *stream;   /* underlying lzxc compressor */
    buffer_t residue;           /* input carried between compress() calls */
    buffer_t input;             /* borrowed view of the current input */
    buffer_t output;            /* reusable compressed-output buffer */
    PyObject *rtable;           /* list of (uncomp, comp) frame entries */
} Compressor;

static PyMemberDef Compressor_members[] = {
    { "reset", T_INT, offsetof(Compressor, reset), READONLY,
      "whether or not the Compressor resets each block" },
    { "wbits", T_INT, offsetof(Compressor, wbits), READONLY,
      "window size in bits" },
    { "blocksize", T_INT, offsetof(Compressor, blocksize), READONLY,
      "block size in bytes" },
    { NULL }
};
/* GC traversal: the reset-table list is the only PyObject we own. */
static int
Compressor_traverse(Compressor *self, visitproc visit, void *arg)
{
    Py_VISIT(self->rtable);
    return 0;
}

/* Drop owned references (for cyclic GC and dealloc). */
static int
Compressor_clear(Compressor *self)
{
    Py_CLEAR(self->rtable);
    return 0;
}

/* Release the lzxc stream and heap buffers, then free the object.
 * input.data is borrowed from the caller and is deliberately not
 * freed here. */
static void
Compressor_dealloc(Compressor *self)
{
    Compressor_clear(self);
    if (self->stream) {
        lzxc_finish(self->stream, NULL);
        self->stream = NULL;
    }
    if (self->residue.data) {
        PyMem_Free(self->residue.data);
        self->residue.data = NULL;
    }
    if (self->output.data) {
        PyMem_Free(self->output.data);
        self->output.data = NULL;
    }
    self->ob_type->tp_free((PyObject *)self);
}
/* tp_new: allocate the object and give every field a safe default so
 * that dealloc is valid even if tp_init never runs or fails. */
static PyObject *
Compressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    Compressor *self = NULL;

    self = (Compressor *)type->tp_alloc(type, 0);
    if (self != NULL) {
        self->rtable = PyList_New(0);
        if (self->rtable == NULL) {
            Py_DECREF(self);
            return NULL;
        }
        self->wbits = 0;
        self->blocksize = 0;
        self->flushing = 0;
        BUFFER_INIT(self->residue);
        BUFFER_INIT(self->input);
        BUFFER_INIT(self->output);
    }

    return (PyObject *)self;
}
/* lzxc input callback: copy up to nbytes bytes into buf, draining the
 * residue left over from the previous compress() call before touching
 * the current input buffer.  Returns the number of bytes provided. */
static int
get_bytes(void *context, int nbytes, void *buf)
{
    Compressor *self = (Compressor *)context;
    unsigned char *data = (unsigned char *)buf;
    buffer_t *residue = &self->residue;
    buffer_t *input = &self->input;
    int resrem = residue->size - residue->offset;
    int inrem = input->size - input->offset;

    if (resrem > 0) {
        if (nbytes <= resrem) {
            /* The residue alone satisfies the request.  (The original
             * condition was inverted: copying nbytes when the residue
             * held only resrem < nbytes bytes read past the end of the
             * residue buffer, and the else branch could write resrem >
             * nbytes bytes into the caller's buffer.) */
            memcpy(data, residue->data + residue->offset, nbytes);
            residue->offset += nbytes;
            return nbytes;
        } else {
            /* Consume the whole residue, then fall through to the
             * input buffer for the rest. */
            memcpy(data, residue->data + residue->offset, resrem);
            residue->offset += resrem;
            data += resrem;
            nbytes -= resrem;
        }
    }

    if (inrem == 0) {
        return resrem;
    } else if (nbytes > inrem) {
        nbytes = inrem;
    }
    memcpy(data, input->data + input->offset, nbytes);
    input->offset += nbytes;
    return nbytes + resrem;
}
/* lzxc EOF callback: true once flush was requested and all pending
 * input has been consumed. */
static int
at_eof(void *context)
{
    Compressor *self = (Compressor *)context;
    return (self->flushing && (COMPRESSOR_REMAINING(self) == 0));
}

/* lzxc output callback: append compressed bytes to the output buffer.
 * The buffer is pre-sized by Compressor_compress__; on overflow a
 * Python error is set and the write is truncated. */
static int
put_bytes(void *context, int nbytes, void *data)
{
    Compressor *self = (Compressor *)context;
    buffer_t *output = &self->output;
    int remaining = output->size - output->offset;

    if (nbytes > remaining) {
        PyErr_SetString(LZXError,
            "Attempt to write compressed data beyond end of buffer");
        nbytes = remaining;
    }
    memcpy(output->data + output->offset, data, nbytes);
    output->offset += nbytes;
    return nbytes;
}

/* lzxc frame callback: record an (uncomp, comp) pair in the reset
 * table list.  Append/allocation failures are silently ignored.
 * NOTE(review): "(LL)" expects long long varargs while uncomp/comp
 * are uint32_t; this relies on platform argument handling -- confirm
 * ("(kk)" or explicit casts would be strictly correct). */
static void
mark_frame(void *context, uint32_t uncomp, uint32_t comp)
{
    Compressor *self = (Compressor *)context;
    PyObject *rtable = self->rtable;
    PyObject *entry = NULL;

    entry = Py_BuildValue("(LL)", uncomp, comp);
    if (entry) {
        PyList_Append(rtable, entry);
        Py_DECREF(entry);
    }
}
/* __init__(wbits, reset=True): (re)create the lzxc stream with a
 * 2**wbits-byte window and allocate the residue buffer to match.
 * NOTE(review): "b" parses `reset` as a single byte into an int
 * field; with the preceding `self->reset = 1` this is correct only
 * on little-endian layouts -- confirm. */
static int
Compressor_init(Compressor *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"wbits", "reset", NULL};
    int wbits = 0;
    int retval = 0;

    self->reset = 1;
    if (!PyArg_ParseTupleAndKeywords(
            args, kwds, "I|b", kwlist, &wbits, &self->reset)) {
        return -1;
    }

    /* TODO: check window size. */
    self->wbits = wbits;
    self->blocksize = 1 << wbits;
    self->residue.data = PyMem_Realloc(self->residue.data, self->blocksize);
    if (self->residue.data == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    /* Re-initialization discards any previous stream. */
    if (self->stream != NULL) {
        lzxc_finish(self->stream, NULL);
    }
    retval = lzxc_init(&self->stream, wbits, get_bytes, self, at_eof,
        put_bytes, self, mark_frame, self);
    if (retval != 0) {
        self->stream = NULL;
        PyErr_SetString(LZXError, "Failed to create compression stream");
        return -1;
    }

    return 0;
}
/* Shared implementation of compress() and flush().  Feeds (data,
 * inlen) plus any residue to lzxc in blocksize chunks; the
 * unconsumed tail becomes the new residue unless flushing, in which
 * case a final short block is emitted.  Returns a (compressed_bytes,
 * reset_table) tuple, or NULL with a Python error set. */
static PyObject *
Compressor_compress__(
    Compressor *self, unsigned char *data, unsigned int inlen, int flush)
{
    buffer_t *residue = &self->residue;
    buffer_t *input = &self->input;
    buffer_t *output = &self->output;
    unsigned int outlen, remainder;
    int reset = self->reset;
    unsigned int blocksize = self->blocksize;
    int retval = 0;
    PyObject *cdata = NULL;
    PyObject *rtable = NULL;
    PyObject *result = NULL;

    self->flushing = flush;
    /* The input buffer borrows the caller's bytes; get_bytes consumes
     * residue first, then this input. */
    input->data = data;
    input->size = inlen;
    input->offset = 0;

    /* Grow the output buffer for the worst case: round up to whole
     * blocks plus one byte. */
    outlen = inlen;
    remainder = outlen % blocksize;
    if (remainder != 0) {
        outlen += (blocksize - remainder) + 1;
    }
    if (output->size < outlen) {
        output->data = PyMem_Realloc(output->data, outlen);
        if (output->data == NULL) {
            return PyErr_NoMemory();
        }
        output->size = outlen;
    }
    output->offset = 0;

    /* Compress every complete block, optionally resetting the LZX
     * state between blocks. */
    while (COMPRESSOR_REMAINING(self) >= blocksize) {
        retval = lzxc_compress_block(self->stream, blocksize, 1);
        if (retval != 0) {
            PyErr_SetString(LZXError, "Error during compression");
            return NULL;
        }
        if (reset) {
            lzxc_reset(self->stream);
        }
    }

    if (flush && COMPRESSOR_REMAINING(self) > 0) {
        /* Flushing: emit the final short block and clear the residue. */
        retval = lzxc_compress_block(self->stream, blocksize, 1);
        if (retval != 0) {
            PyErr_SetString(LZXError, "Error during compression");
            return NULL;
        }
        if (reset) {
            lzxc_reset(self->stream);
        }
        residue->size = 0;
        residue->offset = 0;
    } else {
        /* Carry the unconsumed tail over to the next call. */
        int reslen = input->size - input->offset;
        memcpy(residue->data, input->data + input->offset, reslen);
        residue->size = reslen;
        residue->offset = 0;
    }

    /* Hand the accumulated reset table to the caller and start a
     * fresh one for subsequent calls. */
    rtable = self->rtable;
    self->rtable = PyList_New(0);
    if (self->rtable == NULL) {
        self->rtable = rtable;
        return NULL;
    }

    cdata = PyString_FromStringAndSize(output->data, output->offset);
    if (cdata == NULL) {
        Py_DECREF(rtable);
        return NULL;
    }

    result = Py_BuildValue("(OO)", cdata, rtable);
    Py_DECREF(rtable);
    Py_DECREF(cdata);
    return result;
}
/* compress(data, flush=False) -> (compressed, reset_table) */
static PyObject *
Compressor_compress(Compressor *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"data", "flush", NULL};
    unsigned char *data = NULL;
    unsigned int inlen = 0;
    int flush = 0;

    if (!PyArg_ParseTupleAndKeywords(
            args, kwds, "s#|b", kwlist, &data, &inlen, &flush)) {
        return NULL;
    }

    return Compressor_compress__(self, data, inlen, flush);
}

/* flush() -> (compressed, reset_table): drain any buffered residue. */
static PyObject *
Compressor_flush(Compressor *self)
{
    return Compressor_compress__(self, NULL, 0, 1);
}

static PyMethodDef Compressor_methods[] = {
    { "compress", (PyCFunction)Compressor_compress,
      METH_VARARGS | METH_KEYWORDS,
      "Return a string containing data LZX compressed." },
    { "flush", (PyCFunction)Compressor_flush, METH_NOARGS,
      "Return a string containing any remaining LZX compressed data." },
    { NULL }
};
/* Type object for lzx.Compressor; participates in cyclic GC via the
 * traverse/clear pair above. */
PyTypeObject CompressorType = {
    PyObject_HEAD_INIT(NULL)
    0,                          /*ob_size*/
    "lzx.Compressor",           /*tp_name*/
    sizeof(Compressor),         /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)Compressor_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    "Compressor objects",       /* tp_doc */
    (traverseproc)Compressor_traverse, /* tp_traverse */
    (inquiry)Compressor_clear,  /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    Compressor_methods,         /* tp_methods */
    Compressor_members,         /* tp_members */
    0,                          /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)Compressor_init,  /* tp_init */
    0,                          /* tp_alloc */
    Compressor_new,             /* tp_new */
};

394
src/calibre/utils/lzx/lzc.c Normal file
View File

@ -0,0 +1,394 @@
/*
File lz_nonslide.c, part of lzxcomp library
Copyright (C) 2002 Matthew T. Russotto
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; version 2.1 only
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Force using (actually working) non-sliding version. */
#define NONSLIDE 1
#define LZ_ONEBUFFER 1
#define LAZY 1
/*
* Document here
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <strings.h>
#include <string.h>
#ifdef DEBUG_PERF
#include <sys/time.h>
#include <sys/resource.h>
#endif
#include "lzc.h"
#define MAX_MATCH 253
#define MIN_MATCH 2
/* Initialize *lzi for compression with a wsize-byte window.  The
 * caller supplies the three I/O callbacks plus an opaque user_data
 * pointer that is passed back to them. */
void lz_init(lz_info *lzi, int wsize, int max_dist,
             int max_match, int min_match,
             int frame_size,
             get_chars_t get_chars,
             output_match_t output_match,
             output_literal_t output_literal, void *user_data)
{
    /* the reason for the separate max_dist value is LZX can't reach the
       first three characters in its nominal window.  But using a smaller
       window results in inefficiency when dealing with reset intervals
       which are the length of the nominal window */
    lzi->wsize = wsize;
    if (max_match > wsize)
        lzi->max_match = wsize;
    else
        lzi->max_match = max_match;
    lzi->min_match = min_match;
    if (lzi->min_match < 3) lzi->min_match = 3;  /* floor at 3 regardless of caller */
    lzi->max_dist = max_dist;
    /* The analysis buffer holds one window plus max_dist of history. */
    lzi->block_buf_size = wsize + lzi->max_dist;
    lzi->block_buf = malloc(lzi->block_buf_size);
    lzi->block_bufe = lzi->block_buf + lzi->block_buf_size;
    assert(lzi->block_buf != NULL);
    lzi->cur_loc = 0;
    lzi->block_loc = 0;
    lzi->chars_in_buf = 0;
    lzi->eofcount = 0;
    lzi->get_chars = get_chars;
    lzi->output_match = output_match;
    lzi->output_literal = output_literal;
    lzi->user_data = user_data;
    lzi->frame_size = frame_size;
    /* Per-position match-analysis tables; filled by lz_analyze_block.
     * NOTE(review): unlike block_buf these calloc results are not
     * checked -- confirm whether that is acceptable here. */
    lzi->lentab = calloc(lzi->block_buf_size + 1, sizeof(int));
    lzi->prevtab = calloc(lzi->block_buf_size + 1, sizeof(u_char *));
    lzi->analysis_valid = 0;
}
/* Free the buffers allocated by lz_init. */
void lz_release(lz_info *lzi)
{
    free(lzi->block_buf);
    free(lzi->lentab);
    free(lzi->prevtab);
}

/* Discard compression history: keep only the not-yet-processed tail
 * of the buffer and invalidate the match analysis. */
void lz_reset(lz_info *lzi)
{
    int residual = lzi->chars_in_buf - lzi->block_loc;

    memmove(lzi->block_buf, lzi->block_buf + lzi->block_loc, residual);
    lzi->chars_in_buf = residual;
    lzi->block_loc = 0;
    lzi->analysis_valid = 0;
}
/* Stand-alone test harness, compiled only with -DLZNONSLIDE_MAIN:
 * reads stdin and writes a textual trace of matches/literals.
 * NOTE(review): main() calls lz_compress with a single argument while
 * the prototype takes (lzi, nchars) -- this harness appears stale and
 * would not compile as-is. */
#ifdef LZNONSLIDE_MAIN
typedef struct lz_user_data
{
    FILE *infile;
    FILE *outfile;
    int R0, R1, R2;
} lz_user_data;

int tmp_get_chars(lz_info *lzi, int n, u_char *buf)
{
    lz_user_data *lzud = (lz_user_data *)lzi->user_data;
    return fread(buf, 1, n, lzud->infile);
}

int tmp_output_match(lz_info *lzi, int match_pos, int match_len)
{
    lz_user_data *lzud = (lz_user_data *)lzi->user_data;
    int mod_match_loc;

    mod_match_loc = match_pos;

    fprintf(lzud->outfile, "(%d, %d)(%d)\n", match_pos, match_len, mod_match_loc);
    return 0;
}

void tmp_output_literal(lz_info *lzi, u_char ch)
{
    lz_user_data *lzud = (lz_user_data *)lzi->user_data;
    fprintf(lzud->outfile, "'%c'", ch);
}

int main(int argc, char *argv[])
{
    int wsize = atoi(argv[1]);
    lz_info lzi;
    lz_user_data lzu = {stdin, stdout, 1, 1, 1};

    lz_init(&lzi, wsize, wsize, MAX_MATCH, MIN_MATCH, 8192, tmp_get_chars, tmp_output_match, tmp_output_literal,&lzu);
    lz_compress(&lzi);
    return 0;
}
#endif
/* Number of bytes read into the buffer but not yet compressed. */
__inline__ int lz_left_to_process(lz_info *lzi)
{
    return lzi->chars_in_buf - lzi->block_loc;
}

/* Top up the block buffer via get_chars until it holds maxchars
 * unprocessed bytes (or the buffer/EOF limit is hit).  A short read
 * bumps eofcount, which marks end of input. */
static void
fill_blockbuf(lz_info *lzi, int maxchars)
{
    int toread;
    u_char *readhere;
    int nread;

    if (lzi->eofcount) return;
    maxchars -= lz_left_to_process(lzi);
    toread = lzi->block_buf_size - lzi->chars_in_buf;
    if (toread > maxchars) toread = maxchars;
    readhere = lzi->block_buf + lzi->chars_in_buf;
    nread = lzi->get_chars(lzi, toread, readhere);
    lzi->chars_in_buf += nread;
    if (nread != toread)
        lzi->eofcount++;
}
/* Build the prevtab/lentab match tables for the current buffer
 * contents.  After this pass, for every buffer position p, lentab[p]
 * holds the longest match length found starting at p and prevtab[p]
 * points at the nearest earlier occurrence of that match. */
static void lz_analyze_block(lz_info *lzi)
{
    int *lentab, *lenp;
    u_char **prevtab, **prevp;
    u_char *bbp, *bbe;
    u_char *chartab[256];   /* last seen position of each byte value */
    u_char *cursor;
    int prevlen;
    int ch;
    int maxlen;
    long wasinc;
    int max_dist = lzi->max_dist;
#ifdef DEBUG_ANALYZE_BLOCK
    static short n = 0;
#endif
#ifdef DEBUG_PERF
    struct rusage innerloop;
    struct timeval innertime, tmptime;
    struct rusage outerloop;
    struct timeval outertime;
    struct rusage initialloop;
    struct timeval initialtime;
    struct rusage totalloop;
    struct timeval totaltime;
#endif

#ifdef DEBUG_ANALYZE_BLOCK
    fprintf(stderr, "Analyzing block %d, cur_loc = %06x\n", n, lzi->cur_loc);
#endif
    memset(chartab, 0, sizeof(chartab));
    prevtab = prevp = lzi->prevtab;
    lentab = lenp = lzi->lentab;
    memset(prevtab, 0, sizeof(*prevtab) * lzi->chars_in_buf);
    memset(lentab, 0, sizeof(*lentab) * lzi->chars_in_buf);
#ifdef DEBUG_PERF
    memset(&innertime, 0, sizeof(innertime));
    memset(&outertime, 0, sizeof(outertime));
    getrusage(RUSAGE_SELF, &initialloop);
    totalloop = initialloop;
#endif
    /* Pass 1: chain every position to the previous occurrence of the
     * same byte value; each such pair is a match of length 1. */
    bbp = lzi->block_buf;
    bbe = bbp + lzi->chars_in_buf;
    while (bbp < bbe) {
        if (chartab[ch = *bbp]) {
            *prevp = chartab[ch];
            *lenp = 1;
        }
        chartab[ch] = bbp;
        bbp++;
        prevp++;
        lenp++;
    }
#ifdef DEBUG_PERF
    initialtime = initialloop.ru_utime;
    getrusage(RUSAGE_SELF, &initialloop);
    timersub(&initialloop.ru_utime, &initialtime, &initialtime);
#endif
    /* Pass 2: repeatedly sweep backwards, extending maxlen-long
     * matches by one byte, until a sweep grows nothing or max_match
     * is reached. */
    wasinc = 1;
    for (maxlen = 1; wasinc && (maxlen < lzi->max_match); maxlen++) {
#ifdef DEBUG_PERF
        getrusage(RUSAGE_SELF, &outerloop);
#endif
        bbp = bbe - maxlen - 1;
        lenp = lentab + lzi->chars_in_buf - maxlen - 1;
        prevp = prevtab + lzi->chars_in_buf - maxlen - 1;
        wasinc = 0;
        while (bbp > lzi->block_buf) {
            if (*lenp == maxlen) {
#ifdef DEBUG_PERF
                getrusage(RUSAGE_SELF, &innerloop);
#endif
                /* Walk the chain of earlier occurrences (within
                 * max_dist) looking for one whose next byte matches. */
                ch = bbp[maxlen];
                cursor = *prevp;
                while(cursor && ((bbp - cursor) <= max_dist)) {
                    prevlen = *(cursor - lzi->block_buf + lentab);
                    if (cursor[maxlen] == ch) {
                        *prevp = cursor;
                        (*lenp)++;
                        wasinc++;
                        break;
                    }
                    if (prevlen != maxlen) break;
                    cursor = *(cursor - lzi->block_buf + prevtab);
                }
#ifdef DEBUG_PERF
                tmptime = innerloop.ru_utime;
                getrusage(RUSAGE_SELF, &innerloop);
                timersub(&innerloop.ru_utime, &tmptime, &tmptime);
                timeradd(&tmptime, &innertime, &innertime);
#endif
            }
            bbp--;
            prevp--;
            lenp--;
        }
#ifdef DEBUG_PERF
        tmptime = outerloop.ru_utime;
        getrusage(RUSAGE_SELF, &outerloop);
        timersub(&outerloop.ru_utime, &tmptime, &tmptime);
        timeradd(&tmptime, &outertime, &outertime);
#endif
        // fprintf(stderr, "maxlen = %d, wasinc = %ld\n", maxlen, wasinc);
    }
#ifdef DEBUG_PERF
    totaltime = totalloop.ru_utime;
    getrusage(RUSAGE_SELF, &totalloop);
    timersub(&totalloop.ru_utime, &totaltime, &totaltime);
    fprintf(stderr, "Time spend in initial loop = %f\n", initialtime.tv_sec + initialtime.tv_usec/(double)1E6);
    fprintf(stderr, "Time spend in outer loop = %f\n", outertime.tv_sec + outertime.tv_usec/(double)1E6);
    fprintf(stderr, "Time spend in inner loop = %f\n", innertime.tv_sec + innertime.tv_usec/(double)1E6);
    fprintf(stderr, "Time spend in all loops = %f\n", totaltime.tv_sec + totaltime.tv_usec/(double)1E6);
#endif
    lzi->analysis_valid = 1;
#ifdef DEBUG_ANALYZE_BLOCK
    fprintf(stderr, "Done analyzing block %d, cur_loc = %06x\n", n++, lzi->cur_loc);
#endif
}
/* Ask lz_compress to return at its next opportunity. */
void lz_stop_compressing(lz_info *lzi)
{
    lzi->stop = 1;
    /*  fprintf(stderr, "Stopping...\n");*/
}
/* Emit up to nchars bytes of input as matches/literals via the
 * output_match/output_literal callbacks, refilling and re-analyzing
 * the block buffer as needed.  Matches never cross a frame boundary
 * and the callback may reject a match (negative return), demoting it
 * to a literal. */
int lz_compress(lz_info *lzi, int nchars)
{
    u_char *bbp, *bbe;
    int *lentab, *lenp;
    u_char **prevtab, **prevp;
    int len;
    int holdback;
    short trimmed;

    lzi->stop = 0;
    while ((lz_left_to_process(lzi) || !lzi->eofcount) && !lzi->stop && nchars > 0) {
#if 1
        if (!lzi->analysis_valid ||
            (!lzi->eofcount &&
             ((lzi->chars_in_buf- lzi->block_loc) < nchars))) {
            /* Slide the buffer: keep max_dist bytes of history plus
             * the unprocessed residual, refill, and re-analyze. */
            int residual = lzi->chars_in_buf - lzi->block_loc;
            int bytes_to_move = lzi->max_dist + residual;
            if (bytes_to_move > lzi->chars_in_buf)
                bytes_to_move = lzi->chars_in_buf;
#ifdef DEBUG_ANALYZE_BLOCK
            fprintf(stderr, "Moving %06x, chars_in_buf %06x, residual = %06x, nchars= %06x block_loc = %06x\n", bytes_to_move, lzi->chars_in_buf, residual, nchars, lzi->block_loc);
#endif
            memmove(lzi->block_buf, lzi->block_buf + lzi->chars_in_buf - bytes_to_move,
                    bytes_to_move);

            lzi->block_loc = bytes_to_move - residual;
            lzi->chars_in_buf = bytes_to_move;
#ifdef DEBUG_ANALYZE_BLOCK
            fprintf(stderr, "New chars_in_buf %06x, new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount);
#endif
            fill_blockbuf(lzi, nchars);
#ifdef DEBUG_ANALYZE_BLOCK
            fprintf(stderr, "Really new chars_in_buf %06x, new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount);
#endif
            lz_analyze_block(lzi);
        }
#else
        if (!lzi->analysis_valid ||
            (lzi->block_loc - lzi->chars_in_buf) == 0) {
            lzi->block_loc = 0;
            lzi->chars_in_buf = 0;
            fill_blockbuf(lzi, nchars);
            lz_analyze_block(lzi);
        }
#endif
        prevtab = prevp = lzi->prevtab + lzi->block_loc;
        lentab = lenp = lzi->lentab + lzi->block_loc;
        bbp = lzi->block_buf + lzi->block_loc;
        /* Hold back max_match bytes at the tail (matches there could
         * still grow) unless we are at EOF. */
        holdback = lzi->max_match;
        if (lzi->eofcount) holdback = 0;
        if (lzi->chars_in_buf < (nchars + lzi->block_loc))
            bbe = lzi->block_buf + lzi->chars_in_buf - holdback;
        else
            bbe = bbp + nchars;
        while ((bbp < bbe) && (!lzi->stop)) {
            trimmed = 0;
            len = *lenp;
            /* Trim the match so it never straddles a frame boundary. */
            if (lzi->frame_size && (len > (lzi->frame_size - lzi->cur_loc % lzi->frame_size))) {
#ifdef DEBUG_TRIMMING
                fprintf(stderr, "Trim for framing: %06x %d %d\n", lzi->cur_loc,len, (lzi->frame_size - lzi->cur_loc % lzi->frame_size));
#endif
                trimmed = 1;
                len = (lzi->frame_size - lzi->cur_loc % lzi->frame_size);
            }
            /* ... nor overrun this call's byte budget. */
            if (len > nchars) {
#ifdef DEBUG_TRIMMING
                fprintf(stderr, "Trim for blocking: %06x %d %d\n", lzi->cur_loc,len, nchars);
#endif
                trimmed = 1;
                len = nchars;
            }
            if (len >= lzi->min_match) {
#ifdef LAZY
                /* Lazy evaluation: emit a literal instead when the
                 * next position has a strictly better match. */
                if ((bbp < bbe -1) && !trimmed &&
                    ((lenp[1] > (len + 1)) /* || ((lenp[1] == len) && (prevp[1] > prevp[0])) */)) {
                    len = 1;
                    /* this is the lazy eval case */
                }
                else
#endif
                    if (lzi->output_match(lzi, (*prevp - lzi->block_buf) - lzi->block_loc,
                                          len) < 0) {
                        //          fprintf(stderr, "Match rejected: %06x %d\n", lzi->cur_loc, len);
                        len = 1; /* match rejected */
                    }
            }
            else
                len = 1;

            if (len < lzi->min_match) {
                assert(len == 1);
                lzi->output_literal(lzi, *bbp);
            }
            //      fprintf(stderr, "len = %3d, *lenp = %3d, cur_loc = %06x, block_loc = %06x\n", len, *lenp, lzi->cur_loc, lzi->block_loc);
            bbp += len;
            prevp += len;
            lenp += len;
            lzi->cur_loc += len;
            lzi->block_loc += len;
            assert(nchars >= len);
            nchars -= len;
        }
    }
    return 0;
}

View File

@ -0,0 +1,60 @@
/*
File lz_nonslide.h, part of lzxcomp library
Copyright (C) 2002 Matthew T. Russotto
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; version 2.1 only
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
typedef struct lz_info lz_info;

/* Callbacks supplied by the user of the LZ engine. */
typedef int (*get_chars_t)(lz_info *lzi, int n, u_char *buf);     /* read up to n bytes into buf; return count */
typedef int (*output_match_t)(lz_info *lzi, int match_pos, int match_len); /* emit a match; negative return rejects it */
typedef void (*output_literal_t)(lz_info *lzi, u_char ch);        /* emit a single literal byte */

struct lz_info
{
    int wsize;             /* window size in bytes */
    int max_match;         /* size of longest match in bytes */
    int min_match;
    u_char *block_buf;     /* analysis buffer: history + pending input */
    u_char *block_bufe;    /* one past the end of block_buf */
    int block_buf_size;
    int chars_in_buf;      /* valid bytes currently in block_buf */
    int cur_loc;           /* location within stream */
    int block_loc;         /* next unprocessed position in block_buf */
    int frame_size;        /* matches never straddle a frame boundary */
    int max_dist;          /* furthest reachable match distance */
    u_char **prevtab;      /* per-position link to previous match candidate */
    int *lentab;           /* per-position best match length */
    short eofcount;        /* nonzero once get_chars came up short */
    short stop;            /* set by lz_stop_compressing */
    short analysis_valid;  /* prevtab/lentab reflect current buffer? */
    get_chars_t get_chars;
    output_match_t output_match;
    output_literal_t output_literal;
    void *user_data;       /* opaque pointer handed back to callbacks */
};

void lz_init(lz_info *lzi, int wsize, int max_dist,
             int max_match, int min_match,
             int frame_size,
             get_chars_t get_chars,
             output_match_t output_match,
             output_literal_t output_literal, void *user_data);

void lz_release(lz_info *lzi);
void lz_reset(lz_info *lzi);
void lz_stop_compressing(lz_info *lzi);
int lz_left_to_process(lz_info *lzi); /* returns # chars read in but unprocessed */
int lz_compress(lz_info *lzi, int nchars);

1281
src/calibre/utils/lzx/lzxc.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,42 @@
/*
File lzx_compress.h, part of lzxcomp library
Copyright (C) 2002 Matthew T. Russotto
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; version 2.1 only
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
typedef struct lzxc_data lzxc_data;

/* I/O callbacks supplied by the user of the LZX compressor; arg is
 * the opaque pointer registered with lzxc_init. */
typedef int (*lzxc_get_bytes_t)(void *arg, int n, void *buf);   /* read up to n input bytes into buf */
typedef int (*lzxc_put_bytes_t)(void *arg, int n, void *buf);   /* write n compressed bytes from buf */
typedef void (*lzxc_mark_frame_t)(void *arg, uint32_t uncomp, uint32_t comp); /* note an (uncompressed, compressed) frame pair */
typedef int (*lzxc_at_eof_t)(void *arg);                        /* nonzero when input is exhausted */

typedef struct lzxc_results
{
    /* add more here? Error codes, # blocks, # frames, etc? */
    long len_compressed_output;
    long len_uncompressed_input;
} lzxc_results;

/* Create a compressor with a 2**wsize_code-byte window; returns
 * nonzero on failure. */
int lzxc_init(struct lzxc_data **lzxdp, int wsize_code,
              lzxc_get_bytes_t get_bytes, void *get_bytes_arg,
              lzxc_at_eof_t at_eof,
              lzxc_put_bytes_t put_bytes, void *put_bytes_arg,
              lzxc_mark_frame_t mark_frame, void *mark_frame_arg);

/* Reset the LZX history (start a new reset interval). */
void lzxc_reset(lzxc_data *lzxd);

/* Compress one block of block_size bytes; subdivide presumably
 * splits the block into frames -- confirm against lzxc.c. */
int lzxc_compress_block(lzxc_data *lzxd, int block_size, int subdivide);

/* Finish the stream, optionally reporting totals into *lzxr, and
 * free the compressor. */
int lzxc_finish(struct lzxc_data *lzxd, struct lzxc_results *lzxr);

View File

@ -18,7 +18,7 @@
#include <mspack.h>
#include <system.h>
#include <lzx.h>
#include <lzxd.h>
/* Microsoft's LZX document and their implementation of the
* com.ms.util.cab Java package do not concur.

View File

@ -4,25 +4,27 @@
* Python module C glue code.
*/
#include <Python.h>
#include <mspack.h>
#include <lzx.h>
#include <lzxd.h>
#include <lzxmodule.h>
static char lzx_doc[] =
"Provide basic LZX decompression using the code from libmspack.";
"Provide basic LZX compression and decompression using the code from\n"
"liblzxcomp and libmspack respectively.";
static PyObject *LzxError = NULL;
PyObject *LZXError = NULL;
typedef struct memory_file {
unsigned int magic; /* 0xB5 */
void * buffer;
void *buffer;
int total_bytes;
int current_bytes;
} memory_file;
void *
static void *
glue_alloc(struct mspack_system *this, size_t bytes)
{
void *p = NULL;
@ -33,33 +35,33 @@ glue_alloc(struct mspack_system *this, size_t bytes)
return p;
}
void
static void
glue_free(void *p)
{
free(p);
}
void
static void
glue_copy(void *src, void *dest, size_t bytes)
{
memcpy(dest, src, bytes);
}
struct mspack_file *
static struct mspack_file *
glue_open(struct mspack_system *this, char *filename, int mode)
{
PyErr_SetString(LzxError, "MSPACK_OPEN unsupported");
PyErr_SetString(LZXError, "MSPACK_OPEN unsupported");
return NULL;
}
void
static void
glue_close(struct mspack_file *file)
{
return;
}
int
glue_read(struct mspack_file *file, void * buffer, int bytes)
static int
glue_read(struct mspack_file *file, void *buffer, int bytes)
{
memory_file *mem;
int remaining;
@ -76,8 +78,8 @@ glue_read(struct mspack_file *file, void * buffer, int bytes)
return bytes;
}
int
glue_write(struct mspack_file * file, void * buffer, int bytes)
static int
glue_write(struct mspack_file *file, void *buffer, int bytes)
{
memory_file *mem;
int remaining;
@ -86,9 +88,8 @@ glue_write(struct mspack_file * file, void * buffer, int bytes)
if (mem->magic != 0xB5) return -1;
remaining = mem->total_bytes - mem->current_bytes;
if (!remaining) return 0;
if (bytes > remaining) {
PyErr_SetString(LzxError,
PyErr_SetString(LZXError,
"MSPACK_WRITE tried to write beyond end of buffer");
bytes = remaining;
}
@ -188,7 +189,7 @@ decompress(PyObject *self, PyObject *args)
if (err != MSPACK_ERR_OK) {
Py_DECREF(retval);
retval = NULL;
PyErr_SetString(LzxError, "LZX decompression failed");
PyErr_SetString(LZXError, "LZX decompression failed");
}
return retval;
@ -198,7 +199,7 @@ static PyMethodDef lzx_methods[] = {
{ "init", &init, METH_VARARGS, "Initialize the LZX decompressor" },
{ "reset", &reset, METH_VARARGS, "Reset the LZX decompressor" },
{ "decompress", &decompress, METH_VARARGS, "Run the LZX decompressor" },
{ NULL, NULL }
{ NULL }
};
PyMODINIT_FUNC
@ -206,14 +207,21 @@ initlzx(void)
{
PyObject *m;
if (PyType_Ready(&CompressorType) < 0) {
return;
}
m = Py_InitModule3("lzx", lzx_methods, lzx_doc);
if (m == NULL) {
return;
}
LzxError = PyErr_NewException("lzx.LzxError", NULL, NULL);
Py_INCREF(LzxError);
PyModule_AddObject(m, "LzxError", LzxError);
LZXError = PyErr_NewException("lzx.LZXError", NULL, NULL);
Py_INCREF(LZXError);
PyModule_AddObject(m, "LZXError", LZXError);
Py_INCREF(&CompressorType);
PyModule_AddObject(m, "Compressor", (PyObject *)&CompressorType);
return;
}

View File

@ -0,0 +1,15 @@
/* __license__ = 'GPL v3'
* __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
*
* Common declarations for Python module C glue code.
*/
#include <Python.h>

#ifndef LZXMODULE_H
#define LZXMODULE_H

/* Shared between lzxmodule.c (module init) and compressor.c. */
extern PyObject *LZXError;          /* the lzx.LZXError exception object */
extern PyTypeObject CompressorType; /* the lzx.Compressor type */

#endif /* LZXMODULE_H */