From 61f2854c11b80c77cbe1a8235bd1681c4e5113cf Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Mon, 29 Dec 2008 00:07:34 -0500 Subject: [PATCH 1/8] Implemented basic Mobipocket container skeleton. --- src/calibre/ebooks/mobi/palmdoc.py | 57 +++++++- src/calibre/ebooks/mobi/writer.py | 218 +++++++++++++++++++++++++++++ 2 files changed, 273 insertions(+), 2 deletions(-) create mode 100644 src/calibre/ebooks/mobi/writer.py diff --git a/src/calibre/ebooks/mobi/palmdoc.py b/src/calibre/ebooks/mobi/palmdoc.py index 30d0905973..e74f6eeedc 100644 --- a/src/calibre/ebooks/mobi/palmdoc.py +++ b/src/calibre/ebooks/mobi/palmdoc.py @@ -2,7 +2,11 @@ # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' +__copyright__ = '2008, Kovid Goyal ' \ + 'and Marshall T. Vandegrift ' + +from cStringIO import StringIO +from struct import pack COUNT_BITS = 3 @@ -31,4 +35,53 @@ def decompress_doc(data): res.append(res[j - di+k]) return ''.join([chr(i) for i in res]) - \ No newline at end of file + +def compress_doc(data): + out = StringIO() + i = 0 + ldata = len(data) + while i < ldata: + if i > 10 and (ldata - i) > 10: + chunk = '' + match = -1 + for j in xrange(10, 2, -1): + chunk = data[i:i+j] + try: + match = data.rindex(chunk, 0, i) + except ValueError: + continue + if (i - match) <= 2047: + break + match = -1 + if match >= 0: + n = len(chunk) + m = i - match + code = 0x8000 + ((m << 3) & 0x3ff8) + (n - 3) + out.write(pack('>H', code)) + i += n + continue + ch = data[i] + och = ord(ch) + i += 1 + if ch == ' ' and (i + 1) < ldata: + onch = ord(data[i]) + if onch >= 0x40 and onch < 0x80: + out.write(pack('>B', onch ^ 0x80)) + i += 1 + continue + if och == 0 or (och >= 9 and och < 0x80): + out.write(ch) + else: + j = i + binseq = [ch] + while True: + ch = data[j] + och = ord(ch) + if och < 1 or (och > 8 and och < 0x80): + break + binseq.append(ch) + out.write(pack('>B', len(binseq))) + out.write(''.join(binseq)) + i += 
len(binseq) - 1 + return out.getvalue() + diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py new file mode 100644 index 0000000000..b16fc8e2c4 --- /dev/null +++ b/src/calibre/ebooks/mobi/writer.py @@ -0,0 +1,218 @@ +''' +Write content to Mobipocket books. +''' +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. Vandegrift ' + +import sys +import os +from struct import pack +import functools +import time +import random +from cStringIO import StringIO +import re +from itertools import izip, count +from lxml import etree +from calibre.ebooks.mobi.palmdoc import compress_doc +from calibre.ebooks.lit.oeb import XHTML, XHTML_NS, OEB_DOCS +from calibre.ebooks.lit.oeb import barename, namespace +from calibre.ebooks.lit.oeb import FauxLogger, OEBBook + +MBP_NS = 'http://mobipocket.cam/ns/mbp' +def MBP(name): return '{%s}%s' % (MBP_NS, name) + +EXTH_CODES = { + 'creator': 100, + 'publisher': 101, + 'description': 103, + 'identifier': 104, + 'subject': 105, + 'date': 106, + 'review': 107, + 'contributor': 108, + 'rights': 109, + 'type': 111, + 'source': 112, + 'title': 503, + } + +UNCOMPRESSED = 1 +PALMDOC = 2 +HUFFDIC = 17480 + + +class Serializer(object): + def __init__(self, oeb, images): + self.oeb = oeb + self.images = images + self.root = etree.Element(XHTML('html'), + nsmap={None: XHTML_NS, 'mbp': MBP_NS}) + self.generate_head() + self.generate_body() + + def __str__(self): + return etree.tostring(self.root) + + def generate_head(self): + head = etree.SubElement(self.root, XHTML('head')) + + def generate_body(self): + body = etree.SubElement(self.root, XHTML('body')) + first = True + for item in self.oeb.spine: + if item.media_type not in OEB_DOCS: continue + for elem in item.data.find(XHTML('body')): + body.append(elem) + etree.SubElement(body, MBP('pagebreak')) + +def preserve(function): + def wrapper(self, *args, **kwargs): + opos = self._stream.tell() + try: + return function(self, 
*args, **kwargs) + finally: + self._stream.seek(opos) + functools.update_wrapper(wrapper, function) + return wrapper + +class MobiWriter(object): + def __init__(self, compress=PALMDOC, logger=FauxLogger()): + self._compress = compress or 1 + self._logger = logger + + def dump(self, oeb, path): + if hasattr(path, 'write'): + return self._dump_stream(oeb, path) + with open(path, 'w+b') as stream: + return self._dump_stream(oeb, stream) + + def _write(self, *data): + for datum in data: + self._stream.write(datum) + + @preserve + def _writeat(self, pos, *data): + self._stream.seek(pos) + self._write(*data) + + def _tell(self): + return self._stream.tell() + + def _dump_stream(self, oeb, stream): + self._oeb = oeb + self._stream = stream + self._records = [None] + self._generate_content() + self._generate_record0() + self._write_header() + self._write_content() + + def _generate_content(self): + self._map_image_names() + self._generate_text() + self._generate_images() + + def _map_image_names(self): + index = 0 + self._images = images = {} + for item in self._oeb.manifest.values(): + if item.media_type.startswith('image/'): + images[item.href] = index + index += 1 + + def _generate_text(self): + serializer = Serializer(self._oeb, self._images) + text = str(serializer) + self._text_length = len(text) + text = StringIO(text) + nrecords = 0 + data = text.read(0x1000) + while len(data) > 0: + nrecords += 1 + if self._compress == PALMDOC: + data = compress_doc(data) + # Without the NUL Mobipocket Desktop 6.2 will thrash. Why? 
+ self._records.append(data + '\0') + data = text.read(0x1000) + self._text_nrecords = nrecords + + def _generate_images(self): + pass + + def _generate_record0(self): + exth = self._build_exth() + record0 = StringIO() + record0.write(pack('>HHIHHHH', self._compress, 0, self._text_length, + self._text_nrecords, 0x1000, 0, 0)) + uid = random.randint(0, 0xffffffff) + title = str(self._oeb.metadata.title[0]) + record0.write('MOBI') + record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 5)) + record0.write('\xff' * 40) + record0.write(pack('>I', self._text_nrecords + 1)) + record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title))) + # TODO: Translate to language code + record0.write(pack('>I', 9)) + record0.write('\0' * 8) + record0.write(pack('>II', 5, self._text_nrecords + 1)) + record0.write('\0' * 16) + record0.write(pack('>I', 0x50)) + record0.write('\0' * 32) + record0.write(pack('>IIII', 0xffffffff, 0xffffffff, 0, 0)) + # TODO: What the hell are these fields? + record0.write(pack('>IIIIIIIIIIIIIIIII', + 0, 0, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, + 0, 0xffffffff, 0, 0xffffffff, 0xffffffff, 1, 0xffffffff)) + record0.write(exth) + npad = 4 - (record0.tell() % 4) + if npad < 4: record0.write('\0' * npad) + record0.write(title) + record0 = record0.getvalue() + self._records[0] = record0 + ('\0' * (2452 - len(record0))) + + def _build_exth(self): + oeb = self._oeb + exth = StringIO() + nrecs = 0 + for term in oeb.metadata: + if term not in EXTH_CODES: continue + code = EXTH_CODES[term] + for item in oeb.metadata[term]: + data = str(item) + exth.write(pack('>II', code, len(data) + 8)) + exth.write(data) + nrecs += 1 + exth = exth.getvalue() + return ''.join(['EXTH', pack('>II', len(exth) + 12, nrecs), exth]) + + def _write_header(self): + title = str(self._oeb.metadata.title[0]) + title = re.sub('[^-A-Za-z0-9]+', '_', title)[:32] + title = title + ('\0' * (32 - len(title))) + now = int(time.time()) + nrecords = len(self._records) + 
self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0), + 'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords)) + offset = self._tell() + (8 * nrecords) + 2 + for id, record in izip(count(), self._records): + self._write(pack('>I', offset), '\0', pack('>I', id)[1:]) + offset += len(record) + self._write('\0\0') + + def _write_content(self): + for record in self._records: + self._write(record) + + +def main(argv=sys.argv): + inpath, outpath = argv[1:] + oeb = OEBBook(inpath) + writer = MobiWriter() + writer.dump(oeb, outpath) + return 0 + +if __name__ == '__main__': + sys.exit(main()) From e61c667505319bf3af8fbab575b0e7165a5bd143 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Mon, 29 Dec 2008 02:56:00 -0500 Subject: [PATCH 2/8] Link serialization. Basic image embedding. --- src/calibre/ebooks/lit/oeb.py | 26 +++++- src/calibre/ebooks/mobi/writer.py | 140 ++++++++++++++++++++++++------ 2 files changed, 137 insertions(+), 29 deletions(-) diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/lit/oeb.py index acc3275876..39b22b7e7f 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/lit/oeb.py @@ -48,6 +48,8 @@ OEB_CSS_MIME = 'text/x-oeb1-css' OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css']) OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME, 'text/x-oeb-document']) +MS_COVER_TYPE = 'other.ms-coverimage-standard' + def element(parent, *args, **kwargs): if parent is not None: @@ -153,7 +155,7 @@ class Metadata(object): % (barename(self.term), self.value, self.attrib) def __str__(self): - return str(self.value) + return self.value.encode('ascii', 'xmlcharrefreplace') def __unicode__(self): return unicode(self.value) @@ -687,6 +689,26 @@ class OEBBook(object): if self._toc_from_tour(opf): return if self._toc_from_html(opf): return self._toc_from_spine(opf) + + def _ensure_cover_image(self): + cover = None + if self.metadata.cover: + id = str(self.metadata.cover[0]) + cover = self.manifest[id] + elif 
MS_COVER_TYPE in self.guide: + href = self.guide[MS_COVER_TYPE].href + cover = self.manifest.hrefs[href] + elif 'cover' in self.guide: + href = self.guide['cover'].href + cover = self.manifest.hrefs[href] + else: + html = self.spine[0].data + imgs = xpath(html, '//h:img[position()=1]') + href = imgs[0].get('src') if imgs else None + cover = self.manifest.hrefs[href] if href else None + if cover: + if not self.metadata.cover: + self.metadata.add('cover', cover.id) def _all_from_opf(self, opf): self._metadata_from_opf(opf) @@ -694,6 +716,7 @@ class OEBBook(object): self._spine_from_opf(opf) self._guide_from_opf(opf) self._toc_from_opf(opf) + self._ensure_cover_image() def to_opf1(self): package = etree.Element('package', @@ -757,6 +780,7 @@ class OEBBook(object): NCX_MIME: (href, ncx)} + def main(argv=sys.argv): for arg in argv[1:]: oeb = OEBBook(arg) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index b16fc8e2c4..614764718d 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -15,10 +15,12 @@ import random from cStringIO import StringIO import re from itertools import izip, count +from collections import defaultdict +from urlparse import urldefrag from lxml import etree from calibre.ebooks.mobi.palmdoc import compress_doc from calibre.ebooks.lit.oeb import XHTML, XHTML_NS, OEB_DOCS -from calibre.ebooks.lit.oeb import barename, namespace +from calibre.ebooks.lit.oeb import xpath, barename, namespace from calibre.ebooks.lit.oeb import FauxLogger, OEBBook MBP_NS = 'http://mobipocket.cam/ns/mbp' @@ -43,31 +45,105 @@ UNCOMPRESSED = 1 PALMDOC = 2 HUFFDIC = 17480 +COLLAPSE = re.compile(r'[ \t\r\n\v]+') + +def encode(data): + return COLLAPSE.sub(' ', data).encode('ascii', 'xmlcharrefreplace') + class Serializer(object): def __init__(self, oeb, images): self.oeb = oeb self.images = images - self.root = etree.Element(XHTML('html'), - nsmap={None: XHTML_NS, 'mbp': MBP_NS}) - self.generate_head() - 
self.generate_body() + self.id_offsets = {} + self.href_offsets = defaultdict(list) + buffer = self.buffer = StringIO() + buffer.write('') + self.serialize_head() + self.serialize_body() + buffer.write('') + self.fixup_links() + self.raw = buffer.getvalue() def __str__(self): - return etree.tostring(self.root) + return self.raw - def generate_head(self): - head = etree.SubElement(self.root, XHTML('head')) - - def generate_body(self): - body = etree.SubElement(self.root, XHTML('body')) - first = True + def serialize_head(self): + buffer = self.buffer + buffer.write('') + buffer.write('') + + def serialize_body(self): + buffer = self.buffer + buffer.write('') for item in self.oeb.spine: - if item.media_type not in OEB_DOCS: continue - for elem in item.data.find(XHTML('body')): - body.append(elem) - etree.SubElement(body, MBP('pagebreak')) + self.serialize_item(item) + buffer.write('') + def serialize_item(self, item): + buffer = self.buffer + buffer.write('') + # TODO: Figure out how to make the 'crossable' stuff work for + # non-"linear" spine items. 
+ self.id_offsets[item.id + '_calibre_top'] = buffer.tell() + for elem in item.data.find(XHTML('body')): + self.serialize_elem(elem, item) + + def serialize_elem(self, elem, item): + ns = namespace(elem.tag) + if ns not in (XHTML_NS, MBP_NS): + return + buffer = self.buffer + hrefs = self.oeb.manifest.hrefs + tag = barename(elem.tag) + if ns == MBP_NS: tag = 'mbp:' + tag + for attr in ('name', 'id'): + if attr in elem.attrib: + id = '_'.join((item.id, elem.attrib[attr])) + self.id_offsets[id] = buffer.tell() + del elem.attrib[attr] + buffer.write('<') + buffer.write(tag) + if elem.attrib: + for attr, val in elem.attrib.items(): + buffer.write(' ') + if attr == 'href': + path, frag = urldefrag(val) + # TODO: Absolute path translation + if not path or path in hrefs: + id = hrefs[path].id if path else item.id + frag = frag if frag else 'calibre_top' + href = '_'.join((id, frag)) + buffer.write('filepos=') + self.href_offsets[href].append(buffer.tell()) + buffer.write('0000000000') + continue + elif attr == 'src' and val in hrefs: + index = self.images[val] + buffer.write('recindex="%05d"' % index) + continue + buffer.write('%s="%s"' % (attr, val)) + if not elem.text and len(elem) == 0: + buffer.write('/>') + return + buffer.write('>') + if elem.text: + buffer.write(encode(elem.text)) + for child in elem: + self.serialize_elem(child, item) + buffer.write('' % tag) + if elem.tail: + buffer.write(encode(elem.tail)) + + def fixup_links(self): + buffer = self.buffer + for id, hoffs in self.href_offsets.items(): + ioff = self.id_offsets[id] + for hoff in hoffs: + buffer.seek(hoff) + buffer.write('%010d' % ioff) + + def preserve(function): def wrapper(self, *args, **kwargs): opos = self._stream.tell() @@ -79,8 +155,8 @@ def preserve(function): return wrapper class MobiWriter(object): - def __init__(self, compress=PALMDOC, logger=FauxLogger()): - self._compress = compress or 1 + def __init__(self, compress=None, logger=FauxLogger()): + self._compress = compress or 
UNCOMPRESSED self._logger = logger def dump(self, oeb, path): @@ -93,11 +169,6 @@ class MobiWriter(object): for datum in data: self._stream.write(datum) - @preserve - def _writeat(self, pos, *data): - self._stream.seek(pos) - self._write(*data) - def _tell(self): return self._stream.tell() @@ -116,7 +187,7 @@ class MobiWriter(object): self._generate_images() def _map_image_names(self): - index = 0 + index = 1 self._images = images = {} for item in self._oeb.manifest.values(): if item.media_type.startswith('image/'): @@ -140,7 +211,13 @@ class MobiWriter(object): self._text_nrecords = nrecords def _generate_images(self): - pass + images = [(index, href) for href, index in self._images.items()] + images.sort() + for _, href in images: + item = self._oeb.manifest.hrefs[href] + data = item.data + # TODO: Re-size etc images + self._records.append(data) def _generate_record0(self): exth = self._build_exth() @@ -167,8 +244,6 @@ class MobiWriter(object): 0, 0, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, 0xffffffff, 1, 0xffffffff)) record0.write(exth) - npad = 4 - (record0.tell() % 4) - if npad < 4: record0.write('\0' * npad) record0.write(title) record0 = record0.getvalue() self._records[0] = record0 + ('\0' * (2452 - len(record0))) @@ -185,8 +260,17 @@ class MobiWriter(object): exth.write(pack('>II', code, len(data) + 8)) exth.write(data) nrecs += 1 + if oeb.metadata.cover: + id = str(oeb.metadata.cover[0]) + href = oeb.manifest[id].href + index = self._images[href] + self._text_nrecords - 1 + exth.write(pack('>III', 0xc9, 0x0c, index)) + nrecs += 1 + trail = exth.tell() % 4 + pad = '' if not trail else '\0' * (4 - trail) exth = exth.getvalue() - return ''.join(['EXTH', pack('>II', len(exth) + 12, nrecs), exth]) + exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad] + return ''.join(exth) def _write_header(self): title = str(self._oeb.metadata.title[0]) From b8373de13bbb417aa4c949207312b213cc92fe62 Mon Sep 17 
00:00:00 2001 From: "Marshall T. Vandegrift" Date: Mon, 29 Dec 2008 09:22:51 -0500 Subject: [PATCH 3/8] Implemented adding Mobipocket covers and cover thumbnails. Degrade image quality to fit Mobi size constraints. --- src/calibre/ebooks/lit/oeb.py | 63 ++++++++++++++++++++----------- src/calibre/ebooks/mobi/writer.py | 43 ++++++++++++++++++--- 2 files changed, 79 insertions(+), 27 deletions(-) diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/lit/oeb.py index 39b22b7e7f..68b51695b2 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/lit/oeb.py @@ -115,7 +115,7 @@ class DirContainer(AbstractContainer): def exists(self, path): path = os.path.join(self.rootdir, path) - return os.path.isfile(path) + return os.path.isfile(urlunquote(path)) class Metadata(object): @@ -227,7 +227,8 @@ class Metadata(object): class Manifest(object): class Item(object): - def __init__(self, id, href, media_type, fallback=None, loader=str): + def __init__(self, id, href, media_type, + fallback=None, loader=str, data=None): self.id = id self.href = self.path = urlnormalize(href) self.media_type = media_type @@ -235,7 +236,7 @@ class Manifest(object): self.spine_position = None self.linear = True self._loader = loader - self._data = None + self._data = data def __repr__(self): return 'Item(id=%r, href=%r, media_type=%r)' \ @@ -243,10 +244,10 @@ class Manifest(object): def data(): def fget(self): - if self._data: + if self._data is not None: return self._data data = self._loader(self.href) - if self.media_type == XHTML_MIME: + if self.media_type in OEB_DOCS: data = etree.fromstring(data, parser=XML_PARSER) if namespace(data.tag) != XHTML_NS: data.attrib['xmlns'] = XHTML_NS @@ -255,6 +256,7 @@ class Manifest(object): elif self.media_type.startswith('application/') \ and self.media_type.endswith('+xml'): data = etree.fromstring(data, parser=XML_PARSER) + self._data = data return data def fset(self, value): self._data = value @@ -271,38 +273,56 @@ class 
Manifest(object): def __init__(self, oeb): self.oeb = oeb - self.items = {} + self.ids = {} self.hrefs = {} - def add(self, id, href, media_type, fallback=None): + def add(self, id, href, media_type, fallback=None, loader=None, data=None): + loader = loader or self.oeb.container.read item = self.Item( - id, href, media_type, fallback, self.oeb.container.read) - self.items[item.id] = item + id, href, media_type, fallback, loader, data) + self.ids[item.id] = item self.hrefs[item.href] = item return item - def remove(self, id): - href = self.items[id].href - del self.items[id] - del self.hrefs[href] + def remove(self, item): + if item in self.ids: + item = self.ids[item] + del self.ids[item.id] + del self.hrefs[item.href] + if item in self.oeb.spine: + self.oeb.spine.remove(item) + + def generate(self, id, href): + href = urlnormalize(href) + base = id + index = 1 + while id in self.ids: + id = base + str(index) + index += 1 + base, ext = os.path.splitext(href) + index = 1 + while href in self.hrefs: + href = base + str(index) + ext + index += 1 + return id, href def __iter__(self): - for id in self.items: + for id in self.ids: yield id def __getitem__(self, id): - return self.items[id] + return self.ids[id] def values(self): - for item in self.items.values(): + for item in self.ids.values(): yield item def items(self): - for id, item in self.refs.items(): - yield id, items + for id, item in self.ids.items(): + yield id, item def __contains__(self, key): - return key in self.items + return key in self.ids def to_opf1(self, parent=None): elem = element(parent, 'manifest') @@ -706,9 +726,8 @@ class OEBBook(object): imgs = xpath(html, '//h:img[position()=1]') href = imgs[0].get('src') if imgs else None cover = self.manifest.hrefs[href] if href else None - if cover: - if not self.metadata.cover: - self.metadata.add('cover', cover.id) + if cover and not self.metadata.cover: + self.metadata.add('cover', cover.id) def _all_from_opf(self, opf): self._metadata_from_opf(opf) 
diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 614764718d..a382492e40 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -18,6 +18,7 @@ from itertools import izip, count from collections import defaultdict from urlparse import urldefrag from lxml import etree +from PIL import Image from calibre.ebooks.mobi.palmdoc import compress_doc from calibre.ebooks.lit.oeb import XHTML, XHTML_NS, OEB_DOCS from calibre.ebooks.lit.oeb import xpath, barename, namespace @@ -213,10 +214,21 @@ class MobiWriter(object): def _generate_images(self): images = [(index, href) for href, index in self._images.items()] images.sort() + metadata = self._oeb.metadata + coverid = metadata.cover[0] if metadata.cover else None for _, href in images: item = self._oeb.manifest.hrefs[href] data = item.data # TODO: Re-size etc images + image = Image.open(StringIO(item.data)) + maxsizek = 89 if coverid == item.id else 63 + maxsizeb = maxsizek * 1024 + for quality in xrange(95, -1, -1): + data = StringIO() + image.save(data, 'JPEG', quality=quality) + data = data.getvalue() + if len(data) <= maxsizeb: + break self._records.append(data) def _generate_record0(self): @@ -262,16 +274,37 @@ class MobiWriter(object): nrecs += 1 if oeb.metadata.cover: id = str(oeb.metadata.cover[0]) - href = oeb.manifest[id].href - index = self._images[href] + self._text_nrecords - 1 + item = oeb.manifest[id] + href = item.href + index = self._images[href] - 1 exth.write(pack('>III', 0xc9, 0x0c, index)) - nrecs += 1 - trail = exth.tell() % 4 - pad = '' if not trail else '\0' * (4 - trail) + exth.write(pack('>III', 0xcb, 0x0c, 0)) + index = self._add_thumbnail(item) - 1 + exth.write(pack('>III', 0xca, 0x0c, index)) + nrecs += 3 exth = exth.getvalue() + trail = len(exth) % 4 + pad = '' if not trail else '\0' * (4 - trail) exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad] return ''.join(exth) + def _add_thumbnail(self, item): + thumbnail 
= Image.open(StringIO(item.data)) + thumbnail.thumbnail((180, 240), Image.ANTIALIAS) + for quality in xrange(95, -1, -1): + data = StringIO() + thumbnail.save(data, 'JPEG', quality=quality) + data = data.getvalue() + if len(data) <= (1024 * 16): + break + manifest = self._oeb.manifest + id, href = manifest.generate('thumbnail', 'thumbnail.jpeg') + manifest.add(id, href, 'image/jpeg', data=data) + index = len(self._images) + 1 + self._images[href] = index + self._records.append(data) + return index + def _write_header(self): title = str(self._oeb.metadata.title[0]) title = re.sub('[^-A-Za-z0-9]+', '_', title)[:32] From 7ff96f540ffdc1ec16444a8b0e9a57fef73e673b Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Mon, 29 Dec 2008 10:27:27 -0500 Subject: [PATCH 4/8] Fix bug in HTML serialization. Remove whitespace collapse -- will be done in markup conversion. Add support to generate Mobipocket elements from OPF guide. --- src/calibre/ebooks/lit/oeb.py | 3 ++ src/calibre/ebooks/mobi/writer.py | 60 ++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/lit/oeb.py index 68b51695b2..e8ae48bfa1 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/lit/oeb.py @@ -439,6 +439,9 @@ class Guide(object): def __contains__(self, key): return key in self.refs + def __len__(self): + return len(self.refs) + def to_opf1(self, parent=None): elem = element(parent, 'guide') for ref in self.refs.values(): diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index a382492e40..b273d1946b 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -46,10 +46,8 @@ UNCOMPRESSED = 1 PALMDOC = 2 HUFFDIC = 17480 -COLLAPSE = re.compile(r'[ \t\r\n\v]+') - def encode(data): - return COLLAPSE.sub(' ', data).encode('ascii', 'xmlcharrefreplace') + return data.encode('ascii', 'xmlcharrefreplace') class Serializer(object): @@ -72,7 
+70,37 @@ class Serializer(object): def serialize_head(self): buffer = self.buffer buffer.write('') + if len(self.oeb.guide) > 0: + self.serialize_guide() buffer.write('') + + def serialize_guide(self): + buffer = self.buffer + buffer.write('') + for ref in self.oeb.guide.values(): + buffer.write('') + buffer.write('') + + def serialize_href(self, href, baseid=None): + hrefs = self.oeb.manifest.hrefs + path, frag = urldefrag(href) + # TODO: Absolute path translation + if path and path not in hrefs: + return False + buffer = self.buffer + item = hrefs[path] if path else None + if item and item.spine_position is None: + return False + id = item.id if item else baseid + frag = frag if frag else 'calibre_top' + href = '_'.join((id, frag)) + buffer.write('filepos=') + self.href_offsets[href].append(buffer.tell()) + buffer.write('0000000000') + return True def serialize_body(self): buffer = self.buffer @@ -109,30 +137,22 @@ class Serializer(object): for attr, val in elem.attrib.items(): buffer.write(' ') if attr == 'href': - path, frag = urldefrag(val) - # TODO: Absolute path translation - if not path or path in hrefs: - id = hrefs[path].id if path else item.id - frag = frag if frag else 'calibre_top' - href = '_'.join((id, frag)) - buffer.write('filepos=') - self.href_offsets[href].append(buffer.tell()) - buffer.write('0000000000') + if self.serialize_href(val, item.id): continue elif attr == 'src' and val in hrefs: index = self.images[val] buffer.write('recindex="%05d"' % index) continue buffer.write('%s="%s"' % (attr, val)) - if not elem.text and len(elem) == 0: + if elem.text or len(elem) > 0: + buffer.write('>') + if elem.text: + buffer.write(encode(elem.text)) + for child in elem: + self.serialize_elem(child, item) + buffer.write('' % tag) + else: buffer.write('/>') - return - buffer.write('>') - if elem.text: - buffer.write(encode(elem.text)) - for child in elem: - self.serialize_elem(child, item) - buffer.write('' % tag) if elem.tail: 
buffer.write(encode(elem.tail)) From c9124017af76fcb3cdb57bebd26a3d17b1e2ed17 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Tue, 30 Dec 2008 00:39:54 -0500 Subject: [PATCH 5/8] Minor namespace clean-ups for mobi-generation. --- src/calibre/ebooks/lit/oeb.py | 6 ++++++ src/calibre/ebooks/lit/writer.py | 8 +------- src/calibre/ebooks/mobi/writer.py | 20 ++++++++++++-------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/lit/oeb.py index e8ae48bfa1..432c35cde2 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/lit/oeb.py @@ -66,6 +66,12 @@ def barename(name): return name.split('}', 1)[1] return name +def prefixname(name, nsrmap): + prefix = nsrmap[namespace(name)] + if not prefix: + return barename(name) + return ':'.join((prefix, barename(name))) + def xpath(elem, expr): return elem.xpath(expr, namespaces=XPNSMAP) diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index 02981dac37..af11f04eb1 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -26,7 +26,7 @@ import calibre.ebooks.lit.maps as maps from calibre.ebooks.lit.oeb import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \ CSS_MIME, OPF_MIME, XML_NS, XML from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize, xpath -from calibre.ebooks.lit.oeb import FauxLogger, OEBBook +from calibre.ebooks.lit.oeb import prefixname, FauxLogger, OEBBook from calibre.ebooks.lit.stylizer import Stylizer from calibre.ebooks.lit.lzx import Compressor import calibre @@ -116,12 +116,6 @@ LZXC_CONTROL = \ COLLAPSE = re.compile(r'[ \t\r\n\v]+') -def prefixname(name, nsrmap): - prefix = nsrmap[namespace(name)] - if not prefix: - return barename(name) - return ':'.join((prefix, barename(name))) - def decint(value): bytes = [] while True: diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index b273d1946b..16bb9fca9d 100644 --- 
a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -20,11 +20,11 @@ from urlparse import urldefrag from lxml import etree from PIL import Image from calibre.ebooks.mobi.palmdoc import compress_doc -from calibre.ebooks.lit.oeb import XHTML, XHTML_NS, OEB_DOCS -from calibre.ebooks.lit.oeb import xpath, barename, namespace +from calibre.ebooks.lit.oeb import XML_NS, XHTML, XHTML_NS, OEB_DOCS +from calibre.ebooks.lit.oeb import xpath, barename, namespace, prefixname from calibre.ebooks.lit.oeb import FauxLogger, OEBBook -MBP_NS = 'http://mobipocket.cam/ns/mbp' +MBP_NS = 'http://mobipocket.com/ns/mbp' def MBP(name): return '{%s}%s' % (MBP_NS, name) EXTH_CODES = { @@ -50,7 +50,10 @@ def encode(data): return data.encode('ascii', 'xmlcharrefreplace') + class Serializer(object): + NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'} + def __init__(self, oeb, images): self.oeb = oeb self.images = images @@ -118,14 +121,12 @@ class Serializer(object): for elem in item.data.find(XHTML('body')): self.serialize_elem(elem, item) - def serialize_elem(self, elem, item): - ns = namespace(elem.tag) - if ns not in (XHTML_NS, MBP_NS): + def serialize_elem(self, elem, item, nsrmap=NSRMAP): + if namespace(elem.tag) not in nsrmap: return buffer = self.buffer hrefs = self.oeb.manifest.hrefs - tag = barename(elem.tag) - if ns == MBP_NS: tag = 'mbp:' + tag + tag = prefixname(elem.tag, nsrmap) for attr in ('name', 'id'): if attr in elem.attrib: id = '_'.join((item.id, elem.attrib[attr])) @@ -135,6 +136,9 @@ class Serializer(object): buffer.write(tag) if elem.attrib: for attr, val in elem.attrib.items(): + if namespace(attr) not in nsrmap: + continue + attr = prefixname(attr, nsrmap) buffer.write(' ') if attr == 'href': if self.serialize_href(val, item.id): From 4bae0a44fdcd0dd13b226667f1a2d20e6101f596 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Tue, 30 Dec 2008 09:50:52 -0500 Subject: [PATCH 6/8] Shore-up image rescaling. 
--- src/calibre/ebooks/mobi/writer.py | 49 ++++++++++++++----------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 16bb9fca9d..43c4aaa52d 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -169,16 +169,6 @@ class Serializer(object): buffer.write('%010d' % ioff) -def preserve(function): - def wrapper(self, *args, **kwargs): - opos = self._stream.tell() - try: - return function(self, *args, **kwargs) - finally: - self._stream.seek(opos) - functools.update_wrapper(wrapper, function) - return wrapper - class MobiWriter(object): def __init__(self, compress=None, logger=FauxLogger()): self._compress = compress or UNCOMPRESSED @@ -235,6 +225,24 @@ class MobiWriter(object): data = text.read(0x1000) self._text_nrecords = nrecords + def _rescale_image(self, data, maxsizeb, dimen=None): + if dimen is not None: + image = Image.open(StringIO(data)) + image.thumbnail(dimen, Image.ANTIALIAS) + data = StringIO() + image.save(data, image.format) + data = data.getvalue() + if len(data) < maxsizeb: + return data + image = Image.open(StringIO(data)) + for quality in xrange(95, -1, -1): + data = StringIO() + image.save(data, 'JPEG', quality=quality) + data = data.getvalue() + if len(data) <= maxsizeb: + break + return data + def _generate_images(self): images = [(index, href) for href, index in self._images.items()] images.sort() @@ -242,17 +250,9 @@ class MobiWriter(object): coverid = metadata.cover[0] if metadata.cover else None for _, href in images: item = self._oeb.manifest.hrefs[href] - data = item.data - # TODO: Re-size etc images - image = Image.open(StringIO(item.data)) maxsizek = 89 if coverid == item.id else 63 maxsizeb = maxsizek * 1024 - for quality in xrange(95, -1, -1): - data = StringIO() - image.save(data, 'JPEG', quality=quality) - data = data.getvalue() - if len(data) <= maxsizeb: - break + data = self._rescale_image(item.data, 
maxsizeb) self._records.append(data) def _generate_record0(self): @@ -313,14 +313,9 @@ class MobiWriter(object): return ''.join(exth) def _add_thumbnail(self, item): - thumbnail = Image.open(StringIO(item.data)) - thumbnail.thumbnail((180, 240), Image.ANTIALIAS) - for quality in xrange(95, -1, -1): - data = StringIO() - thumbnail.save(data, 'JPEG', quality=quality) - data = data.getvalue() - if len(data) <= (1024 * 16): - break + maxsizeb = 16 * 1024 + dimen = (180, 240) + data = self._rescale_image(item.data, maxsizeb, dimen) manifest = self._oeb.manifest id, href = manifest.generate('thumbnail', 'thumbnail.jpeg') manifest.add(id, href, 'image/jpeg', data=data) From 0182685b766307f90f978ca0d7cfd5b5d4c9aa4d Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Tue, 30 Dec 2008 19:39:16 -0500 Subject: [PATCH 7/8] Extra OEB validity checks. "Absolute" URI generation for manifest items. --- src/calibre/ebooks/lit/oeb.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/lit/oeb.py b/src/calibre/ebooks/lit/oeb.py index 432c35cde2..42b54561d1 100644 --- a/src/calibre/ebooks/lit/oeb.py +++ b/src/calibre/ebooks/lit/oeb.py @@ -128,6 +128,7 @@ class Metadata(object): TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description', 'format', 'identifier', 'language', 'publisher', 'relation', 'rights', 'source', 'subject', 'title', 'type']) + ATTRS = set(['role', 'file-as', 'scheme']) OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS} OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS, 'xsi': XSI_NS} @@ -144,7 +145,12 @@ class Metadata(object): self.value = value self.attrib = attrib = {} for fq_attr in fq_attrib: - attr = barename(fq_attr) + if fq_attr in Metadata.ATTRS: + attr = fq_attr + fq_attr = OPF2(fq_attr) + fq_attrib[fq_attr] = fq_attrib.pop(attr) + else: + attr = barename(fq_attr) attrib[attr] = fq_attrib[fq_attr] def __getattr__(self, name): @@ -161,7 +167,7 @@ class 
Metadata(object): % (barename(self.term), self.value, self.attrib) def __str__(self): - return self.value.encode('ascii', 'xmlcharrefreplace') + return unicode(self.value).encode('ascii', 'xmlcharrefreplace') def __unicode__(self): return unicode(self.value) @@ -276,6 +282,14 @@ class Manifest(object): if result != 0: return result return cmp(self.id, other.id) + + def abshref(self, href): + if '/' not in self.href: + return href + dirname = os.path.dirname(self.href) + href = os.path.join(dirname, href) + href = os.path.normpath(href).replace('\\', '/') + return href def __init__(self, oeb): self.oeb = oeb @@ -581,6 +595,15 @@ class OEBBook(object): else: self.logger.log_warn(u'Unique-identifier %r not found.' % uid) self.uid = metadata.identifier[0] + if not metadata.language: + self.logger.log_warn(u'Language not specified.') + metadata.add('language', 'en') + if not metadata.creator: + self.logger.log_warn(u'Creator not specified.') + metadata.add('creator', 'Unknown') + if not metadata.title: + self.logger.log_warn(u'Title not specified.') + metadata.add('title', 'Unknown') def _manifest_from_opf(self, opf): self.manifest = manifest = Manifest(self) From b899f3084c4db74851124187ff0ae2f24c78febc Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Tue, 30 Dec 2008 19:39:52 -0500 Subject: [PATCH 8/8] Mobipocket generation: - Produce correct Mobi language codes. - Properly interpret relative @href attributes. 
--- src/calibre/ebooks/mobi/langcodes.py | 169 ++++++++++++++++++++++++++- src/calibre/ebooks/mobi/writer.py | 22 ++-- 2 files changed, 179 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/mobi/langcodes.py b/src/calibre/ebooks/mobi/langcodes.py index c0884e55a8..17fbd7fe3e 100644 --- a/src/calibre/ebooks/mobi/langcodes.py +++ b/src/calibre/ebooks/mobi/langcodes.py @@ -3,6 +3,8 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' +from struct import pack + main_language = { 0 : "NEUTRAL", 54 : "AFRIKAANS", @@ -155,5 +157,168 @@ sub_language = { 2 : "SWEDISH_FINLAND", 1 : "UZBEK_LATIN", 2 : "UZBEK_CYRILLIC", - - } \ No newline at end of file + } + +IANA_MOBI = \ + {None: {None: (0, 0)}, + 'af': {None: (54, 0)}, + 'ar': {None: (1, 0), + 'AE': (1, 56), + 'BH': (1, 60), + 'DZ': (1, 20), + 'EG': (1, 12), + 'JO': (1, 44), + 'KW': (1, 52), + 'LB': (1, 48), + 'MA': (1, 24), + 'OM': (1, 32), + 'QA': (1, 64), + 'SA': (1, 4), + 'SY': (1, 40), + 'TN': (1, 28), + 'YE': (1, 36)}, + 'as': {None: (77, 0)}, + 'az': {None: (44, 0)}, + 'be': {None: (35, 0)}, + 'bg': {None: (2, 0)}, + 'bn': {None: (69, 0)}, + 'ca': {None: (3, 0)}, + 'cs': {None: (5, 0)}, + 'da': {None: (6, 0)}, + 'de': {None: (7, 0), + 'AT': (7, 12), + 'CH': (7, 8), + 'LI': (7, 20), + 'LU': (7, 16)}, + 'el': {None: (8, 0)}, + 'en': {None: (9, 0), + 'AU': (9, 12), + 'BZ': (9, 40), + 'CA': (9, 16), + 'GB': (9, 8), + 'IE': (9, 24), + 'JM': (9, 32), + 'NZ': (9, 20), + 'PH': (9, 52), + 'TT': (9, 44), + 'US': (9, 4), + 'ZA': (9, 28), + 'ZW': (9, 48)}, + 'es': {None: (10, 0), + 'AR': (10, 44), + 'BO': (10, 64), + 'CL': (10, 52), + 'CO': (10, 36), + 'CR': (10, 20), + 'DO': (10, 28), + 'EC': (10, 48), + 'ES': (10, 4), + 'GT': (10, 16), + 'HN': (10, 72), + 'MX': (10, 8), + 'NI': (10, 76), + 'PA': (10, 24), + 'PE': (10, 40), + 'PR': (10, 80), + 'PY': (10, 60), + 'SV': (10, 68), + 'UY': (10, 56), + 'VE': (10, 32)}, + 'et': {None: (37, 0)}, + 'eu': 
{None: (45, 0)}, + 'fa': {None: (41, 0)}, + 'fi': {None: (11, 0)}, + 'fo': {None: (56, 0)}, + 'fr': {None: (12, 0), + 'BE': (12, 8), + 'CA': (12, 12), + 'CH': (12, 16), + 'FR': (12, 4), + 'LU': (12, 20), + 'MC': (12, 24)}, + 'gu': {None: (71, 0)}, + 'he': {None: (13, 0)}, + 'hi': {None: (57, 0)}, + 'hr': {None: (26, 0)}, + 'hu': {None: (14, 0)}, + 'hy': {None: (43, 0)}, + 'id': {None: (33, 0)}, + 'is': {None: (15, 0)}, + 'it': {None: (16, 0), + 'CH': (16, 8), + 'IT': (16, 4)}, + 'ja': {None: (17, 0)}, + 'ka': {None: (55, 0)}, + 'kk': {None: (63, 0)}, + 'kn': {None: (75, 0)}, + 'ko': {None: (18, 0)}, + 'kok': {None: (87, 0)}, + 'lt': {None: (39, 0)}, + 'lv': {None: (38, 0)}, + 'mk': {None: (47, 0)}, + 'ml': {None: (76, 0)}, + 'mr': {None: (78, 0)}, + 'ms': {None: (62, 0)}, + 'mt': {None: (58, 0)}, + 'ne': {None: (97, 0)}, + 'nl': {None: (19, 0), + 'BE': (19, 8)}, + 'no': {None: (20, 0)}, + 'or': {None: (72, 0)}, + 'pa': {None: (70, 0)}, + 'pl': {None: (21, 0)}, + 'pt': {None: (22, 0), + 'BR': (22, 4), + 'PT': (22, 8)}, + 'rm': {None: (23, 0)}, + 'ro': {None: (24, 0)}, + 'ru': {None: (25, 0)}, + 'sa': {None: (79, 0)}, + 'se': {None: (59, 0)}, + 'sk': {None: (27, 0)}, + 'sl': {None: (36, 0)}, + 'sq': {None: (28, 0)}, + 'sr': {None: (26, 12), + 'RS': (26, 12)}, + 'st': {None: (48, 0)}, + 'sv': {None: (29, 0), + 'FI': (29, 8)}, + 'sw': {None: (65, 0)}, + 'ta': {None: (73, 0)}, + 'te': {None: (74, 0)}, + 'th': {None: (30, 0)}, + 'tn': {None: (50, 0)}, + 'tr': {None: (31, 0)}, + 'ts': {None: (49, 0)}, + 'tt': {None: (68, 0)}, + 'uk': {None: (34, 0)}, + 'ur': {None: (32, 0)}, + 'uz': {None: (67, 0), + 'UZ': (67, 8)}, + 'vi': {None: (42, 0)}, + 'wen': {None: (46, 0)}, + 'xh': {None: (52, 0)}, + 'zh': {None: (4, 0), + 'CN': (4, 8), + 'HK': (4, 12), + 'SG': (4, 16), + 'TW': (4, 4)}, + 'zu': {None: (53, 0)}} + +def iana2mobi(icode): + subtags = list(icode.split('-')) + langdict = IANA_MOBI[None] + while len(subtags) > 0: + lang = subtags.pop(0).lower() + if lang in
IANA_MOBI: + langdict = IANA_MOBI[lang] + break + mcode = langdict[None] + while len(subtags) > 0: + subtag = subtags.pop(0) + if subtag not in langdict: + subtag = subtag.upper() + if subtag in langdict: + mcode = langdict[subtag] + break + return pack('>HBB', 0, mcode[1], mcode[0]) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 43c4aaa52d..4986790828 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -20,6 +20,7 @@ from urlparse import urldefrag from lxml import etree from PIL import Image from calibre.ebooks.mobi.palmdoc import compress_doc +from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.lit.oeb import XML_NS, XHTML, XHTML_NS, OEB_DOCS from calibre.ebooks.lit.oeb import xpath, barename, namespace, prefixname from calibre.ebooks.lit.oeb import FauxLogger, OEBBook @@ -87,19 +88,20 @@ class Serializer(object): buffer.write('/>') buffer.write('') - def serialize_href(self, href, baseid=None): + def serialize_href(self, href, base=None): hrefs = self.oeb.manifest.hrefs path, frag = urldefrag(href) - # TODO: Absolute path translation + if path and base: + path = base.abshref(path) if path and path not in hrefs: return False buffer = self.buffer item = hrefs[path] if path else None if item and item.spine_position is None: return False - id = item.id if item else baseid + id = item.id if item else base.id frag = frag if frag else 'calibre_top' - href = '_'.join((id, frag)) + href = '#'.join((id, frag)) buffer.write('filepos=') self.href_offsets[href].append(buffer.tell()) buffer.write('0000000000') @@ -117,7 +119,7 @@ class Serializer(object): buffer.write('') # TODO: Figure out how to make the 'crossable' stuff work for # non-"linear" spine items.
- self.id_offsets[item.id + '_calibre_top'] = buffer.tell() + self.id_offsets[item.id + '#calibre_top'] = buffer.tell() for elem in item.data.find(XHTML('body')): self.serialize_elem(elem, item) @@ -129,7 +131,7 @@ class Serializer(object): tag = prefixname(elem.tag, nsrmap) for attr in ('name', 'id'): if attr in elem.attrib: - id = '_'.join((item.id, elem.attrib[attr])) + id = '#'.join((item.id, elem.attrib[attr])) self.id_offsets[id] = buffer.tell() del elem.attrib[attr] buffer.write('<') @@ -141,7 +143,7 @@ class Serializer(object): attr = prefixname(attr, nsrmap) buffer.write(' ') if attr == 'href': - if self.serialize_href(val, item.id): + if self.serialize_href(val, item): continue elif attr == 'src' and val in hrefs: index = self.images[val] @@ -256,19 +258,19 @@ class MobiWriter(object): self._records.append(data) def _generate_record0(self): + metadata = self._oeb.metadata exth = self._build_exth() record0 = StringIO() record0.write(pack('>HHIHHHH', self._compress, 0, self._text_length, self._text_nrecords, 0x1000, 0, 0)) uid = random.randint(0, 0xffffffff) - title = str(self._oeb.metadata.title[0]) + title = str(metadata.title[0]) record0.write('MOBI') record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 5)) record0.write('\xff' * 40) record0.write(pack('>I', self._text_nrecords + 1)) record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title))) - # TODO: Translate to language code - record0.write(pack('>I', 9)) + record0.write(iana2mobi(str(metadata.language[0]))) record0.write('\0' * 8) record0.write(pack('>II', 5, self._text_nrecords + 1)) record0.write('\0' * 16)