From a0cbbc1cbee08efa0dd1f963f2ec7c66749a83df Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift"
Date: Sun, 25 Jan 2009 13:35:18 -0500
Subject: [PATCH] Implement Mobipocket "set_metadata()" function.

---
 src/calibre/ebooks/metadata/mobi.py |  250 ++++++++++++++++++++++++++--
 src/calibre/ebooks/mobi/writer.py   |   97 +++++++------
 2 files changed, 290 insertions(+), 57 deletions(-)

diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py
index 933cbbdaed..3b506761b6 100644
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@@ -1,23 +1,251 @@
-#!/usr/bin/env python
+'''
+Retrieve and modify in-place Mobipocket book metadata.
+'''
+
+from __future__ import with_statement
+
 __license__   = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
+__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
+    'Marshall T. Vandegrift '
 __docformat__ = 'restructuredtext en'
 
-'''
-'''
-
-import sys, os
-
+import sys
+import os
+from struct import pack, unpack
+from cStringIO import StringIO
+from calibre.ebooks.metadata import get_parser, MetaInformation
+from calibre.ebooks.mobi import MobiError
 from calibre.ebooks.mobi.reader import get_metadata
+from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
+from calibre.ebooks.mobi.langcodes import iana2mobi
+
+class StreamSlicer(object):
+    '''
+    Expose the byte range [start, stop) of a seekable stream through
+    len() and integer/slice indexing, reading and writing in place.
+    '''
+
+    def __init__(self, stream, start=0, stop=None):
+        self._stream = stream
+        self.start = start
+        if stop is None:
+            stream.seek(0, 2)
+            stop = stream.tell()
+        self.stop = stop
+        self._len = stop - start
+
+    def __len__(self):
+        return self._len
+
+    def __getitem__(self, key):
+        stream = self._stream
+        base = self.start
+        if isinstance(key, (int, long)):
+            stream.seek(base + key)
+            return stream.read(1)
+        if isinstance(key, slice):
+            start, stop, stride = key.indices(self._len)
+            if stride < 0:
+                # indices() yields e.g. (len - 1, -1, -1) for [::-1];
+                # shift by one to get the forward [start, stop) span.
+                start, stop = stop + 1, start + 1
+            size = stop - start
+            if size <= 0:
+                return ""
+            stream.seek(base + start)
+            data = stream.read(size)
+            if stride != 1:
+                data = data[::stride]
+            return data
+        raise TypeError("stream indices must be integers")
+
+    def __setitem__(self, key, value):
+        stream = self._stream
+        base = self.start
+        if isinstance(key, (int, long)):
+            if len(value) != 1:
+                raise ValueError("key and value lengths must match")
+            stream.seek(base + key)
+            return stream.write(value)
+        if isinstance(key, slice):
+            start, stop, stride = key.indices(self._len)
+            if stride < 0:
+                # Same forward-span conversion as in __getitem__.
+                start, stop = stop + 1, start + 1
+            size = stop - start
+            if stride != 1:
+                value = value[::stride]
+            if len(value) != size:
+                raise ValueError("key and value lengths must match")
+            stream.seek(base + start)
+            return stream.write(value)
+        raise TypeError("stream indices must be integers")
+
+
+class MetadataUpdater(object):
+    '''
+    Locate record 0 and the EXTH header of a Mobipocket stream and
+    rewrite its metadata in place.
+    '''
+
+    def __init__(self, stream):
+        self.stream = stream
+        data = self.data = StreamSlicer(stream)
+        type = self.type = data[60:68]
+        self.nrecs, = unpack('>H', data[76:78])
+        record0 = self.record0 = self.record(0)
+        codepage, = unpack('>I', record0[28:32])
+        self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
+        image_base, = unpack('>I', record0[108:112])
+        flags, = unpack('>I', record0[128:132])
+        have_exth = self.have_exth = (flags & 0x40) != 0
+        # Set before the early return so update() never sees missing
+        # attributes on a file without an EXTH header.
+        self.exth = None
+        self.cover_record = self.thumbnail_record = None
+        if not have_exth:
+            return
+        exth_off = unpack('>I', record0[20:24])[0] + 16 + record0.start
+        exth = self.exth = StreamSlicer(stream, exth_off, record0.stop)
+        nitems, = unpack('>I', exth[8:12])
+        pos = 12
+        for i in xrange(nitems):
+            id, size = unpack('>II', exth[pos:pos + 8])
+            content = exth[pos + 8: pos + size]
+            pos += size
+            if id == 201:
+                rindex, = self.cover_rindex, = unpack('>I', content)
+                self.cover_record = self.record(rindex + image_base)
+            elif id == 202:
+                rindex, = self.thumbnail_rindex, = unpack('>I', content)
+                self.thumbnail_record = self.record(rindex + image_base)
+
+    def record(self, n):
+        '''Return a StreamSlicer spanning record `n` of the file.'''
+        if n >= self.nrecs:
+            raise ValueError('non-existent record %r' % n)
+        offoff = 78 + (8 * n)
+        start, = unpack('>I', self.data[offoff + 0:offoff + 4])
+        stop = None
+        if n < (self.nrecs - 1):
+            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
+        return StreamSlicer(self.stream, start, stop)
+
+    def update(self, mi):
+        '''
+        Write the metadata in `mi` over the existing EXTH block, title,
+        language code and cover/thumbnail images. Raises MobiError if
+        there is no EXTH header or the new data does not fit.
+        '''
+        if self.exth is None:
+            raise MobiError("No EXTH header present to update")
+        recs = []
+        if mi.authors:
+            authors = '; '.join(mi.authors)
+            recs.append((100, authors.encode(self.codec, 'replace')))
+        if mi.publisher:
+            recs.append((101, mi.publisher.encode(self.codec, 'replace')))
+        if mi.comments:
+            recs.append((103, mi.comments.encode(self.codec, 'replace')))
+        if mi.isbn:
+            recs.append((104, mi.isbn.encode(self.codec, 'replace')))
+        if mi.tags:
+            subjects = '; '.join(mi.tags)
+            recs.append((105, subjects.encode(self.codec, 'replace')))
+        if self.cover_record is not None:
+            recs.append((201, pack('>I', self.cover_rindex)))
+            recs.append((203, pack('>I', 0)))
+        if self.thumbnail_record is not None:
+            recs.append((202, pack('>I', self.thumbnail_rindex)))
+        exth = StringIO()
+        for code, data in recs:
+            exth.write(pack('>II', code, len(data) + 8))
+            exth.write(data)
+        exth = exth.getvalue()
+        trail = len(exth) % 4
+        pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
+        exth = ['EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad]
+        exth = ''.join(exth)
+        title = (mi.title or _('Unknown')).encode(self.codec, 'replace')
+        title_off = (self.exth.start - self.record0.start) + len(exth)
+        title_len = len(title)
+        trail = len(self.exth) - len(exth) - len(title)
+        if trail < 0:
+            raise MobiError("Insufficient space to update metadata")
+        self.exth[:] = ''.join([exth, title, '\0' * trail])
+        self.record0[84:92] = pack('>II', title_off, title_len)
+        self.record0[92:96] = iana2mobi(mi.language)
+        if mi.cover_data[1]:
+            data = mi.cover_data[1]
+            if self.cover_record is not None:
+                size = len(self.cover_record)
+                cover = rescale_image(data, size)
+                cover += '\0' * (size - len(cover))
+                self.cover_record[:] = cover
+            if self.thumbnail_record is not None:
+                size = len(self.thumbnail_record)
+                thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN)
+                thumbnail += '\0' * (size - len(thumbnail))
+                self.thumbnail_record[:] = thumbnail
+        return
+
+def set_metadata(stream, mi):
+    '''Update the Mobipocket file open on `stream` with metadata `mi`.'''
+    mu = MetadataUpdater(stream)
+    mu.update(mi)
+    return
+
+
+def option_parser():
+    '''Return the command-line option parser for this tool.'''
+    parser = get_parser('mobi')
+    parser.remove_option('--category')
+    parser.add_option('--tags', default=None,
+                      help=_('Set the subject tags'))
+    parser.add_option('--language', default=None,
+                      help=_('Set the language'))
+    parser.add_option('--publisher', default=None,
+                      help=_('Set the publisher'))
+    parser.add_option('--isbn', default=None,
+                      help=_('Set the ISBN'))
+    return parser
 
 def main(args=sys.argv):
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
     if len(args) != 2:
+        parser.print_help()
         print >>sys.stderr, 'Usage: %s file.mobi' % args[0]
         return 1
     fname = args[1]
-    mi = get_metadata(open(fname, 'rb'))
-    print unicode(mi)
-    if mi.cover_data[1]:
+    changed = False
+    with open(fname, 'r+b') as stream:
+        mi = get_metadata(stream)
+        if opts.title:
+            mi.title = opts.title
+            changed = True
+        if opts.authors:
+            mi.authors = opts.authors.split(',')
+            changed = True
+        if opts.comment:
+            mi.comments = opts.comment
+            changed = True
+        if opts.tags is not None:
+            mi.tags = opts.tags.split(',')
+            changed = True
+        if opts.language is not None:
+            mi.language = opts.language
+            changed = True
+        if opts.publisher is not None:
+            mi.publisher = opts.publisher
+            changed = True
+        if opts.isbn is not None:
+            mi.isbn = opts.isbn
+            changed = True
+        if changed:
+            set_metadata(stream, mi)
+        print unicode(get_metadata(stream))
+    if not changed and mi.cover_data[1]:
         cover = os.path.abspath(
 '.'.join((os.path.splitext(os.path.basename(fname))[0],
           mi.cover_data[0].lower())))
@@ -26,4 +254,4 @@ def main(args=sys.argv):
     return 0
 
 if __name__ == '__main__':
-    sys.exit(main())
\ No newline at end of file
+    sys.exit(main())
diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py
index 39c77eace5..f1810d2f28 100644
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@@ -87,6 +87,54 @@ def decint(value, direction):
         bytes[-1] |= 0x80
     return ''.join(chr(b) for b in reversed(bytes))
 
+def rescale_image(data, maxsizeb, dimen=None):
+    '''
+    Return `data` re-encoded so that it fits in `maxsizeb` bytes where
+    possible, first lowering JPEG quality and then shrinking the image.
+    If `dimen` is given the image is first thumbnailed to that size.
+    '''
+    image = Image.open(StringIO(data))
+    format = image.format
+    changed = False
+    if image.format not in ('JPEG', 'GIF'):
+        width, height = image.size
+        area = width * height
+        if area <= 40000:
+            format = 'GIF'
+        else:
+            image = image.convert('RGBA')
+            format = 'JPEG'
+        changed = True
+    if dimen is not None:
+        image.thumbnail(dimen, Image.ANTIALIAS)
+        changed = True
+    if changed:
+        data = StringIO()
+        image.save(data, format)
+        data = data.getvalue()
+    if len(data) <= maxsizeb:
+        return data
+    image = image.convert('RGBA')
+    for quality in xrange(95, -1, -1):
+        data = StringIO()
+        image.save(data, 'JPEG', quality=quality)
+        data = data.getvalue()
+        if len(data) <= maxsizeb:
+            return data
+    width, height = image.size
+    for scale in xrange(99, 0, -1):
+        scale = scale / 100.
+        data = StringIO()
+        scaled = image.copy()
+        size = (int(width * scale), int(height * scale))
+        scaled.thumbnail(size, Image.ANTIALIAS)
+        scaled.save(data, 'JPEG', quality=0)
+        data = data.getvalue()
+        if len(data) <= maxsizeb:
+            return data
+    # Well, we tried?
+    return data
+
 class Serializer(object):
     NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
 
@@ -355,50 +403,7 @@ class MobiWriter(object):
             offset += RECORD_SIZE
             data, overlap = self._read_text_record(text)
         self._text_nrecords = nrecords
-
-    def _rescale_image(self, data, maxsizeb, dimen=None):
-        image = Image.open(StringIO(data))
-        format = image.format
-        changed = False
-        if image.format not in ('JPEG', 'GIF'):
-            width, height = image.size
-            area = width * height
-            if area <= 40000:
-                format = 'GIF'
-            else:
-                image = image.convert('RGBA')
-                format = 'JPEG'
-            changed = True
-        if dimen is not None:
-            image.thumbnail(dimen, Image.ANTIALIAS)
-            changed = True
-        if changed:
-            data = StringIO()
-            image.save(data, format)
-            data = data.getvalue()
-        if len(data) <= maxsizeb:
-            return data
-        image = image.convert('RGBA')
-        for quality in xrange(95, -1, -1):
-            data = StringIO()
-            image.save(data, 'JPEG', quality=quality)
-            data = data.getvalue()
-            if len(data) <= maxsizeb:
-                return data
-        width, height = image.size
-        for scale in xrange(99, 0, -1):
-            scale = scale / 100.
-            data = StringIO()
-            scaled = image.copy()
-            size = (int(width * scale), (height * scale))
-            scaled.thumbnail(size, Image.ANTIALIAS)
-            scaled.save(data, 'JPEG', quality=0)
-            data = data.getvalue()
-            if len(data) <= maxsizeb:
-                return data
-        # Well, we tried?
-        return data
-
+
     def _generate_images(self):
         self._oeb.logger.info('Serializing images...')
         images = [(index, href) for href, index in self._images.items()]
@@ -407,7 +412,7 @@ class MobiWriter(object):
         coverid = metadata.cover[0] if metadata.cover else None
         for _, href in images:
             item = self._oeb.manifest.hrefs[href]
-            data = self._rescale_image(item.data, self._imagemax)
+            data = rescale_image(item.data, self._imagemax)
             self._records.append(data)
 
     def _generate_record0(self):
@@ -480,7 +485,7 @@ class MobiWriter(object):
         return ''.join(exth)
 
     def _add_thumbnail(self, item):
-        data = self._rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
+        data = rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
         manifest = self._oeb.manifest
         id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
         manifest.add(id, href, 'image/jpeg', data=data)