Implement Mobipocket "set_metadata()" function.

This commit is contained in:
Marshall T. Vandegrift 2009-01-25 13:35:18 -05:00
parent 2bf6c6ed6a
commit a0cbbc1cbe
2 changed files with 259 additions and 57 deletions

View File

@ -1,23 +1,225 @@
#!/usr/bin/env python
'''
Retrieve and modify in-place Mobipocket book metadata.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
'Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en'
'''
'''
import sys, os
import sys
import os
from struct import pack, unpack
from cStringIO import StringIO
from calibre.ebooks.metadata import get_parser, MetaInformation
from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader import get_metadata
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.langcodes import iana2mobi
class StreamSlicer(object):
    '''
    Sequence-style view onto a window of a seekable stream.

    Indexing and slicing are translated into ``seek`` + ``read``/``write``
    calls on the wrapped stream, relative to ``start``.  Assignments must
    preserve length: the window can be overwritten but never resized.

    :param stream: seekable file-like object.
    :param start: absolute offset of the first byte of the window.
    :param stop: absolute offset one past the last byte of the window;
        ``None`` means "up to the current end of the stream".
    '''

    # Python 2 has separate ``int`` and ``long`` types; tolerate interpreters
    # that have no ``long`` name.
    try:
        _INT_TYPES = (int, long)
    except NameError:
        _INT_TYPES = (int,)

    def __init__(self, stream, start=0, stop=None):
        self._stream = stream
        self.start = start
        if stop is None:
            # Seek to the end of the stream to find out how long it is.
            stream.seek(0, 2)
            stop = stream.tell()
        self.stop = stop
        self._len = stop - start

    def __len__(self):
        return self._len

    def _offset(self, key):
        '''
        Return the absolute stream offset for integer index `key`.

        Negative indices count from the end of the window.  Raises
        `IndexError` when `key` falls outside the window.
        '''
        if key < 0:
            key += self._len
        if not 0 <= key < self._len:
            raise IndexError("stream index out of range")
        return self.start + key

    def _window(self, key):
        '''
        Return ``(first, size, stride)`` for slice `key`, where `first` is
        the window-relative offset of the lowest byte the slice touches and
        `size` is the length of the contiguous run covering every touched
        byte.
        '''
        start, stop, stride = key.indices(self._len)
        if stride < 0:
            # A descending slice visits start, start + stride, ... > stop,
            # so the bytes it touches lie in [stop + 1, start + 1).
            start, stop = stop + 1, start + 1
        return start, max(stop - start, 0), stride

    def __getitem__(self, key):
        stream = self._stream
        if isinstance(key, self._INT_TYPES):
            stream.seek(self._offset(key))
            return stream.read(1)
        if isinstance(key, slice):
            first, size, stride = self._window(key)
            if size == 0:
                return ""
            stream.seek(self.start + first)
            data = stream.read(size)
            if stride != 1:
                # Applying the stride to the covering run yields exactly the
                # selected bytes, in the requested order.
                data = data[::stride]
            return data
        raise TypeError("stream indices must be integers")

    def __setitem__(self, key, value):
        stream = self._stream
        if isinstance(key, self._INT_TYPES):
            if len(value) != 1:
                raise ValueError("key and value lengths must match")
            stream.seek(self._offset(key))
            stream.write(value)
            return
        if isinstance(key, slice):
            first, size, stride = self._window(key)
            if stride != 1:
                value = self._merge(first, size, stride, value)
            if len(value) != size:
                raise ValueError("key and value lengths must match")
            stream.seek(self.start + first)
            stream.write(value)
            return
        raise TypeError("stream indices must be integers")

    def _merge(self, first, size, stride, value):
        '''
        Return the current contents of the `size`-byte run starting at
        `first` with every `stride`-th byte replaced from `value`.

        A stream can only be written contiguously, so a strided assignment
        is performed as read-modify-write.  Raises `ValueError` when `value`
        does not have exactly one byte per selected position.
        '''
        step = abs(stride)
        if len(value) != (size + step - 1) // step:
            raise ValueError("key and value lengths must match")
        stream = self._stream
        stream.seek(self.start + first)
        current = stream.read(size)
        # One-byte slices (rather than indexing) keep the element type equal
        # to the stream's own data type.
        cells = [current[i:i + 1] for i in range(len(current))]
        cells[::stride] = [value[i:i + 1] for i in range(len(value))]
        return current[:0].join(cells)
class MetadataUpdater(object):
    '''
    Rewrite the metadata of a Mobipocket book in place.

    The constructor parses just enough of `stream` to locate record 0, the
    EXTH block inside it and, when referenced by EXTH items 201/202, the
    cover and thumbnail image records.  `update()` then overwrites those
    regions without changing the size of any record.

    :param stream: seekable stream opened for reading and writing.
    '''

    def __init__(self, stream):
        self.stream = stream
        data = self.data = StreamSlicer(stream)
        self.type = data[60:68]
        self.nrecs, = unpack('>H', data[76:78])
        record0 = self.record0 = self.record(0)
        codepage, = unpack('>I', record0[28:32])
        self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
        image_base, = unpack('>I', record0[108:112])
        flags, = unpack('>I', record0[128:132])
        self.have_exth = (flags & 0x40) != 0
        # Always define these attributes, so that update() can report a
        # missing EXTH block cleanly instead of failing with AttributeError.
        self.exth = None
        self.cover_record = self.thumbnail_record = None
        self.cover_rindex = self.thumbnail_rindex = None
        if not self.have_exth:
            return
        exth_off = unpack('>I', record0[20:24])[0] + 16 + record0.start
        exth = self.exth = StreamSlicer(stream, exth_off, record0.stop)
        nitems, = unpack('>I', exth[8:12])
        pos = 12
        for index in xrange(nitems):
            # Each item is: 4-byte code, 4-byte total size, then payload.
            code, size = unpack('>II', exth[pos:pos + 8])
            content = exth[pos + 8: pos + size]
            pos += size
            if code == 201:
                self.cover_rindex, = unpack('>I', content)
                self.cover_record = self.record(
                    self.cover_rindex + image_base)
            elif code == 202:
                self.thumbnail_rindex, = unpack('>I', content)
                self.thumbnail_record = self.record(
                    self.thumbnail_rindex + image_base)

    def record(self, n):
        '''
        Return a `StreamSlicer` covering record number `n`.

        The record ends where the next record starts; the last record runs
        to the end of the stream.  Raises `ValueError` if `n` is not a valid
        record number.
        '''
        if not 0 <= n < self.nrecs:
            raise ValueError('non-existent record %r' % n)
        offoff = 78 + (8 * n)
        start, = unpack('>I', self.data[offoff + 0:offoff + 4])
        stop = None
        if n < (self.nrecs - 1):
            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
        return StreamSlicer(self.stream, start, stop)

    def _store_image(self, record, data, dimen=None):
        '''
        Overwrite image `record` with `data`, rescaled to fit the record and
        padded with NUL bytes up to the record's size.
        '''
        # NOTE(review): rescale_image() may give up and return data larger
        # than the record; the slice assignment then raises ValueError.
        size = len(record)
        image = rescale_image(data, size, dimen=dimen)
        record[:] = image + '\0' * (size - len(image))

    def update(self, mi):
        '''
        Write the metadata in `mi` into the book.

        The EXTH block is rebuilt from the authors, publisher, comments,
        ISBN and tags of `mi` (plus the existing cover/thumbnail
        references), followed by the title; the title offset/length and
        language fields of record 0 are updated; and, if `mi` carries cover
        data, the cover and thumbnail records are overwritten.

        Raises `MobiError` if the book has no EXTH block or if the new
        metadata does not fit in the space available in record 0.
        '''
        if self.exth is None:
            raise MobiError('No existing EXTH record. Cannot update metadata.')
        codec = self.codec
        recs = []
        if mi.authors:
            authors = '; '.join(mi.authors)
            recs.append((100, authors.encode(codec, 'replace')))
        if mi.publisher:
            recs.append((101, mi.publisher.encode(codec, 'replace')))
        if mi.comments:
            recs.append((103, mi.comments.encode(codec, 'replace')))
        if mi.isbn:
            recs.append((104, mi.isbn.encode(codec, 'replace')))
        if mi.tags:
            subjects = '; '.join(mi.tags)
            recs.append((105, subjects.encode(codec, 'replace')))
        if self.cover_record is not None:
            recs.append((201, pack('>I', self.cover_rindex)))
            recs.append((203, pack('>I', 0)))
        if self.thumbnail_record is not None:
            recs.append((202, pack('>I', self.thumbnail_rindex)))
        exth = StringIO()
        for code, data in recs:
            exth.write(pack('>II', code, len(data) + 8))
            exth.write(data)
        exth = exth.getvalue()
        trail = len(exth) % 4
        pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
        exth = ['EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad]
        exth = ''.join(exth)
        title = (mi.title or _('Unknown')).encode(codec, 'replace')
        # The title is stored immediately after the EXTH block; its offset
        # is expressed relative to the start of record 0.
        title_off = (self.exth.start - self.record0.start) + len(exth)
        title_len = len(title)
        trail = len(self.exth) - len(exth) - len(title)
        if trail < 0:
            raise MobiError("Insufficient space to update metadata")
        # NUL-fill the remainder so that the size of record 0 is unchanged.
        self.exth[:] = ''.join([exth, title, '\0' * trail])
        self.record0[84:92] = pack('>II', title_off, title_len)
        self.record0[92:96] = iana2mobi(mi.language)
        if mi.cover_data[1]:
            data = mi.cover_data[1]
            if self.cover_record is not None:
                self._store_image(self.cover_record, data)
            if self.thumbnail_record is not None:
                self._store_image(self.thumbnail_record, data,
                                  dimen=MAX_THUMB_DIMEN)
        return
def set_metadata(stream, mi):
    '''
    Update, in place, the Mobipocket book held in `stream` with the
    metadata in `mi`.
    '''
    MetadataUpdater(stream).update(mi)
def option_parser():
    '''
    Build the command line parser: the stock 'mobi' metadata parser without
    ``--category`` and with the extra options listed below.
    '''
    parser = get_parser('mobi')
    parser.remove_option('--category')
    extra_options = (
        ('--tags', _('Set the subject tags')),
        ('--language', _('Set the language')),
        ('--publisher', _('Set the publisher')),
        ('--isbn', _('Set the ISBN')),
    )
    for flag, description in extra_options:
        parser.add_option(flag, default=None, help=description)
    return parser
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) != 2:
parser.print_help()
print >>sys.stderr, 'Usage: %s file.mobi' % args[0]
return 1
fname = args[1]
mi = get_metadata(open(fname, 'rb'))
print unicode(mi)
if mi.cover_data[1]:
changed = False
with open(fname, 'r+b') as stream:
mi = get_metadata(stream)
if opts.title:
mi.title = opts.title
changed = True
if opts.authors:
mi.authors = opts.authors.split(',')
changed = True
if opts.comment:
mi.comments = opts.comment
changed = True
if opts.tags is not None:
mi.tags = opts.tags.split(',')
changed = True
if opts.language is not None:
mi.language = opts.language
changed = True
if opts.publisher is not None:
mi.publisher = opts.publisher
changed = True
if opts.isbn is not None:
mi.isbn = opts.isbn
changed = True
if changed:
set_metadata(stream, mi)
print unicode(get_metadata(stream))
if not changed and mi.cover_data[1]:
cover = os.path.abspath(
'.'.join((os.path.splitext(os.path.basename(fname))[0],
mi.cover_data[0].lower())))
@ -26,4 +228,4 @@ def main(args=sys.argv):
return 0
if __name__ == '__main__':
sys.exit(main())
sys.exit(main())

View File

@ -87,6 +87,49 @@ def decint(value, direction):
bytes[-1] |= 0x80
return ''.join(chr(b) for b in reversed(bytes))
def rescale_image(data, maxsizeb, dimen=None):
    '''
    Return encoded image data, derived from `data`, that tries to fit in
    `maxsizeb` bytes.

    :param data: the encoded source image.
    :param maxsizeb: maximum acceptable size of the result, in bytes.
    :param dimen: optional ``(width, height)`` bound; when given the image
        is first shrunk to fit within it.
    :return: the encoded image.  If no attempt gets below `maxsizeb` the
        last (smallest-scale) attempt is returned anyway.
    '''
    image = Image.open(StringIO(data))
    fmt = image.format
    changed = False
    if image.format not in ('JPEG', 'GIF'):
        # Re-encode other formats: small images as GIF, the rest as JPEG.
        width, height = image.size
        if width * height <= 40000:
            fmt = 'GIF'
        else:
            image = image.convert('RGBA')
            fmt = 'JPEG'
        changed = True
    if dimen is not None:
        image.thumbnail(dimen, Image.ANTIALIAS)
        changed = True
    if changed:
        out = StringIO()
        image.save(out, fmt)
        data = out.getvalue()
    if len(data) <= maxsizeb:
        return data
    # Still too big: first trade JPEG quality for size...
    image = image.convert('RGBA')
    for quality in xrange(95, -1, -1):
        out = StringIO()
        image.save(out, 'JPEG', quality=quality)
        data = out.getvalue()
        if len(data) <= maxsizeb:
            return data
    # ...then shrink the image itself, one percent at a time.
    width, height = image.size
    for percent in xrange(99, 0, -1):
        scale = percent / 100.
        # Both dimensions must be whole pixels, and never less than one.
        size = (max(1, int(width * scale)), max(1, int(height * scale)))
        scaled = image.copy()
        scaled.thumbnail(size, Image.ANTIALIAS)
        out = StringIO()
        scaled.save(out, 'JPEG', quality=0)
        data = out.getvalue()
        if len(data) <= maxsizeb:
            return data
    # Well, we tried?
    return data
class Serializer(object):
NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
@ -355,50 +398,7 @@ class MobiWriter(object):
offset += RECORD_SIZE
data, overlap = self._read_text_record(text)
self._text_nrecords = nrecords
def _rescale_image(self, data, maxsizeb, dimen=None):
    '''
    Return encoded image data, derived from `data`, that tries to fit in
    `maxsizeb` bytes.

    If `dimen` is given the image is first shrunk to fit within that
    ``(width, height)`` bound.  If no attempt gets below `maxsizeb` the
    last (smallest-scale) attempt is returned anyway.
    '''
    image = Image.open(StringIO(data))
    fmt = image.format
    changed = False
    if image.format not in ('JPEG', 'GIF'):
        # Re-encode other formats: small images as GIF, the rest as JPEG.
        width, height = image.size
        if width * height <= 40000:
            fmt = 'GIF'
        else:
            image = image.convert('RGBA')
            fmt = 'JPEG'
        changed = True
    if dimen is not None:
        image.thumbnail(dimen, Image.ANTIALIAS)
        changed = True
    if changed:
        out = StringIO()
        image.save(out, fmt)
        data = out.getvalue()
    if len(data) <= maxsizeb:
        return data
    # Still too big: first trade JPEG quality for size...
    image = image.convert('RGBA')
    for quality in xrange(95, -1, -1):
        out = StringIO()
        image.save(out, 'JPEG', quality=quality)
        data = out.getvalue()
        if len(data) <= maxsizeb:
            return data
    # ...then shrink the image itself, one percent at a time.
    width, height = image.size
    for percent in xrange(99, 0, -1):
        scale = percent / 100.
        # Both dimensions must be whole pixels, and never less than one.
        size = (max(1, int(width * scale)), max(1, int(height * scale)))
        scaled = image.copy()
        scaled.thumbnail(size, Image.ANTIALIAS)
        out = StringIO()
        scaled.save(out, 'JPEG', quality=0)
        data = out.getvalue()
        if len(data) <= maxsizeb:
            return data
    # Well, we tried?
    return data
def _generate_images(self):
self._oeb.logger.info('Serializing images...')
images = [(index, href) for href, index in self._images.items()]
@ -407,7 +407,7 @@ class MobiWriter(object):
coverid = metadata.cover[0] if metadata.cover else None
for _, href in images:
item = self._oeb.manifest.hrefs[href]
data = self._rescale_image(item.data, self._imagemax)
data = rescale_image(item.data, self._imagemax)
self._records.append(data)
def _generate_record0(self):
@ -480,7 +480,7 @@ class MobiWriter(object):
return ''.join(exth)
def _add_thumbnail(self, item):
data = self._rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
data = rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
manifest = self._oeb.manifest
id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
manifest.add(id, href, 'image/jpeg', data=data)