mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Implement Mobipocket "set_metadata()" function.
This commit is contained in:
parent
2bf6c6ed6a
commit
a0cbbc1cbe
@ -1,23 +1,225 @@
|
|||||||
#!/usr/bin/env python
|
'''
|
||||||
|
Retrieve and modify in-place Mobipocket book metadata.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
|
||||||
|
'Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
'''
|
import sys
|
||||||
'''
|
import os
|
||||||
|
from struct import pack, unpack
|
||||||
import sys, os
|
from cStringIO import StringIO
|
||||||
|
from calibre.ebooks.metadata import get_parser, MetaInformation
|
||||||
|
from calibre.ebooks.mobi import MobiError
|
||||||
from calibre.ebooks.mobi.reader import get_metadata
|
from calibre.ebooks.mobi.reader import get_metadata
|
||||||
|
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN
|
||||||
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
|
|
||||||
|
class StreamSlicer(object):
    """
    Expose a window of a seekable stream through the sequence protocol.

    Indexing and slicing read bytes from the underlying stream; index and
    slice assignment overwrite bytes in place. Assignments never change the
    size of the window: the value must have exactly as many bytes as the
    key selects.

    :param stream: Seekable file-like object. It must be writable for
        item assignment to work.
    :param start: Absolute stream offset of the first byte of the window.
    :param stop: Absolute stream offset one past the last byte of the
        window. ``None`` means the current end of the stream.
    """

    def __init__(self, stream, start=0, stop=None):
        self._stream = stream
        self.start = start
        if stop is None:
            # Whence 2 == seek relative to the end of the stream.
            stream.seek(0, 2)
            stop = stream.tell()
        self.stop = stop
        self._len = stop - start

    def __len__(self):
        return self._len

    def _offset(self, key):
        """Map an integer index to an absolute stream offset.

        Negative indices count from the end of the window. Raises
        ``IndexError`` for indices outside the window, which also lets
        ``for byte in slicer`` terminate.
        """
        index = key.__index__()
        if index < 0:
            index += self._len
        if not 0 <= index < self._len:
            raise IndexError("stream index out of range")
        return self.start + index

    def _span(self, key):
        """Resolve a slice to ``(low, count, stride)``.

        ``count`` is the number of bytes the slice selects and ``low`` the
        smallest window-relative index among them, so the selected bytes
        all live in the ``(count - 1) * abs(stride) + 1`` bytes starting
        at ``low``.
        """
        start, stop, stride = key.indices(self._len)
        if stride > 0:
            count = (stop - start + stride - 1) // stride
        else:
            count = (start - stop - stride - 1) // (-stride)
        count = max(count, 0)
        last = start + (count - 1) * stride
        return min(start, last), count, stride

    def __getitem__(self, key):
        stream = self._stream
        if isinstance(key, slice):
            low, count, stride = self._span(key)
            if count == 0:
                # Zero-length read: an empty string of the stream's own type.
                return stream.read(0)
            stream.seek(self.start + low)
            data = stream.read((count - 1) * abs(stride) + 1)
            # The span is exact, so striding over it (from the end when the
            # stride is negative) visits precisely the selected bytes.
            if stride != 1:
                data = data[::stride]
            return data
        if hasattr(key, '__index__'):
            stream.seek(self._offset(key))
            return stream.read(1)
        raise TypeError("stream indices must be integers")

    def __setitem__(self, key, value):
        stream = self._stream
        if isinstance(key, slice):
            low, count, stride = self._span(key)
            if len(value) != count:
                raise ValueError("key and value lengths must match")
            if count == 0:
                return
            offset = self.start + low
            if stride != 1:
                value = self._merge(offset, count, stride, value)
            stream.seek(offset)
            stream.write(value)
            return
        if hasattr(key, '__index__'):
            if len(value) != 1:
                raise ValueError("key and value lengths must match")
            stream.seek(self._offset(key))
            stream.write(value)
            return
        raise TypeError("stream indices must be integers")

    def _merge(self, offset, count, stride, value):
        """Build the contiguous bytes to write for a strided assignment.

        Reads the span currently on disk and substitutes the bytes of
        ``value`` at every ``abs(stride)``-th position, leaving the bytes
        in between untouched.
        """
        step = abs(stride)
        stream = self._stream
        stream.seek(offset)
        current = stream.read((count - 1) * step + 1)
        if stride < 0:
            # A negative stride assigns from the high end downwards.
            value = value[::-1]
        pieces = []
        for i in range(count):
            pieces.append(value[i:i + 1])
            pieces.append(current[i * step + 1:(i + 1) * step])
        # current[:0] is an empty string of the same type as the data.
        return current[:0].join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
class MetadataUpdater(object):
    """
    Rewrite the metadata of a Mobipocket file in place.

    The constructor parses just enough of the file to locate record 0, the
    text encoding, the EXTH block and (when the EXTH block names them) the
    cover and thumbnail image records. :meth:`update` then overwrites the
    EXTH block, title, language and images without changing the size of
    any record.

    :param stream: Seekable stream on the Mobipocket file, opened for both
        reading and writing.
    """

    def __init__(self, stream):
        self.stream = stream
        data = self.data = StreamSlicer(stream)
        self.type = data[60:68]
        self.nrecs, = unpack('>H', data[76:78])
        record0 = self.record0 = self.record(0)
        codepage, = unpack('>I', record0[28:32])
        self.codec = 'utf-8' if codepage == 65001 else 'cp1252'
        image_base, = unpack('>I', record0[108:112])
        flags, = unpack('>I', record0[128:132])
        # Give every attribute update() reads a value before the early
        # return below, so a file without an EXTH block fails there with
        # a MobiError rather than an AttributeError.
        self.exth = None
        self.cover_record = self.thumbnail_record = None
        self.cover_rindex = self.thumbnail_rindex = None
        self.have_exth = (flags & 0x40) != 0
        if not self.have_exth:
            return
        # NOTE(review): bytes 20:24 of record 0 look like the MOBI header
        # length, with 16 bytes of header preceding it - confirm against
        # the format documentation.
        exth_off = unpack('>I', record0[20:24])[0] + 16 + record0.start
        exth = self.exth = StreamSlicer(stream, exth_off, record0.stop)
        nitems, = unpack('>I', exth[8:12])
        pos = 12
        for index in xrange(nitems):
            # Each item is: 4-byte code, 4-byte total size, then payload.
            code, size = unpack('>II', exth[pos:pos + 8])
            content = exth[pos + 8:pos + size]
            pos += size
            if code == 201:
                self.cover_rindex, = unpack('>I', content)
                self.cover_record = self.record(
                    self.cover_rindex + image_base)
            elif code == 202:
                self.thumbnail_rindex, = unpack('>I', content)
                self.thumbnail_record = self.record(
                    self.thumbnail_rindex + image_base)

    def record(self, n):
        """
        Return a :class:`StreamSlicer` over record number ``n``.

        The record's start offset is read from the 8-byte-per-entry table
        that begins at offset 78; its end is the start offset of the next
        record, or the end of the stream for the last record.

        :raises ValueError: if ``n`` is not less than the record count.
        """
        if n >= self.nrecs:
            raise ValueError('non-existent record %r' % n)
        offoff = 78 + (8 * n)
        start, = unpack('>I', self.data[offoff:offoff + 4])
        stop = None
        if n < (self.nrecs - 1):
            stop, = unpack('>I', self.data[offoff + 8:offoff + 12])
        return StreamSlicer(self.stream, start, stop)

    def _write_image(self, record, data, dimen=None):
        """Re-encode ``data`` to fit ``record`` and overwrite the record.

        The image is padded with NUL bytes up to the record size. If
        ``rescale_image`` cannot get the image under the record size, the
        slice assignment raises ``ValueError``.
        """
        size = len(record)
        image = rescale_image(data, size, dimen=dimen)
        record[:] = image + '\0' * (size - len(image))

    def update(self, mi):
        """
        Write the metadata in ``mi`` into the file.

        Authors, publisher, comments, ISBN and tags are written as EXTH
        items 100, 101, 103, 104 and 105; the existing cover and thumbnail
        references (201/202) are preserved. The title is stored right
        after the new EXTH block and the remainder of the old EXTH area is
        NUL-filled. If ``mi`` carries cover data, the existing cover and
        thumbnail records are overwritten with rescaled copies.

        :raises MobiError: if the file has no EXTH block, or if the new
            EXTH block plus title do not fit in the space of the old one.
        """
        if self.exth is None:
            raise MobiError("No EXTH header present; cannot update metadata")
        codec = self.codec
        recs = []
        if mi.authors:
            recs.append((100, '; '.join(mi.authors).encode(codec, 'replace')))
        if mi.publisher:
            recs.append((101, mi.publisher.encode(codec, 'replace')))
        if mi.comments:
            recs.append((103, mi.comments.encode(codec, 'replace')))
        if mi.isbn:
            recs.append((104, mi.isbn.encode(codec, 'replace')))
        if mi.tags:
            recs.append((105, '; '.join(mi.tags).encode(codec, 'replace')))
        if self.cover_record is not None:
            recs.append((201, pack('>I', self.cover_rindex)))
            recs.append((203, pack('>I', 0)))
        if self.thumbnail_record is not None:
            recs.append((202, pack('>I', self.thumbnail_rindex)))
        # The size field of each item counts its own 8-byte header.
        body = ''.join(pack('>II', code, len(data) + 8) + data
                       for code, data in recs)
        pad = '\0' * (4 - len(body) % 4)  # Always pad w/ at least 1 byte
        exth = ''.join(
            ['EXTH', pack('>II', len(body) + 12, len(recs)), body, pad])
        title = (mi.title or _('Unknown')).encode(codec, 'replace')
        # The title offset is relative to the start of record 0.
        title_off = (self.exth.start - self.record0.start) + len(exth)
        title_len = len(title)
        trail = len(self.exth) - len(exth) - len(title)
        if trail < 0:
            raise MobiError("Insufficient space to update metadata")
        self.exth[:] = ''.join([exth, title, '\0' * trail])
        self.record0[84:92] = pack('>II', title_off, title_len)
        self.record0[92:96] = iana2mobi(mi.language)
        cover_data = mi.cover_data[1]
        if cover_data:
            if self.cover_record is not None:
                self._write_image(self.cover_record, cover_data)
            if self.thumbnail_record is not None:
                self._write_image(self.thumbnail_record, cover_data,
                                  dimen=MAX_THUMB_DIMEN)
        return
|
||||||
|
|
||||||
|
def set_metadata(stream, mi):
    """
    Update the Mobipocket file open on ``stream`` with the metadata ``mi``.

    The file is modified in place through a :class:`MetadataUpdater`.
    """
    MetadataUpdater(stream).update(mi)
|
||||||
|
|
||||||
|
|
||||||
|
def option_parser():
    """
    Build the command line parser for the Mobipocket metadata tool.

    Starts from the generic metadata parser, drops ``--category`` and adds
    the ``--tags``, ``--language``, ``--publisher`` and ``--isbn`` options,
    all of which default to ``None``.
    """
    parser = get_parser('mobi')
    parser.remove_option('--category')
    extra_options = (
        ('--tags', _('Set the subject tags')),
        ('--language', _('Set the language')),
        ('--publisher', _('Set the publisher')),
        ('--isbn', _('Set the ISBN')),
    )
    for flag, description in extra_options:
        parser.add_option(flag, default=None, help=description)
    return parser
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
|
parser = option_parser()
|
||||||
|
opts, args = parser.parse_args(args)
|
||||||
if len(args) != 2:
|
if len(args) != 2:
|
||||||
|
parser.print_help()
|
||||||
print >>sys.stderr, 'Usage: %s file.mobi' % args[0]
|
print >>sys.stderr, 'Usage: %s file.mobi' % args[0]
|
||||||
return 1
|
return 1
|
||||||
fname = args[1]
|
fname = args[1]
|
||||||
mi = get_metadata(open(fname, 'rb'))
|
changed = False
|
||||||
print unicode(mi)
|
with open(fname, 'r+b') as stream:
|
||||||
if mi.cover_data[1]:
|
mi = get_metadata(stream)
|
||||||
|
if opts.title:
|
||||||
|
mi.title = opts.title
|
||||||
|
changed = True
|
||||||
|
if opts.authors:
|
||||||
|
mi.authors = opts.authors.split(',')
|
||||||
|
changed = True
|
||||||
|
if opts.comment:
|
||||||
|
mi.comments = opts.comment
|
||||||
|
changed = True
|
||||||
|
if opts.tags is not None:
|
||||||
|
mi.tags = opts.tags.split(',')
|
||||||
|
changed = True
|
||||||
|
if opts.language is not None:
|
||||||
|
mi.language = opts.language
|
||||||
|
changed = True
|
||||||
|
if opts.publisher is not None:
|
||||||
|
mi.publisher = opts.publisher
|
||||||
|
changed = True
|
||||||
|
if opts.isbn is not None:
|
||||||
|
mi.isbn = opts.isbn
|
||||||
|
changed = True
|
||||||
|
if changed:
|
||||||
|
set_metadata(stream, mi)
|
||||||
|
print unicode(get_metadata(stream))
|
||||||
|
if not changed and mi.cover_data[1]:
|
||||||
cover = os.path.abspath(
|
cover = os.path.abspath(
|
||||||
'.'.join((os.path.splitext(os.path.basename(fname))[0],
|
'.'.join((os.path.splitext(os.path.basename(fname))[0],
|
||||||
mi.cover_data[0].lower())))
|
mi.cover_data[0].lower())))
|
||||||
|
@ -87,6 +87,49 @@ def decint(value, direction):
|
|||||||
bytes[-1] |= 0x80
|
bytes[-1] |= 0x80
|
||||||
return ''.join(chr(b) for b in reversed(bytes))
|
return ''.join(chr(b) for b in reversed(bytes))
|
||||||
|
|
||||||
|
def rescale_image(data, maxsizeb, dimen=None):
    """
    Re-encode an image so that it fits in ``maxsizeb`` bytes, if possible.

    The steps, stopping at the first result that is small enough:

    1. Images that are neither JPEG nor GIF are converted: to GIF when
       they cover at most 40000 pixels, to JPEG otherwise. If ``dimen`` is
       given the image is first shrunk to fit within it.
    2. The image is saved as JPEG at decreasing quality, 95 down to 0.
    3. The image is scaled down in 1% steps, 99% to 1% of its size, and
       saved as JPEG at quality 0.

    :param data: The encoded source image, as a byte string.
    :param maxsizeb: Target maximum size of the result, in bytes.
    :param dimen: Optional ``(width, height)`` bound applied with
        ``Image.thumbnail`` before anything else.
    :return: The encoded image as a byte string. This is best effort: if
        no attempt fits, the last (smallest scale) attempt is returned
        even though it is larger than ``maxsizeb``.
    """
    def encode(img, fmt, **options):
        out = StringIO()
        img.save(out, fmt, **options)
        return out.getvalue()

    image = Image.open(StringIO(data))
    fmt = image.format
    changed = False
    if image.format not in ('JPEG', 'GIF'):
        width, height = image.size
        if width * height <= 40000:
            fmt = 'GIF'
        else:
            image = image.convert('RGBA')
            fmt = 'JPEG'
        changed = True
    if dimen is not None:
        image.thumbnail(dimen, Image.ANTIALIAS)
        changed = True
    if changed:
        data = encode(image, fmt)
    if len(data) <= maxsizeb:
        return data
    image = image.convert('RGBA')
    for quality in xrange(95, -1, -1):
        data = encode(image, 'JPEG', quality=quality)
        if len(data) <= maxsizeb:
            return data
    width, height = image.size
    for percent in xrange(99, 0, -1):
        scale = percent / 100.
        scaled = image.copy()
        # Both dimensions must be integers; the height was previously
        # passed through as a float.
        size = (int(width * scale), int(height * scale))
        scaled.thumbnail(size, Image.ANTIALIAS)
        data = encode(scaled, 'JPEG', quality=0)
        if len(data) <= maxsizeb:
            return data
    # Well, we tried?
    return data
|
||||||
|
|
||||||
|
|
||||||
class Serializer(object):
|
class Serializer(object):
|
||||||
NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
|
NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
|
||||||
@ -356,49 +399,6 @@ class MobiWriter(object):
|
|||||||
data, overlap = self._read_text_record(text)
|
data, overlap = self._read_text_record(text)
|
||||||
self._text_nrecords = nrecords
|
self._text_nrecords = nrecords
|
||||||
|
|
||||||
def _rescale_image(self, data, maxsizeb, dimen=None):
|
|
||||||
image = Image.open(StringIO(data))
|
|
||||||
format = image.format
|
|
||||||
changed = False
|
|
||||||
if image.format not in ('JPEG', 'GIF'):
|
|
||||||
width, height = image.size
|
|
||||||
area = width * height
|
|
||||||
if area <= 40000:
|
|
||||||
format = 'GIF'
|
|
||||||
else:
|
|
||||||
image = image.convert('RGBA')
|
|
||||||
format = 'JPEG'
|
|
||||||
changed = True
|
|
||||||
if dimen is not None:
|
|
||||||
image.thumbnail(dimen, Image.ANTIALIAS)
|
|
||||||
changed = True
|
|
||||||
if changed:
|
|
||||||
data = StringIO()
|
|
||||||
image.save(data, format)
|
|
||||||
data = data.getvalue()
|
|
||||||
if len(data) <= maxsizeb:
|
|
||||||
return data
|
|
||||||
image = image.convert('RGBA')
|
|
||||||
for quality in xrange(95, -1, -1):
|
|
||||||
data = StringIO()
|
|
||||||
image.save(data, 'JPEG', quality=quality)
|
|
||||||
data = data.getvalue()
|
|
||||||
if len(data) <= maxsizeb:
|
|
||||||
return data
|
|
||||||
width, height = image.size
|
|
||||||
for scale in xrange(99, 0, -1):
|
|
||||||
scale = scale / 100.
|
|
||||||
data = StringIO()
|
|
||||||
scaled = image.copy()
|
|
||||||
size = (int(width * scale), (height * scale))
|
|
||||||
scaled.thumbnail(size, Image.ANTIALIAS)
|
|
||||||
scaled.save(data, 'JPEG', quality=0)
|
|
||||||
data = data.getvalue()
|
|
||||||
if len(data) <= maxsizeb:
|
|
||||||
return data
|
|
||||||
# Well, we tried?
|
|
||||||
return data
|
|
||||||
|
|
||||||
def _generate_images(self):
|
def _generate_images(self):
|
||||||
self._oeb.logger.info('Serializing images...')
|
self._oeb.logger.info('Serializing images...')
|
||||||
images = [(index, href) for href, index in self._images.items()]
|
images = [(index, href) for href, index in self._images.items()]
|
||||||
@ -407,7 +407,7 @@ class MobiWriter(object):
|
|||||||
coverid = metadata.cover[0] if metadata.cover else None
|
coverid = metadata.cover[0] if metadata.cover else None
|
||||||
for _, href in images:
|
for _, href in images:
|
||||||
item = self._oeb.manifest.hrefs[href]
|
item = self._oeb.manifest.hrefs[href]
|
||||||
data = self._rescale_image(item.data, self._imagemax)
|
data = rescale_image(item.data, self._imagemax)
|
||||||
self._records.append(data)
|
self._records.append(data)
|
||||||
|
|
||||||
def _generate_record0(self):
|
def _generate_record0(self):
|
||||||
@ -480,7 +480,7 @@ class MobiWriter(object):
|
|||||||
return ''.join(exth)
|
return ''.join(exth)
|
||||||
|
|
||||||
def _add_thumbnail(self, item):
|
def _add_thumbnail(self, item):
|
||||||
data = self._rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
|
data = rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
|
||||||
manifest = self._oeb.manifest
|
manifest = self._oeb.manifest
|
||||||
id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
|
id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
|
||||||
manifest.add(id, href, 'image/jpeg', data=data)
|
manifest.add(id, href, 'image/jpeg', data=data)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user