diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py index 2f8c90f4c4..f4809e3cff 100644 --- a/src/calibre/ebooks/metadata/mobi.py +++ b/src/calibre/ebooks/metadata/mobi.py @@ -17,6 +17,8 @@ from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN from calibre.ebooks.mobi.langcodes import iana2mobi +import struct, traceback + class StreamSlicer(object): def __init__(self, stream, start=0, stop=None): @@ -72,6 +74,17 @@ class StreamSlicer(object): return stream.write(value) raise TypeError("stream indices must be integers") + def update(self, data_blocks): + # Rewrite the stream + stream = self._stream + base = self.start + stream.seek(base) + self._stream.truncate(base) + for block in data_blocks: + stream.write(block) + + def truncate(self, value): + self._stream.truncate(value) class MetadataUpdater(object): def __init__(self, stream): @@ -84,13 +97,27 @@ class MetadataUpdater(object): codepage, = unpack('>I', record0[28:32]) self.codec = 'utf-8' if codepage == 65001 else 'cp1252' image_base, = unpack('>I', record0[108:112]) - flags, = unpack('>I', record0[128:132]) + flags, = self.flags, = unpack('>I', record0[128:132]) have_exth = self.have_exth = (flags & 0x40) != 0 self.cover_record = self.thumbnail_record = None self.timestamp = None + + self.pdbrecords = self.get_pdbrecords() if not have_exth: - return + self.create_exth() + + # Fetch timestamp, cover_record, thumbnail_record + self.fetchEXTHFields() + + def fetchEXTHFields(self): + stream = self.stream + record0 = self.record0 + + # 20:24 = mobiHeaderLength, 16=PDBHeader size exth_off = unpack('>I', record0[20:24])[0] + 16 + record0.start + image_base, = unpack('>I', record0[108:112]) + + # Fetch EXTH block exth = self.exth = StreamSlicer(stream, exth_off, record0.stop) nitems, = unpack('>I', exth[8:12]) pos = 12 @@ -108,7 +135,136 @@ class MetadataUpdater(object): elif id == 202: rindex, = self.thumbnail_rindex, = unpack('>I', content) self.thumbnail_record = self.record(rindex + image_base) + + def patch(self, off, new_record0): + # Save the current size of each record + record_sizes = [len(new_record0)] + for i in range(1,self.nrecs-1): + record_sizes.append(self.pdbrecords[i+1][0]-self.pdbrecords[i][0]) + # And the last one + record_sizes.append(self.data.stop - self.pdbrecords[self.nrecs-1][0]) + + # pdbrecord[0] is the offset of record0. It will not change + # record1 offset will be offset of record0 + len(new_record0) + updated_pdbrecords = [self.pdbrecords[0][0]] + record0_offset = self.pdbrecords[0][0] + current_offset = self.pdbrecords[1][0] + updated_offset = record0_offset + len(new_record0) + + for i in range(1,self.nrecs-1): + updated_pdbrecords.append(updated_offset) + updated_offset += record_sizes[i] + # Update the last pdbrecord + updated_pdbrecords.append(updated_offset) + # Read in current records 1 to last + data_blocks = [new_record0] + for i in range(1,self.nrecs): + data_blocks.append(self.data[self.pdbrecords[i][0]:self.pdbrecords[i][0] + record_sizes[i]]) + + # Rewrite the stream + self.record0.update(data_blocks) + + # Rewrite the pdbrecords + self.update_pdbrecords(updated_pdbrecords) + + # Truncate if necessary + if (updated_pdbrecords[-1] + record_sizes[-1]) < self.data.stop: + self.data.truncate(updated_pdbrecords[-1] + record_sizes[-1]) + else: + self.data.stop = updated_pdbrecords[-1] + record_sizes[-1] + + def patchSection(self, section, new): + if (section + 1 == self.nrecs): + endoff = len(self.stream) + else: + endoff = self.pdbrecords[section + 1][0] + off = self.pdbrecords[section][0] + self.patch(off, new) + + def create_exth(self, exth=None): + # Add an EXTH block to record 0, rewrite the stream + # self.hexdump(self.record0) + + # Fetch the title + title_offset, = struct.unpack('>L', self.record0[0x54:0x58]) + title_length, = struct.unpack('>L', self.record0[0x58:0x5c]) + title_in_file, = struct.unpack('%ds' % (title_length), self.record0[title_offset:title_offset + title_length]) + + # Adjust length to accommodate PrimaryINDX if necessary + mobi_header_length, = unpack('>L', self.record0[0x14:0x18]) + if mobi_header_length == 0xe4: + # Patch mobi_header_length to 0xE8 + self.record0[0x17] = "\xe8" + self.record0[0xf4:0xf8] = pack('>L', 0xFFFFFFFF) + mobi_header_length = 0xe8 + + # Set EXTH flag (0x40) + self.record0[0x80:0x84] = pack('>L', self.flags|0x40) + + if not exth: + # Construct an empty EXTH block + pad = '\0' * 4 + exth = ['EXTH', pack('>II', 12, 0), pad] + exth = ''.join(exth) + + # Update title_offset + self.record0[0x54:0x58] = pack('>L', 0x10 + mobi_header_length + len(exth)) + + # Create an updated Record0 + new_record0 = StringIO() + new_record0.write(self.record0[:0x10 + mobi_header_length]) + new_record0.write(exth) + new_record0.write(title_in_file) + + # Pad to a 4-byte boundary + trail = len(new_record0.getvalue()) % 4 + pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte + new_record0.write(pad) + + self.hexdump(new_record0.getvalue()) + + # Rebuild the stream, update the pdbrecords pointers + self.patchSection(0,new_record0.getvalue()) + + # Update record0 + self.record0 = self.record(0) + + def hexdump(self, src, length=16): + # Diagnostic + FILTER=''.join([(len(repr(chr(x)))==3) and chr(x) or '.' for x in range(256)]) + N=0; result='' + while src: + s,src = src[:length],src[length:] + hexa = ' '.join(["%02X"%ord(x) for x in s]) + s = s.translate(FILTER) + result += "%04X %-*s %s\n" % (N, length*3, hexa, s) + N+=length + print result + + def get_pdbrecords(self): + pdbrecords = [] + for i in xrange(self.nrecs): + offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data[78+i*8:78+i*8+8]) + flags, val = a1, a2<<16|a3<<8|a4 + pdbrecords.append( [offset, flags, val] ) + return pdbrecords + + def update_pdbrecords(self, updated_pdbrecords): + for (i, pdbrecord) in enumerate(updated_pdbrecords): + self.data[78+i*8:78+i*8 + 4] = pack('>L',pdbrecord) + + # Refresh local copy + self.pdbrecords = self.get_pdbrecords() + + def dump_pdbrecords(self): + # Diagnostic + print "MetadataUpdater.dump_pdbrecords()" + print "%10s %10s %10s" % ("offset","flags","val") + for i in xrange(len(self.pdbrecords)): + pdbrecord = self.pdbrecords[i] + print "%10X %10X %10X" % (pdbrecord[0], pdbrecord[1], pdbrecord[2]) + def record(self, n): if n >= self.nrecs: raise ValueError('non-existent record %r' % n) @@ -142,7 +298,6 @@ class MetadataUpdater(object): if mi.tags: subjects = '; '.join(mi.tags) recs.append((105, subjects.encode(self.codec, 'replace'))) - if mi.pubdate: recs.append((106, str(mi.pubdate).encode(self.codec, 'replace'))) elif mi.timestamp: @@ -151,15 +306,16 @@ class MetadataUpdater(object): recs.append((106, self.timestamp)) else: recs.append((106, str(datetime.now()).encode(self.codec, 'replace'))) - if self.cover_record is not None: recs.append((201, pack('>I', self.cover_rindex))) recs.append((203, pack('>I', 0))) if self.thumbnail_record is not None: recs.append((202, pack('>I', self.thumbnail_rindex))) - exth = StringIO() + if getattr(self, 'encryption_type', -1) != 0: raise MobiError('Setting metadata in DRMed MOBI files is not supported.') + + exth = StringIO() for code, data in recs: exth.write(pack('>II', code, len(data) + 8)) exth.write(data) @@ -168,17 +324,17 @@ class MetadataUpdater(object): pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte exth = ['EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad] exth = ''.join(exth) + title = (mi.title or _('Unknown')).encode(self.codec, 'replace') if getattr(self, 'exth', None) is None: raise MobiError('No existing EXTH record. Cannot update metadata.') - title_off = (self.exth.start - self.record0.start) + len(exth) - title_len = len(title) - trail = len(self.exth) - len(exth) - len(title) - if trail < 0: - raise MobiError("Insufficient space to update metadata") - self.exth[:] = ''.join([exth, title, '\0' * trail]) - self.record0[84:92] = pack('>II', title_off, title_len) + self.record0[92:96] = iana2mobi(mi.language) + self.create_exth(exth) + + # Fetch updated timestamp, cover_record, thumbnail_record + self.fetchEXTHFields() + if mi.cover_data[1] or mi.cover: try: data = mi.cover_data[1] if mi.cover_data[1] else open(mi.cover, 'rb').read()