From 41f168413b732a126e77c3e07ace65d8dd06cec6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Mar 2012 13:05:14 +0530 Subject: [PATCH 1/5] Add preliminary support for extracting FONT records to inspect mobi --- .../ebooks/conversion/plugins/mobi_input.py | 2 +- src/calibre/ebooks/mobi/debug.py | 44 ++++++++++++++++--- src/calibre/ebooks/mobi/reader/mobi8.py | 10 +++-- 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/conversion/plugins/mobi_input.py b/src/calibre/ebooks/conversion/plugins/mobi_input.py index 8ce44efa96..144158e966 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_input.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_input.py @@ -52,7 +52,7 @@ class MOBIInput(InputFormatPlugin): mr.extract_content(u'.', parse_cache) if mr.kf8_type is not None: - log('Found KF8 MOBI of type %s'%mr.kf8_type) + log('Found KF8 MOBI of type %r'%mr.kf8_type) from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader return os.path.abspath(Mobi8Reader(mr, log)()) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 7f2695b5c4..800b2b7bec 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import struct, datetime, sys, os, shutil +import struct, datetime, sys, os, shutil, zlib from collections import OrderedDict, defaultdict from lxml import html @@ -1149,6 +1149,32 @@ class BinaryRecord(object): # {{{ # }}} +class FontRecord(object): # {{{ + + def __init__(self, idx, record): + self.raw = record.raw + name = '%06d'%idx + (self.uncompressed_size, self.unknown1, self.unknown2) = \ + struct.unpack_from(b'>LLL', self.raw, 4) + self.payload = self.raw[4:] + self.ext = 'unknown' + if self.unknown1 == 1: + self.zlib_header = self.raw[self.unknown2:self.unknown2+2] + self.payload = zlib.decompress(self.raw[self.unknown2+2:-4], -15) + hdr = self.payload[:4] + if hdr in {b'\0\1\0\0', b'true', b'ttcf'}: + self.ext = 'ttf' + else: + print ('Unknown font record with fields: %s' % + [self.uncompressed_size, self.unknown1, self.unknown2]) + self.name = '%s.%s'%(name, self.ext) + + def dump(self, folder): + with open(os.path.join(folder, self.name), 'wb') as f: + f.write(self.payload) + +# }}} + class TBSIndexing(object): # {{{ def __init__(self, text_records, indices, doc_type): @@ -1410,6 +1436,7 @@ class MOBIFile(object): # {{{ self.mobi_header.extra_data_flags, decompress) for r in xrange(1, min(len(self.records), ntr+1))] self.image_records, self.binary_records = [], [] + self.font_records = [] image_index = 0 for i in xrange(fntbr, len(self.records)): if i in self.indexing_record_nums or i in self.huffman_record_nums: @@ -1419,13 +1446,15 @@ class MOBIFile(object): # {{{ fmt = None if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP', - b'AUDI', b'VIDE'}: + b'AUDI', b'VIDE', b'FONT'}: try: width, height, fmt = identify_data(r.raw) except: pass if fmt is not None: self.image_records.append(ImageRecord(image_index, r, fmt)) + elif r.raw[:4] == b'FONT': + self.font_records.append(FontRecord(i, r)) else: self.binary_records.append(BinaryRecord(i, r)) @@ -1465,10 +1494,11 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{ of.write(rec.raw) alltext += rec.raw of.seek(0) - root = html.fromstring(alltext.decode('utf-8')) - with open(os.path.join(ddir, 'pretty.html'), 'wb') as of: - of.write(html.tostring(root, 
pretty_print=True, encoding='utf-8', - include_meta_content_type=True)) + if f.mobi_header.file_version < 8: + root = html.fromstring(alltext.decode('utf-8')) + with open(os.path.join(ddir, 'pretty.html'), 'wb') as of: + of.write(html.tostring(root, pretty_print=True, encoding='utf-8', + include_meta_content_type=True)) if f.index_header is not None: @@ -1490,7 +1520,7 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{ f.tbs_indexing.dump(ddir) for tdir, attr in [('text', 'text_records'), ('images', 'image_records'), - ('binary', 'binary_records')]: + ('binary', 'binary_records'), ('font', 'font_records')]: tdir = os.path.join(ddir, tdir) os.mkdir(tdir) for rec in getattr(f, attr): diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py index d1f7ae93d9..86d123bf7a 100644 --- a/src/calibre/ebooks/mobi/reader/mobi8.py +++ b/src/calibre/ebooks/mobi/reader/mobi8.py @@ -351,7 +351,7 @@ class Mobi8Reader(object): fields = struct.unpack_from(b'>LLLL', data, 4) except: fields = None - #self.log.debug('Font record fields: %s'%(fields,)) + # self.log.debug('Font record fields: %s'%(fields,)) cdata = data[26:-4] ext = 'dat' try: @@ -361,11 +361,13 @@ class Mobi8Reader(object): 'Fields: %s' % (fname_idx, fields,)) uncompressed_data = data[4:] ext = 'failed' - hdr = uncompressed_data[0:4] if len(uncompressed_data) < 200: - self.log.warn('Corrupted font record: %d'%fname_idx) + self.log.warn('Failed to uncompress embedded font %d: ' + 'Fields: %s' % (fname_idx, fields,)) + uncompressed_data = data[4:] ext = 'failed' - if hdr == b'\0\1\0\0' or hdr == b'true' or hdr == b'ttcf': + hdr = uncompressed_data[:4] + if ext != 'failed' and hdr in {b'\0\1\0\0', b'true', b'ttcf'}: ext = 'ttf' href = "fonts/%05d.%s" % (fname_idx, ext) with open(href.replace('/', os.sep), 'wb') as f: From e806e2f2df36691b0ff37094a80d46fbc96767a6 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Mar 2012 13:07:10 +0530 Subject: [PATCH 2/5] ... --- src/calibre/ebooks/mobi/reader/mobi8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/reader/mobi8.py b/src/calibre/ebooks/mobi/reader/mobi8.py index 86d123bf7a..ed0088c168 100644 --- a/src/calibre/ebooks/mobi/reader/mobi8.py +++ b/src/calibre/ebooks/mobi/reader/mobi8.py @@ -348,7 +348,7 @@ class Mobi8Reader(object): # bytes 16 - 23: ?? typically all 0x00 ?? Are these compression flags from zlib? # The compressed data begins with 2 bytes of header and has 4 bytes of checksum at the end try: - fields = struct.unpack_from(b'>LLLL', data, 4) + fields = struct.unpack_from(b'>LLLLL', data, 4) except: fields = None # self.log.debug('Font record fields: %s'%(fields,)) From fb41c8c881071ece260dfbfa4c8819003ec6aa2b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Mar 2012 13:11:39 +0530 Subject: [PATCH 3/5] ... 
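For context, the FONT record decoding this series relies on reduces to roughly the sketch below. The helper name is invented, and reading the two "unknown" header fields as a compression flag and a payload offset is an assumption these patches work with, not documented format behaviour.

    import struct, zlib

    def decode_font_record(raw):
        # b'FONT', then the uncompressed size and two fields assumed here to be
        # a compression flag and the offset of the compressed payload
        uncompressed_size, flag, offset = struct.unpack_from(b'>LLL', raw, 4)
        payload = raw[4:]
        if flag == 1:
            # a 2 byte zlib header sits at `offset`; the raw deflate stream
            # after it ends with a 4 byte checksum, hence the [-4] and wbits=-15
            payload = zlib.decompress(raw[offset + 2:-4], -15)
            # patch 3 below adds the equivalent sanity check to FontRecord
            if uncompressed_size != len(payload):
                raise ValueError('Font record uncompressed size mismatch')
        ext = 'ttf' if payload[:4] in {b'\0\1\0\0', b'true', b'ttcf'} else 'dat'
        return payload, ext
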
--- src/calibre/ebooks/mobi/debug.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 800b2b7bec..3a5715ab9a 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -1164,6 +1164,10 @@ class FontRecord(object): # {{{ hdr = self.payload[:4] if hdr in {b'\0\1\0\0', b'true', b'ttcf'}: self.ext = 'ttf' + if self.uncompressed_size != len(self.payload): + raise ValueError('Font record uncompressed size mismatch', + ' expected: %d actual: %d'%(self.uncompressed_size, + len(self.payload))) else: print ('Unknown font record with fields: %s' % [self.uncompressed_size, self.unknown1, self.unknown2]) From 1195c37da5a6bb14daa1a9e1026bedf3be373b01 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Mar 2012 13:38:23 +0530 Subject: [PATCH 4/5] ... --- src/calibre/ebooks/mobi/debug.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 3a5715ab9a..0444105003 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -1171,6 +1171,8 @@ class FontRecord(object): # {{{ else: print ('Unknown font record with fields: %s' % [self.uncompressed_size, self.unknown1, self.unknown2]) + print ('\tAdditional fields: %s'%(( + struct.unpack_from(b'>LL', self.raw, 16),))) self.name = '%s.%s'%(name, self.ext) def dump(self, folder): From 49a4e5e02f6533b630fc17d97c1b50e897506e98 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 13 Mar 2012 14:16:23 +0530 Subject: [PATCH 5/5] When setting metadata in MOBI files fix cover not being updated if the mobi file has its first image record as the cover (this is the case for calibre produced MOBI files). Also remove obsolete MOBI writer code. 
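In sketch form, the cover fix is: EXTH record 201 stores the cover's offset from the first image record, so an offset of 0 (which calibre's own MOBI output uses) is valid and must not be skipped; whether the located record really holds an image is now checked from its data via imghdr (the new is_image() helper) rather than from the offset value. The helper name below is hypothetical.

    import struct

    def cover_record_number(exth_201_content, image_base):
        # EXTH 201 holds an unsigned offset relative to the first image record
        # (image_base), not an absolute PDB record number; 0 is a valid offset.
        rindex, = struct.unpack('>I', exth_201_content)
        return image_base + rindex
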
--- .../ebooks/conversion/plugins/mobi_output.py | 10 +- src/calibre/ebooks/metadata/mobi.py | 26 +- src/calibre/ebooks/mobi/__init__.py | 5 + src/calibre/ebooks/mobi/writer.py | 2950 ----------------- src/calibre/ebooks/mobi/writer2/main.py | 4 +- src/calibre/gui2/convert/mobi_output.py | 2 +- src/calibre/gui2/convert/mobi_output.ui | 81 +- 7 files changed, 60 insertions(+), 3018 deletions(-) delete mode 100644 src/calibre/ebooks/mobi/writer.py diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py index f22015d71f..2bde83e0e3 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_output.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py @@ -18,9 +18,6 @@ class MOBIOutput(OutputFormatPlugin): file_type = 'mobi' options = set([ - OptionRecommendation(name='rescale_images', recommended_value=False, - help=_('Modify images to meet Palm device size limitations.') - ), OptionRecommendation(name='prefer_author_sort', recommended_value=False, level=OptionRecommendation.LOW, help=_('When present, use author sort field as author.') @@ -167,12 +164,7 @@ class MOBIOutput(OutputFormatPlugin): mobimlizer(oeb, opts) self.check_for_periodical() write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz') - from calibre.utils.config import tweaks - if tweaks.get('new_mobi_writer', True): - from calibre.ebooks.mobi.writer2.main import MobiWriter - MobiWriter - else: - from calibre.ebooks.mobi.writer import MobiWriter + from calibre.ebooks.mobi.writer2.main import MobiWriter writer = MobiWriter(opts, write_page_breaks_after_item=write_page_breaks_after_item) writer(oeb, output_path) diff --git a/src/calibre/ebooks/metadata/mobi.py b/src/calibre/ebooks/metadata/mobi.py index 911421a6ce..846015f491 100644 --- a/src/calibre/ebooks/metadata/mobi.py +++ b/src/calibre/ebooks/metadata/mobi.py @@ -9,16 +9,21 @@ __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \ 'Marshall T. 
Vandegrift ' __docformat__ = 'restructuredtext en' -import os, cStringIO +import os, cStringIO, imghdr from struct import pack, unpack from cStringIO import StringIO from calibre.ebooks import normalize -from calibre.ebooks.mobi import MobiError -from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN +from calibre.ebooks.mobi import MobiError, MAX_THUMB_DIMEN +from calibre.ebooks.mobi.utils import rescale_image from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.utils.date import now as nowf +def is_image(ss): + if ss is None: + return False + return imghdr.what(None, ss[:200]) is not None + class StreamSlicer(object): def __init__(self, stream, start=0, stop=None): @@ -161,11 +166,10 @@ class MetadataUpdater(object): if id == 106: self.timestamp = content elif id == 201: - rindex, = self.cover_rindex, = unpack('>i', content) - if rindex > 0 : - self.cover_record = self.record(rindex + image_base) + rindex, = self.cover_rindex, = unpack('>I', content) + self.cover_record = self.record(rindex + image_base) elif id == 202: - rindex, = self.thumbnail_rindex, = unpack('>i', content) + rindex, = self.thumbnail_rindex, = unpack('>I', content) if rindex > 0 : self.thumbnail_record = self.record(rindex + image_base) @@ -416,17 +420,17 @@ class MetadataUpdater(object): except: pass else: - if self.cover_record is not None: + if is_image(self.cover_record): size = len(self.cover_record) cover = rescale_image(data, size) if len(cover) <= size: - cover += '\0' * (size - len(cover)) + cover += b'\0' * (size - len(cover)) self.cover_record[:] = cover - if self.thumbnail_record is not None: + if is_image(self.thumbnail_record): size = len(self.thumbnail_record) thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN) if len(thumbnail) <= size: - thumbnail += '\0' * (size - len(thumbnail)) + thumbnail += b'\0' * (size - len(thumbnail)) self.thumbnail_record[:] = thumbnail return diff --git a/src/calibre/ebooks/mobi/__init__.py b/src/calibre/ebooks/mobi/__init__.py index 55bc030796..22e0c1388f 100644 --- a/src/calibre/ebooks/mobi/__init__.py +++ b/src/calibre/ebooks/mobi/__init__.py @@ -6,3 +6,8 @@ __copyright__ = '2008, Kovid Goyal ' class MobiError(Exception): pass + +MAX_THUMB_SIZE = 16 * 1024 +MAX_THUMB_DIMEN = (180, 240) + + diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py deleted file mode 100644 index 40e9eeedd0..0000000000 --- a/src/calibre/ebooks/mobi/writer.py +++ /dev/null @@ -1,2950 +0,0 @@ -''' -Write content to Mobipocket books. -''' - -__license__ = 'GPL v3' -__copyright__ = '2008, Marshall T. 
Vandegrift and \ - Kovid Goyal ' - -from collections import defaultdict -import random -import re -from struct import pack -import time -from urlparse import urldefrag -from cStringIO import StringIO - -from calibre.ebooks import normalize -from calibre.ebooks.mobi.langcodes import iana2mobi -from calibre.ebooks.mobi.mobiml import MBP_NS -from calibre.ebooks.oeb.base import OEB_DOCS -from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES -from calibre.ebooks.oeb.base import XHTML -from calibre.ebooks.oeb.base import XHTML_NS -from calibre.ebooks.oeb.base import XML_NS -from calibre.ebooks.oeb.base import namespace -from calibre.ebooks.oeb.base import prefixname -from calibre.ebooks.oeb.base import urlnormalize -from calibre.ebooks.compression.palmdoc import compress_doc -from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail - -INDEXING = True -FCIS_FLIS = True -WRITE_PBREAKS = True - -# TODO: -# - Optionally rasterize tables - -EXTH_CODES = { - 'creator': 100, - 'publisher': 101, - 'description': 103, - 'identifier': 104, - 'subject': 105, - 'pubdate': 106, - 'date': 106, - 'review': 107, - 'contributor': 108, - 'rights': 109, - 'type': 111, - 'source': 112, - 'title': 503, - } - -RECORD_SIZE = 0x1000 - -UNCOMPRESSED = 1 -PALMDOC = 2 -HUFFDIC = 17480 - -PALM_MAX_IMAGE_SIZE = 63 * 1024 -OTHER_MAX_IMAGE_SIZE = 10 * 1024 * 1024 -MAX_THUMB_SIZE = 16 * 1024 -MAX_THUMB_DIMEN = (180, 240) - - -TAGX = { - 'chapter' : - '\x00\x00\x00\x01\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x00\x00\x00\x01', - 'subchapter' : - '\x00\x00\x00\x01\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x05\x01\x10\x00\x15\x01\x10\x00\x16\x01\x20\x00\x17\x01\x40\x00\x00\x00\x00\x01', - 'periodical' : - '\x00\x00\x00\x02\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x05\x01\x10\x00\x15\x01\x20\x00\x16\x01\x40\x00\x17\x01\x80\x00\x00\x00\x00\x01\x45\x01\x01\x00\x46\x01\x02\x00\x47\x01\x04\x00\x00\x00\x00\x01', - 'secondary_book':'\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00\x00\x01', - 'secondary_periodical':'\x00\x00\x00\x01\x01\x01\x01\x00\x0b\x03\x02\x00\x00\x00\x00\x01' - } - -INDXT = { - 'chapter' : '\x0f', - 'subchapter' : '\x1f', - 'article' : '\x3f', - 'chapter with subchapters': '\x6f', - 'periodical' : '\xdf', - 'section' : '\xff', - } - -def encode(data): - return data.encode('utf-8') - -# Almost like the one for MS LIT, but not quite. 
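# A worked example of the variable-width integer ("vwi") encoding implemented
# by decint() just below, computed from that definition: decint(0x11111,
# DECINT_FORWARD) gives '\x04\x22\x91' (the 0x80 stop flag on the last byte),
# while decint(0x11111, DECINT_BACKWARD) gives '\x84\x22\x11' (flag on the
# first byte).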
-DECINT_FORWARD = 0 -DECINT_BACKWARD = 1 -def decint(value, direction): - # Encode vwi - bytes = [] - while True: - b = value & 0x7f - value >>= 7 - bytes.append(b) - if value == 0: - break - if direction == DECINT_FORWARD: - bytes[0] |= 0x80 - elif direction == DECINT_BACKWARD: - bytes[-1] |= 0x80 - return ''.join(chr(b) for b in reversed(bytes)) - -def align_block(raw, multiple=4, pad='\0'): - extra = len(raw) % multiple - if extra == 0: return raw - return raw + pad*(multiple - extra) - -def rescale_image(data, maxsizeb, dimen=None): - if dimen is not None: - data = thumbnail(data, width=dimen[0], height=dimen[1], - compression_quality=90)[-1] - else: - # Replace transparent pixels with white pixels and convert to JPEG - data = save_cover_data_to(data, 'img.jpg', return_data=True) - if len(data) <= maxsizeb: - return data - orig_data = data - img = Image() - quality = 95 - - img.load(data) - while len(data) >= maxsizeb and quality >= 10: - quality -= 5 - img.set_compression_quality(quality) - data = img.export('jpg') - if len(data) <= maxsizeb: - return data - orig_data = data - - scale = 0.9 - while len(data) >= maxsizeb and scale >= 0.05: - img = Image() - img.load(orig_data) - w, h = img.size - img.size = (int(scale*w), int(scale*h)) - img.set_compression_quality(quality) - data = img.export('jpg') - scale -= 0.05 - return data - -class Serializer(object): # {{{ - NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'} - - def __init__(self, oeb, images, write_page_breaks_after_item=True): - self.oeb = oeb - self.images = images - self.logger = oeb.logger - self.write_page_breaks_after_item = write_page_breaks_after_item - self.id_offsets = {} - self.href_offsets = defaultdict(list) - self.breaks = [] - buffer = self.buffer = StringIO() - buffer.write('') - self.serialize_head() - self.serialize_body() - buffer.write('') - self.fixup_links() - self.text = buffer.getvalue() - - def serialize_head(self): - buffer = self.buffer - buffer.write('') - if len(self.oeb.guide) > 0: - self.serialize_guide() - buffer.write('') - - def serialize_guide(self): - buffer = self.buffer - hrefs = self.oeb.manifest.hrefs - buffer.write('') - for ref in self.oeb.guide.values(): - # The Kindle decides where to open a book based on the presence of - # an item in the guide that looks like - # - path = urldefrag(ref.href)[0] - if path not in hrefs or hrefs[path].media_type not in OEB_DOCS: - continue - - buffer.write('') - - buffer.write('') - - def serialize_href(self, href, base=None): - hrefs = self.oeb.manifest.hrefs - path, frag = urldefrag(urlnormalize(href)) - if path and base: - path = base.abshref(path) - if path and path not in hrefs: - return False - buffer = self.buffer - item = hrefs[path] if path else None - if item and item.spine_position is None: - return False - path = item.href if item else base.href - href = '#'.join((path, frag)) if frag else path - buffer.write('filepos=') - self.href_offsets[href].append(buffer.tell()) - buffer.write('0000000000') - return True - - def serialize_body(self): - buffer = self.buffer - self.anchor_offset = buffer.tell() - buffer.write('') - self.anchor_offset_kindle = buffer.tell() - spine = [item for item in self.oeb.spine if item.linear] - spine.extend([item for item in self.oeb.spine if not item.linear]) - for item in spine: - self.serialize_item(item) - buffer.write('') - - def serialize_item(self, item): - buffer = self.buffer - if not item.linear: - self.breaks.append(buffer.tell() - 1) - self.id_offsets[urlnormalize(item.href)] = 
buffer.tell() - # Kindle periodical articles are contained in a <div> tag - buffer.write('<div>') - for elem in item.data.find(XHTML('body')): - self.serialize_elem(elem, item) - # Kindle periodical article end marker - buffer.write('<div></div>') - if self.write_page_breaks_after_item: - buffer.write('<mbp:pagebreak/>') - buffer.write('</div>
') - self.anchor_offset = None - - def serialize_elem(self, elem, item, nsrmap=NSRMAP): - buffer = self.buffer - if not isinstance(elem.tag, basestring) \ - or namespace(elem.tag) not in nsrmap: - return - tag = prefixname(elem.tag, nsrmap) - # Previous layers take care of @name - id = elem.attrib.pop('id', None) - if id: - href = '#'.join((item.href, id)) - offset = self.anchor_offset or buffer.tell() - self.id_offsets[urlnormalize(href)] = offset - if self.anchor_offset is not None and \ - tag == 'a' and not elem.attrib and \ - not len(elem) and not elem.text: - return - self.anchor_offset = buffer.tell() - buffer.write('<') - buffer.write(tag) - if elem.attrib: - for attr, val in elem.attrib.items(): - if namespace(attr) not in nsrmap: - continue - attr = prefixname(attr, nsrmap) - buffer.write(' ') - if attr == 'href': - if self.serialize_href(val, item): - continue - elif attr == 'src': - href = urlnormalize(item.abshref(val)) - if href in self.images: - index = self.images[href] - buffer.write('recindex="%05d"' % index) - continue - buffer.write(attr) - buffer.write('="') - self.serialize_text(val, quot=True) - buffer.write('"') - buffer.write('>') - if elem.text or len(elem) > 0: - if elem.text: - self.anchor_offset = None - self.serialize_text(elem.text) - for child in elem: - self.serialize_elem(child, item) - if child.tail: - self.anchor_offset = None - self.serialize_text(child.tail) - buffer.write('' % tag) - - def serialize_text(self, text, quot=False): - text = text.replace('&', '&') - text = text.replace('<', '<') - text = text.replace('>', '>') - text = text.replace(u'\u00AD', '') # Soft-hyphen - if quot: - text = text.replace('"', '"') - self.buffer.write(encode(text)) - - def fixup_links(self): - buffer = self.buffer - id_offsets = self.id_offsets - for href, hoffs in self.href_offsets.items(): - if href not in id_offsets: - self.logger.warn('Hyperlink target %r not found' % href) - href, _ = urldefrag(href) - if href in self.id_offsets: - ioff = self.id_offsets[href] - for hoff in hoffs: - buffer.seek(hoff) - buffer.write('%010d' % ioff) - - # }}} - -class MobiWriter(object): - COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') - - def __init__(self, opts, - write_page_breaks_after_item=True): - self.opts = opts - self.write_page_breaks_after_item = write_page_breaks_after_item - self._compression = UNCOMPRESSED if getattr(opts, 'dont_compress', - False) else PALMDOC - self._imagemax = (PALM_MAX_IMAGE_SIZE if getattr(opts, - 'rescale_images', False) else OTHER_MAX_IMAGE_SIZE) - self._prefer_author_sort = getattr(opts, 'prefer_author_sort', False) - self._primary_index_record = None - self._conforming_periodical_toc = False - self._indexable = False - self._ctoc = "" - self._ctoc_records = [] - self._ctoc_offset = 0 - self._ctoc_largest = 0 - self._HTMLRecords = [] - self._tbSequence = "" - self._MobiDoc = None - self._anchor_offset_kindle = 0 - self._initialIndexRecordFound = False - self._firstSectionConcluded = False - self._currentSectionIndex = 0 - - @classmethod - def generate(cls, opts): - """Generate a Writer instance from command-line options.""" - imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None - prefer_author_sort = opts.prefer_author_sort - return cls(compression=PALMDOC, imagemax=imagemax, - prefer_author_sort=prefer_author_sort) - - def __call__(self, oeb, path): - if hasattr(path, 'write'): - return self._dump_stream(oeb, path) - with open(path, 'w+b') as stream: - return self._dump_stream(oeb, stream) - - def _write(self, * data): - for datum in 
data: - self._stream.write(datum) - - def _tell(self): - return self._stream.tell() - - def _dump_stream(self, oeb, stream): - self._oeb = oeb - self._stream = stream - self._records = [None] - self._generate_content() - self._generate_record0() - self._write_header() - self._write_content() - - def _generate_content(self): - self._map_image_names() - self._generate_text() - - if INDEXING and self._indexable : - try: - self._generate_index() - except: - self._oeb.log.exception('Failed to generate index') - - self._generate_images() - - def _map_image_names(self): - index = 1 - self._images = images = {} - mh_href = None - - if 'masthead' in self._oeb.guide: - mh_href = self._oeb.guide['masthead'].href - images[mh_href] = 1 - index += 1 - - for item in self._oeb.manifest.values(): - if item.media_type in OEB_RASTER_IMAGES: - if item.href == mh_href: continue - images[item.href] = index - index += 1 - - def _read_text_record(self, text): - pos = text.tell() - text.seek(0, 2) - npos = min((pos + RECORD_SIZE, text.tell())) - last = '' - while not last.decode('utf-8', 'ignore'): - size = len(last) + 1 - text.seek(npos - size) - last = text.read(size) - extra = 0 - try: - last.decode('utf-8') - except UnicodeDecodeError: - prev = len(last) - while True: - text.seek(npos - prev) - last = text.read(len(last) + 1) - try: - last.decode('utf-8') - except UnicodeDecodeError: - pass - else: - break - extra = len(last) - prev - text.seek(pos) - data = text.read(RECORD_SIZE) - overlap = text.read(extra) - text.seek(npos) - return data, overlap - - # TBS {{{ - def _generate_flat_indexed_navpoints(self): - # Assemble a HTMLRecordData instance for each HTML record - # Return True if valid, False if invalid - self._oeb.logger.info('Indexing flat navPoints ...') - - numberOfHTMLRecords = ( self._content_length // RECORD_SIZE ) + 1 - - # Create a list of HTMLRecordData class instances - x = numberOfHTMLRecords - while x: - self._HTMLRecords.append(HTMLRecordData()) - x -= 1 - - toc = self._oeb.toc - myIndex = 0 - myEndingRecord = 0 - previousOffset = 0 - previousLength = 0 - offset = 0 - length = 0 - entries = list(toc.iter())[1:] - - # Get offset, length per entry - for (i, child) in enumerate(entries): - if not child.title or not child.title.strip(): - child.title = "(none)" - - if not child.title or not child.title.strip(): - child.title = "(none)" - - h = child.href - if h not in self._id_offsets: - self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...'% child.title) - return False - offset = self._id_offsets[h] - - length = None - - for sibling in entries[i+1:]: - h2 = sibling.href - if h2 in self._id_offsets: - offset2 = self._id_offsets[h2] - if offset2 > offset: - length = offset2 - offset - break - - if length is None: - length = self._content_length - offset - - if self.opts.verbose > 3 : - self._oeb.logger.info("child %03d: %s" % (i, child)) - self._oeb.logger.info(" title: %s" % child.title) - self._oeb.logger.info(" depth: %d" % child.depth()) - self._oeb.logger.info(" offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X" % (offset, length, offset + length)) - - # Look a gap between chapter nodes. 
Don't evaluate periodical or section nodes - if (i and child.depth() == 1 and entries[i-1].depth() == 1) : - if offset != previousOffset + previousLength : - self._oeb.log.warning("*** TOC discontinuity ***") - self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \ - (i-1, entries[i-1].title, previousOffset, previousLength) ) - self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \ - (i, child.title, offset, previousOffset + previousLength) ) - self._oeb.log.warning('_generate_flat_indexed_navpoints: Failed to generate index') - # Zero out self._HTMLRecords, return False - self._HTMLRecords = [] - #last_name = None - return False - - previousOffset = offset - previousLength = length - - # Calculate the HTML record for this entry - myStartingRecord = offset // RECORD_SIZE - - # If no one has taken the openingNode slot, it must be us - if self._HTMLRecords[myStartingRecord].openingNode == -1 : - self._HTMLRecords[myStartingRecord].openingNode = myIndex - - # Bump the node count for this HTML record - # Special case if we're the first so we get a true node count - if self._HTMLRecords[myStartingRecord].currentSectionNodeCount == -1: - self._HTMLRecords[myStartingRecord].currentSectionNodeCount = 1 - else: - self._HTMLRecords[myStartingRecord].currentSectionNodeCount += 1 - - # Calculate the ending HTMLRecord of this entry - myEndingRecord = (offset + length) // RECORD_SIZE - - if myEndingRecord > myStartingRecord : - interimSpanRecord = myStartingRecord + 1 - while interimSpanRecord <= myEndingRecord : - self._HTMLRecords[interimSpanRecord].continuingNode = myIndex - self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1 - interimSpanRecord += 1 - if self.opts.verbose > 3 :self._oeb.logger.info(" node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \ - (myIndex, child.title if child.title.strip() > "" else "(missing)", myStartingRecord, interimSpanRecord, offset, length) ) - else : - if self.opts.verbose > 3 : self._oeb.logger.info(" node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \ - (myIndex, child.title if child.title.strip() > "" else "(missing)", myStartingRecord, myStartingRecord, offset, length) ) - - myIndex += 1 - - # Successfully parsed the entries - return True - - def _generate_indexed_navpoints(self): - # Assemble a HTMLRecordData instance for each HTML record - # Return True if valid, False if invalid - self._oeb.logger.info('Indexing navPoints ...') - - numberOfHTMLRecords = ( self._content_length // RECORD_SIZE ) + 1 - - # Create a list of HTMLRecordData class instances - x = numberOfHTMLRecords - while x: - self._HTMLRecords.append(HTMLRecordData()) - x -= 1 - - toc = self._oeb.toc - myIndex = 0 - myEndingRecord = 0 - previousOffset = 0 - previousLength = 0 - offset = 0 - length = 0 - sectionChangedInRecordNumber = -1 - sectionChangesInThisRecord = False - entries = list(toc.iter())[1:] - - # Get offset, length per entry - for (firstSequentialNode, node) in enumerate(list(self._ctoc_map)) : - if node['klass'] != 'article' and node['klass'] != 'chapter' : - # Skip periodical and section entries - continue - else : - if self.opts.verbose > 3 :self._oeb.logger.info("\tFirst sequential node: %03d" % firstSequentialNode) - break - - for i, child in enumerate(entries): - # Entries continues with a stream of section+articles, section+articles ... 
- h = child.href - if h not in self._id_offsets: - self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...'% child.title) - return False - offset = self._id_offsets[h] - - length = None - - for sibling in entries[i+1:]: - h2 = sibling.href - if h2 in self._id_offsets: - offset2 = self._id_offsets[h2] - if offset2 > offset: - length = offset2 - offset - break - - if length is None: - length = self._content_length - offset - - if self.opts.verbose > 3 : - self._oeb.logger.info("child %03d: %s" % (i, child)) - self._oeb.logger.info(" title: %s" % child.title) - self._oeb.logger.info(" depth: %d" % child.depth()) - self._oeb.logger.info(" offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X" % (offset, length, offset + length)) - - # Look a gap between nodes, articles/chapters only, as - # periodical and section lengths cover spans of articles - if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section': - if offset != previousOffset + previousLength : - self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***") - self._oeb.log.info(" node %03d: '%s' offset: 0x%X length: 0x%X" % \ - (i-1, entries[i-1].title, previousOffset, previousLength) ) - self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \ - (i, child.title, offset, previousOffset + previousLength) ) - # self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) ) - # self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) ) - # Dump the offending entry - self._oeb.log.info("...") - for z in range(i-6 if i-6 > 0 else 0, i+6 if i+6 < len(entries) else len(entries)): - if z == i: - self._oeb.log.warning("child %03d: %s" % (z, entries[z])) - else: - self._oeb.log.info("child %03d: %s" % (z, entries[z])) - self._oeb.log.info("...") - - self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index') - # Zero out self._HTMLRecords, return False - self._HTMLRecords = [] - return False - - previousOffset = offset - previousLength = length - - # Calculate the HTML record for this entry - thisRecord = offset // RECORD_SIZE - - # Store the current continuingNodeParent and openingNodeParent - if self._ctoc_map[i]['klass'] == 'article': - if thisRecord > 0 : - if sectionChangesInThisRecord : # <<< - self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex - 1 - else : - self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex - - # periodical header? - if self._ctoc_map[i]['klass'] == 'periodical' : - # INCREMENT currentSectionNode count - # Commented out because structured docs don't count section changes in nodeCount - # compensation at 948 for flat periodicals - # self._HTMLRecords[thisRecord].currentSectionNodeCount = 1 - continue - - # Is this node a new section? 
- if self._ctoc_map[i]['klass'] == 'section' : - # INCREMENT currentSectionNode count - # Commented out because structured docs don't count section changes in nodeCount - # self._HTMLRecords[thisRecord].currentSectionNodeCount += 1 - - # *** This should check currentSectionNumber, because content could start late - if thisRecord > 0: - sectionChangesInThisRecord = True - #sectionChangesInRecordNumber = thisRecord - self._currentSectionIndex += 1 - self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex - # The following node opens the nextSection - self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex - continue - else : - continue - - - # If no one has taken the openingNode slot, it must be us - # This could happen before detecting a section change - if self._HTMLRecords[thisRecord].openingNode == -1 : - self._HTMLRecords[thisRecord].openingNode = myIndex - self._HTMLRecords[thisRecord].openingNodeParent = self._currentSectionIndex - - # Bump the nextSection node count while we're in the same record - if sectionChangedInRecordNumber == thisRecord : - if self._ctoc_map[i]['klass'] == 'article' : - if self._HTMLRecords[thisRecord].nextSectionNodeCount == -1: - self._HTMLRecords[thisRecord].nextSectionNodeCount = 1 - else: - self._HTMLRecords[thisRecord].nextSectionNodeCount += 1 - else : - # Bump the currentSectionNodeCount one last time - self._HTMLRecords[thisRecord].currentSectionNodeCount += 1 - - else : - # Reset the change record - # sectionChangedInRecordNumber = -1 - sectionChangesInThisRecord = False - if self._HTMLRecords[thisRecord].currentSectionNodeCount == -1: - self._HTMLRecords[thisRecord].currentSectionNodeCount = 1 - else: - self._HTMLRecords[thisRecord].currentSectionNodeCount += 1 - - # Fill in the spanning records - myEndingRecord = (offset + length) // RECORD_SIZE - if myEndingRecord > thisRecord : - sectionChangesInThisRecord = False - interimSpanRecord = thisRecord + 1 - while interimSpanRecord <= myEndingRecord : - self._HTMLRecords[interimSpanRecord].continuingNode = myIndex - - self._HTMLRecords[interimSpanRecord].continuingNodeParent = self._currentSectionIndex - self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1 - interimSpanRecord += 1 - - if self.opts.verbose > 3 :self._oeb.logger.info(" node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X" % \ - (myIndex, self._ctoc_map[i]['klass'], child.title if child.title.strip() > "" else "(missing)", thisRecord, interimSpanRecord, offset, length) ) - elif thisRecord == numberOfHTMLRecords-1: - # Check for short terminating record (GR provisional) - if self._HTMLRecords[thisRecord].continuingNode == -1: - self._HTMLRecords[thisRecord].continuingNode = self._HTMLRecords[thisRecord].openingNode - 1 - else : - if self.opts.verbose > 3 : self._oeb.logger.info(" node: %03d %-10.10s %-15.15s... 
spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X" % \ - (myIndex, self._ctoc_map[i]['klass'], child.title if child.title.strip() > "" else "(missing)", thisRecord, thisRecord, offset, length) ) - - myIndex += 1 - - # Successfully parsed the entries - return True - - def _generate_tbs_book(self, nrecords, lastrecord): - if self.opts.verbose > 3 :self._oeb.logger.info("Assembling TBS for Book: HTML record %03d of %03d" % \ - (nrecords, lastrecord) ) - # Variables for trailing byte sequence - tbsType = 0x00 - tbSequence = "" - - # Generate TBS for type 0x002 - mobi_book - if self._initialIndexRecordFound == False : - - # Is there any indexed content yet? - if self._HTMLRecords[nrecords].currentSectionNodeCount == -1 : - # No indexing data - write vwi length of 1 only - tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD) - - else : - # First indexed HTML record is a special case - # One or more nodes - self._initialIndexRecordFound = True - if self._HTMLRecords[nrecords].currentSectionNodeCount == 1 : - tbsType = 2 - else : - tbsType = 6 - - tbSequence = decint(tbsType, DECINT_FORWARD) - tbSequence += decint(0x00, DECINT_FORWARD) - # Don't write a nodecount for opening type 2 record - if tbsType != 2 : - # Check that <> -1 - tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) - - else : - # Determine tbsType for indexed HTMLRecords - if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1 : - # Ending record with singleton node - tbsType = 2 - - elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1 : - # This is a span-only record - tbsType = 3 - # Zero out the nodeCount with a pre-formed vwi - self._HTMLRecords[nrecords].currentSectionNodeCount = 0x80 - - else : - tbsType = 6 - - - # Shift the openingNode index << 3 - shiftedNCXEntry = self._HTMLRecords[nrecords].continuingNode << 3 - # Add the TBS type - shiftedNCXEntry |= tbsType - - # Assemble the TBS - tbSequence = decint(shiftedNCXEntry, DECINT_FORWARD) - tbSequence += decint(0x00, DECINT_FORWARD) - # Don't write a nodecount for terminating type 2 record - if tbsType != 2 : - tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) - - self._tbSequence = tbSequence - - def _generate_tbs_flat_periodical(self, nrecords, lastrecord): - # Flat periodicals <0x102> have a single section for all articles - # Structured periodicals <0x101 | 0x103> have one or more sections with articles - # The first section TBS sequence is different for Flat and Structured - # This function is called once per HTML record - - # Variables for trailing byte sequence - tbsType = 0x00 - tbSequence = "" - - # Generate TBS for type 0x102 - mobi_feed - flat periodical - if self._initialIndexRecordFound == False : - # Is there any indexed content yet? 
- if self._HTMLRecords[nrecords].currentSectionNodeCount == -1 : - # No indexing data - write vwi length of 1 only - tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD) - - else : - # First indexed record: Type 6 with nodeCount only - self._initialIndexRecordFound = True - tbsType = 6 - tbSequence = decint(tbsType, DECINT_FORWARD) - tbSequence += decint(0x00, DECINT_FORWARD) - # nodeCount = 0xDF + 0xFF + n(0x3F) - need to add 2 because we didn't count them earlier - tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount + 2) - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) - if self.opts.verbose > 2 : - self._oeb.logger.info("\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d" % \ - (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) ) - self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) - - else : - # An HTML record with nextSectionNumber = -1 has no section change in this record - # Default for flat periodicals with only one section - if self.opts.verbose > 2 : - self._oeb.logger.info("\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d" % \ - (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) ) - self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) - - # First section has different Type values - # Determine tbsType for HTMLRecords > 0 - if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1 : - # Ending record with singleton node - tbsType = 6 - - # Assemble the Type 6 TBS - tbSequence = decint(tbsType, DECINT_FORWARD) # Type - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80 - tbSequence += chr(2) # arg2 = 0x02 - - # Assemble arg3 - (article index +1) << 4 + flag: 1 = article spans this record - arg3 = self._HTMLRecords[nrecords].continuingNode - arg3 += 1 - arg3 <<= 4 - arg3 |= 0x0 #flags = 0 - tbSequence += decint(arg3, DECINT_FORWARD) # arg3 - - - # tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len - - elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1 : - # This is a span-only record - tbsType = 6 - # Zero out the nodeCount with a pre-formed vwi - self._HTMLRecords[nrecords].currentSectionNodeCount = 0x80 - - # Assemble the Type 6 TBS - tbSequence = decint(tbsType, DECINT_FORWARD) # Type - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80 - tbSequence += chr(2) # arg2 = 0x02 - # Assemble arg3 - article index << 3 + flag: 1 = article spans this record - arg3 = self._HTMLRecords[nrecords].continuingNode - # Add the index of the openingNodeParent to get the offset start - # We know that section 0 is at position 1, section 1 at index 2, etc. 
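# Worked example with hypothetical values, per the computation just below:
# continuingNode = 5 in section 2 (continuingNodeParent = 2) gives
# arg3 = ((5 + 2 + 1) << 4) | 0x01 = 0x81.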
- arg3 += self._HTMLRecords[nrecords].continuingNodeParent + 1 - arg3 <<= 4 - arg3 |= 0x01 - tbSequence += decint(arg3, DECINT_FORWARD) # arg3 - tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len - - else : - tbsType = 7 - # Assemble the Type 7 TBS - tbSequence = decint(tbsType, DECINT_FORWARD) # Type - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80 - tbSequence += chr(2) # arg2 = 0x02 - tbSequence += decint(0x00, DECINT_FORWARD) # arg3 = 0x80 - # Assemble arg4 - article index << 4 + flag: 1 = article spans this record - arg4 = self._HTMLRecords[nrecords].continuingNode - # Add the index of the openingNodeParent to get the offset start - # We know that section 0 is at position 1, section 1 at index 2, etc. - arg4 += self._HTMLRecords[nrecords].continuingNodeParent + 1 - arg4 <<= 4 - arg4 |= 0x04 # 4: multiple nodes - tbSequence += decint(arg4, DECINT_FORWARD) # arg4 - tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len - - self._tbSequence = tbSequence - - def _generate_tbs_structured_periodical(self, nrecords, lastrecord): - # Structured periodicals <0x101 | 0x103> have one or more sections for all articles - # The first section TBS sequences is different for Flat and Structured - # This function is called once per HTML record - - # Variables for trailing byte sequence - tbsType = 0x00 - tbSequence = "" - - # Generate TBS for type 0x101/0x103 - structured periodical - if self._initialIndexRecordFound == False : - # Is there any indexed content yet? - if self._HTMLRecords[nrecords].currentSectionNodeCount == -1 : - # No indexing data - write vwi length of 1 only - tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD) - - else : - self._initialIndexRecordFound = True - - if self.opts.verbose > 2 : - self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d" % \ - (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) ) - self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) - - # First record only - tbsType = 6 - # Assemble the Type 6 TBS - tbSequence = decint(tbsType, DECINT_FORWARD) # Type - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80 - tbSequence += chr(2) # arg2 = 0x02 - # Assemble arg3: (section jump + article index) << 4 + flag: 1 = article spans this record - arg3 = self._sectionCount # Jump over the section group - arg3 += 0 # First article index = 0 - arg3 <<= 4 - arg3 |= 0x04 - tbSequence += decint(arg3, DECINT_FORWARD) # arg3 - - # Structured periodicals don't count periodical, section in nodeCount - tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len - else : - if self._firstSectionConcluded == False : - # Use type 6 & 7 until first section switch, then 2 - - if self._HTMLRecords[nrecords].nextSectionNumber == -1 : - # An HTML record with nextSectionNumber = -1 has no section change in this record - if self.opts.verbose > 2 : - self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d" % \ - (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) ) - self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) - - # First section has different Type values - # Determine tbsType for HTMLRecords > 0 - if nrecords == lastrecord and 
self._HTMLRecords[nrecords].currentSectionNodeCount == 1 : - # Ending record with singleton node - tbsType = 6 - - # Assemble the Type 6 TBS - tbSequence = decint(tbsType, DECINT_FORWARD) # Type - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80 - tbSequence += chr(2) # arg2 = 0x02 - # Assemble arg3: (section jump + article index) << 4 + flag: 1 = article spans this record - arg3 = self._sectionCount - arg3 += self._HTMLRecords[nrecords].continuingNode - arg3 <<= 4 - arg3 |= 0x04 - tbSequence += decint(arg3, DECINT_FORWARD) # arg3 - tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len - - elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1 : - # This is a span-only record - tbsType = 6 - # Zero out the nodeCount with a pre-formed vwi - self._HTMLRecords[nrecords].currentSectionNodeCount = 0x80 - - # Assemble the Type 6 TBS - tbSequence = decint(tbsType, DECINT_FORWARD) # Type - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80 - tbSequence += chr(2) # arg2 = 0x02 - # Assemble arg3: (section jump + article index) << 4 + flag: 1 = article spans this record - arg3 = self._sectionCount - arg3 += self._HTMLRecords[nrecords].continuingNode - arg3 <<= 4 - arg3 |= 0x01 - tbSequence += decint(arg3, DECINT_FORWARD) # arg3 - tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len - - else : - tbsType = 7 - # Assemble the Type 7 TBS - tbSequence = decint(tbsType, DECINT_FORWARD) # Type - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80 - tbSequence += chr(2) # arg2 = 0x02 - tbSequence += decint(0x00, DECINT_FORWARD) # arg3 = 0x80 - # Assemble arg4: (section jump + article index) << 4 + flag: 1 = article spans this record - arg4 = self._sectionCount - arg4 += self._HTMLRecords[nrecords].continuingNode - arg4 <<= 4 - arg4 |= 0x04 # 4: multiple nodes - tbSequence += decint(arg4, DECINT_FORWARD) # arg4 - tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len - - - # Initial section switch from section 1 - elif self._HTMLRecords[nrecords].nextSectionNumber > 0 : - tbsType = 3 - - if self.opts.verbose > 2 : - self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d" % \ - (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent, self._HTMLRecords[nrecords].nextSectionNumber) ) - self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) - - tbSequence = decint(tbsType, DECINT_FORWARD) # Type - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80 - tbSequence += decint(0x00, DECINT_FORWARD) # arg2 = 0x80 - - # Assemble arg3: Upper nybble: ending section index - # Lower nybble = flags for next section - 0 or 1 - arg3 = (self._HTMLRecords[nrecords].continuingNodeParent + 1) << 4 - arg3Flags = 0 # 0: has nodes? 
- arg3 |= arg3Flags - tbSequence += decint(arg3, DECINT_FORWARD) - - # Assemble arg4: Upper nybble: continuingNode << 4 - # Lower nybble: flag: 0 = no starting nodes from previous section - # flag: 4 = starting nodes from previous section - - sectionBase = self._HTMLRecords[nrecords].continuingNodeParent - sectionDelta = self._sectionCount - sectionBase - 1 - articleOffset = self._HTMLRecords[nrecords].continuingNode + 1 - arg4 = (sectionDelta + articleOffset) << 4 - - arg4Flags = 0 - if self._HTMLRecords[nrecords].currentSectionNodeCount > 1 : - arg4Flags = 4 - else : - arg4Flags = 0 - arg4 |= arg4Flags - tbSequence += decint(arg4, DECINT_FORWARD) # arg4 - - # Write optional 4a if previous section node count > 1 - if arg4Flags == 4 : # arg4a - nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount - nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue - tbSequence += chr(nodeCountValue) - - # Write article2: not completely understood - arg5 = sectionDelta + articleOffset - if self._HTMLRecords[nrecords].currentSectionNodeCount < 2: - arg5 -= 1 - arg5 <<= 4 - arg5Flags = 8 - arg5 |= arg5Flags - tbSequence += decint(arg5, DECINT_FORWARD) # arg5 - - # Write first article of new section - #arg6 = self._sectionCount - 1 # We're now into the following section - #arg6 = self._HTMLRecords[nrecords].nextSectionNumber - arg6 = sectionDelta + self._HTMLRecords[nrecords].nextSectionOpeningNode - arg6 <<= 4 - if self._HTMLRecords[nrecords].nextSectionNodeCount > 1 : - arg6Flags = 4 - else : - arg6Flags = 0 - arg6 |= arg6Flags - tbSequence += decint(arg6, DECINT_FORWARD) # arg5 - - # Write optional 6a if previous section node count > 1 - if arg6Flags == 4 : # arg4a - nodeCountValue = self._HTMLRecords[nrecords].nextSectionNodeCount - nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue - tbSequence += chr(nodeCountValue) - - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len - - self._firstSectionConcluded = True - else : - # After first section switch, use types 2 and 3 - if self._HTMLRecords[nrecords].nextSectionNumber == -1 : - if self.opts.verbose > 2 : - self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d" % \ - (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) ) - self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) - - tbsType = 2 - tbSequence = decint(tbsType, DECINT_FORWARD) # Type - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80 - arg2 = self._HTMLRecords[nrecords].continuingNodeParent + 1 - arg2 <<= 4 - # Add flag = 1 if there are multiple nodes in this record - arg2Flags = 0 - if self._HTMLRecords[nrecords].currentSectionNodeCount > 0 : - arg2Flags = 1 - arg2 |= arg2Flags - tbSequence += decint(arg2, DECINT_FORWARD) - - if arg2Flags : - # Add an extra vwi 0x00 - tbSequence += decint(0x00, DECINT_FORWARD) # arg2Flags = 0x80 - - # arg3 - offset of continuingNode from sectionParent - arg3 = self._sectionCount - self._HTMLRecords[nrecords].continuingNodeParent # Total guess - arg3 += self._HTMLRecords[nrecords].continuingNode - arg3 <<= 4 - arg3Flags = 1 - if self._HTMLRecords[nrecords].currentSectionNodeCount > 0 : - arg3Flags = 4 - arg3 |= arg3Flags - tbSequence += decint(arg3, DECINT_FORWARD) - - if arg3Flags == 4 : - nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount - nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue - tbSequence += chr(nodeCountValue) - else : - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 
0x80 - - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len - - else : - # Section switch when section > 1 - tbsType = 3 - - if self.opts.verbose > 2 : - self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d" % \ - (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent, self._HTMLRecords[nrecords].nextSectionNumber) ) - self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) - - tbSequence = decint(tbsType, DECINT_FORWARD) # Type - tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80 - tbSequence += decint(0x00, DECINT_FORWARD) # arg2 = 0x80 - - # arg3: continuingNodeParent section - # Upper nybble: ending section index - # Lower nybble = flags for next section - 0 or 1 - arg3 = (self._HTMLRecords[nrecords].continuingNodeParent + 1) << 4 - arg3Flags = 0 # 0: has nodes? - arg3 |= arg3Flags - tbSequence += decint(arg3, DECINT_FORWARD) - - # Assemble arg4: Upper nybble: continuingNode << 4 - # Lower nybble: flag: 0 = no starting nodes from previous section - # flag: 4 = starting nodes from previous section - sectionBase = self._HTMLRecords[nrecords].continuingNodeParent - sectionDelta = self._sectionCount - sectionBase - 1 - articleOffset = self._HTMLRecords[nrecords].continuingNode + 1 - arg4 = (sectionDelta + articleOffset) << 4 - - arg4Flags = 0 - if self._HTMLRecords[nrecords].currentSectionNodeCount > 1 : - arg4Flags = 4 - else : - arg4Flags = 0 - arg4 |= arg4Flags - tbSequence += decint(arg4, DECINT_FORWARD) # arg4 - - # Write optional 4a if previous section node count > 1 - if arg4Flags == 4 : # arg4a - nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount - nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue - tbSequence += chr(nodeCountValue) - - # Write article2: not completely understood - arg5 = sectionDelta + articleOffset - if self._HTMLRecords[nrecords].currentSectionNodeCount < 2: - arg5 -= 1 - arg5 <<= 4 - arg5Flags = 8 - arg5 |= arg5Flags - tbSequence += decint(arg5, DECINT_FORWARD) # arg5 - - # Write first article of new section - arg6 = sectionDelta + self._HTMLRecords[nrecords].nextSectionOpeningNode - arg6 <<= 4 - if self._HTMLRecords[nrecords].nextSectionNodeCount > 1 : - arg6Flags = 4 - else : - arg6Flags = 0 - arg6 |= arg6Flags - tbSequence += decint(arg6, DECINT_FORWARD) # arg5 - - # Write optional 6a if previous section node count > 1 - if arg6Flags == 4 : # arg4a - nodeCountValue = self._HTMLRecords[nrecords].nextSectionNodeCount - nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue - tbSequence += chr(nodeCountValue) - - tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len - - self._tbSequence = tbSequence - - # }}} - - def _evaluate_periodical_toc(self): - ''' - Periodical: - depth=4 - depth=3 1 - depth=2 1 or more - depth=1 multiple - Book: - depth=2 - depth=1 multiple - ''' - toc = self._oeb.toc - nodes = list(toc.iter())[1:] - toc_conforms = True - for child in nodes: - if child.klass == "periodical" and child.depth() != 3 or \ - child.klass == "section" and child.depth() != 2 or \ - child.klass == "article" and child.depth() != 1 : - - self._oeb.logger.warn('Nonconforming TOC entry: "%s" found at depth %d' % \ - (child.klass, child.depth()) ) - self._oeb.logger.warn(" : '%-25.25s...' 
\t\tklass=%-15.15s \tdepth:%d \tplayOrder=%03d" % \ - (child.title, child.klass, child.depth(), child.play_order) ) - toc_conforms = False - - # We also need to know that we have a pubdate or timestamp in the metadata, which the Kindle needs - if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == [] : - self._oeb.logger.info('metadata missing date/timestamp') - toc_conforms = False - - if not 'masthead' in self._oeb.guide : - self._oeb.logger.info('mastheadImage missing from manifest') - toc_conforms = False - - self._oeb.logger.info("%s" % " TOC structure conforms" if toc_conforms else " TOC structure non-conforming") - return toc_conforms - - def _generate_text(self): - self._oeb.logger.info('Serializing markup content...') - serializer = Serializer(self._oeb, self._images, - write_page_breaks_after_item=self.write_page_breaks_after_item) - breaks = serializer.breaks - text = serializer.text - self._anchor_offset_kindle = serializer.anchor_offset_kindle - self._id_offsets = serializer.id_offsets - self._content_length = len(text) - self._text_length = len(text) - text = StringIO(text) - buf = [] - nrecords = 0 - lastrecord = (self._content_length // RECORD_SIZE ) - offset = 0 - - if self._compression != UNCOMPRESSED: - self._oeb.logger.info(' Compressing markup content...') - data, overlap = self._read_text_record(text) - - if not self.opts.mobi_periodical: - self._flatten_toc() - - # Evaluate toc for conformance - if self.opts.mobi_periodical : - self._oeb.logger.info(' MOBI periodical specified, evaluating TOC for periodical conformance ...') - self._conforming_periodical_toc = self._evaluate_periodical_toc() - - # This routine decides whether to build flat or structured based on self._conforming_periodical_toc - # self._ctoc = self._generate_ctoc() - - # There may be multiple CNCX records built below, but the last record is returned and should be stored - self._ctoc_records.append(self._generate_ctoc()) - - # Build the HTMLRecords list so we can assemble the trailing bytes sequences in the following while loop - toc = self._oeb.toc - entries = list(toc.iter())[1:] - - if len(entries) : - self._indexable = self._generate_indexed_navpoints() - else : - self._oeb.logger.info(' No entries found in TOC ...') - self._indexable = False - - if not self._indexable : - self._oeb.logger.info(' Writing unindexed mobi ...') - - while len(data) > 0: - if self._compression == PALMDOC: - data = compress_doc(data) - record = StringIO() - record.write(data) - # Write trailing muti-byte sequence if any - record.write(overlap) - record.write(pack('>B', len(overlap))) - - if WRITE_PBREAKS : - nextra = 0 - pbreak = 0 - running = offset - while breaks and (breaks[0] - offset) < RECORD_SIZE: - # .pop returns item, removes it from list - pbreak = (breaks.pop(0) - running) >> 3 - if self.opts.verbose > 2 : - self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) ) - encoded = decint(pbreak, DECINT_FORWARD) - record.write(encoded) - running += pbreak << 3 - nextra += len(encoded) - lsize = 1 - while True: - size = decint(nextra + lsize, DECINT_BACKWARD) - if len(size) == lsize: - break - lsize += 1 - record.write(size) - - # Write Trailing Byte Sequence - if INDEXING and self._indexable: - # Dispatch to different TBS generators based upon publication type - booktype = self._MobiDoc.mobiType - if booktype == 0x002 : - self._generate_tbs_book(nrecords, lastrecord) - elif booktype == 0x102 : - self._generate_tbs_flat_periodical(nrecords, lastrecord) - elif booktype == 0x101 or 
booktype == 0x103 : - self._generate_tbs_structured_periodical(nrecords, lastrecord) - else : - raise NotImplementedError('Indexing for mobitype 0x%X not implemented' % booktype) - - # Write the sequence - record.write(self._tbSequence) - - self._records.append(record.getvalue()) - buf.append(self._records[-1]) - nrecords += 1 - offset += RECORD_SIZE - data, overlap = self._read_text_record(text) - - if INDEXING: - extra = sum(map(len, buf))%4 - if extra == 0: - extra = 4 - self._records.append('\0'*(4-extra)) - nrecords += 1 - self._text_nrecords = nrecords - - def _generate_images(self): - self._oeb.logger.info('Serializing images...') - images = [(index, href) for href, index in self._images.items()] - images.sort() - self._first_image_record = None - for _, href in images: - item = self._oeb.manifest.hrefs[href] - try: - data = rescale_image(item.data, self._imagemax) - except: - self._oeb.logger.warn('Bad image file %r' % item.href) - continue - finally: - item.unload_data_from_memory() - self._records.append(data) - if self._first_image_record is None: - self._first_image_record = len(self._records)-1 - - def _generate_end_records(self): - if FCIS_FLIS : - # This adds the binary blobs of FLIS and FCIS, which don't seem to be necessary - self._flis_number = len(self._records) - self._records.append( - 'FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'+ - '\xff'*4) - fcis = 'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00' - fcis += pack('>I', self._text_length) - fcis += '\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00' - self._fcis_number = len(self._records) - self._records.append(fcis) - self._records.append('\xE9\x8E\x0D\x0A') - - else : - self._flis_number = len(self._records) - self._records.append('\xE9\x8E\x0D\x0A') - - def _generate_record0(self): - metadata = self._oeb.metadata - exth = self._build_exth() - last_content_record = len(self._records) - 1 - - ''' - if INDEXING and self._indexable: - self._generate_end_records() - ''' - self._generate_end_records() - - record0 = StringIO() - # The PalmDOC Header - record0.write(pack('>HHIHHHH', self._compression, 0, - self._text_length, - self._text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf) - uid = random.randint(0, 0xffffffff) - title = normalize(unicode(metadata.title[0])).encode('utf-8') - # The MOBI Header - - # 0x0 - 0x3 - record0.write('MOBI') - - # 0x4 - 0x7 : Length of header - # 0x8 - 0x11 : MOBI type - # type meaning - # 0x002 MOBI book (chapter - chapter navigation) - # 0x101 News - Hierarchical navigation with sections and articles - # 0x102 News feed - Flat navigation - # 0x103 News magazine - same as 0x101 - # 0xC - 0xF : Text encoding (65001 is utf-8) - # 0x10 - 0x13 : UID - # 0x14 - 0x17 : Generator version - - btype = self._MobiDoc.mobiType - - record0.write(pack('>IIIII', - 0xe8, btype, 65001, uid, 6)) - - # 0x18 - 0x1f : Unknown - record0.write('\xff' * 8) - - - # 0x20 - 0x23 : Secondary index record - if btype < 0x100 : - record0.write(pack('>I', 0xffffffff)) - elif btype > 0x100 and self._indexable : - if self._primary_index_record is None: - record0.write(pack('>I', 0xffffffff)) - else: - record0.write(pack('>I', self._primary_index_record + 2 + len(self._ctoc_records))) - else : - record0.write(pack('>I', 0xffffffff)) - - # 0x24 - 0x3f : Unknown - record0.write('\xff' * 28) - - # 0x40 - 0x43 : Offset of first non-text record - record0.write(pack('>I', - self._text_nrecords + 1)) - - # 0x44 - 0x4b : title offset, 
title length - record0.write(pack('>II', - 0xe8 + 16 + len(exth), len(title))) - - # 0x4c - 0x4f : Language specifier - record0.write(iana2mobi( - str(metadata.language[0]))) - - # 0x50 - 0x57 : Unknown - record0.write('\0' * 8) - - # 0x58 - 0x5b : Format version - # 0x5c - 0x5f : First image record number - record0.write(pack('>II', - 6, self._first_image_record if self._first_image_record else 0)) - - # 0x60 - 0x63 : First HUFF/CDIC record number - # 0x64 - 0x67 : Number of HUFF/CDIC records - # 0x68 - 0x6b : First DATP record number - # 0x6c - 0x6f : Number of DATP records - record0.write('\0' * 16) - - # 0x70 - 0x73 : EXTH flags - record0.write(pack('>I', 0x50)) - - # 0x74 - 0x93 : Unknown - record0.write('\0' * 32) - - # 0x94 - 0x97 : DRM offset - # 0x98 - 0x9b : DRM count - # 0x9c - 0x9f : DRM size - # 0xa0 - 0xa3 : DRM flags - record0.write(pack('>IIII', - 0xffffffff, 0xffffffff, 0, 0)) - - - # 0xa4 - 0xaf : Unknown - record0.write('\0'*12) - - # 0xb0 - 0xb1 : First content record number - # 0xb2 - 0xb3 : last content record number - # (Includes Image, DATP, HUFF, DRM) - record0.write(pack('>HH', 1, last_content_record)) - - # 0xb4 - 0xb7 : Unknown - record0.write('\0\0\0\x01') - - # 0xb8 - 0xbb : FCIS record number - if FCIS_FLIS : - # Write these if FCIS/FLIS turned on - # 0xb8 - 0xbb : FCIS record number - record0.write(pack('>I', self._fcis_number)) - - # 0xbc - 0xbf : Unknown (FCIS record count?) - record0.write(pack('>I', 1)) - - # 0xc0 - 0xc3 : FLIS record number - record0.write(pack('>I', self._flis_number)) - - # 0xc4 - 0xc7 : Unknown (FLIS record count?) - record0.write(pack('>I', 1)) - else : - # 0xb8 - 0xbb : FCIS record number - record0.write(pack('>I', 0xffffffff)) - - # 0xbc - 0xbf : Unknown (FCIS record count?) - record0.write(pack('>I', 0xffffffff)) - - # 0xc0 - 0xc3 : FLIS record number - record0.write(pack('>I', 0xffffffff)) - - # 0xc4 - 0xc7 : Unknown (FLIS record count?) - record0.write(pack('>I', 1)) - - # 0xc8 - 0xcf : Unknown - record0.write('\0'*8) - - # 0xd0 - 0xdf : Unknown - record0.write(pack('>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff)) - - # 0xe0 - 0xe3 : Extra record data - # Extra record data flags: - # - 0x1: <extra multibyte bytes><size> (?) 
- # - 0x2: <TBS indexing description of this HTML record><size> GR - # - 0x4: <uncrossable breaks><size> - # GR: Use 7 for indexed files, 5 for unindexed - # Setting bit 2 (0x4) disables <guide><reference type="start"> functionality - - trailingDataFlags = 1 - if self._indexable : - trailingDataFlags |= 2 - if WRITE_PBREAKS : - trailingDataFlags |= 4 - record0.write(pack('>I', trailingDataFlags)) - - # 0xe4 - 0xe7 : Primary index record - record0.write(pack('>I', 0xffffffff if self._primary_index_record is - None else self._primary_index_record)) - - record0.write(exth) - record0.write(title) - record0 = record0.getvalue() - self._records[0] = record0 + ('\0' * (1024*8)) - - def _build_exth(self): - oeb = self._oeb - exth = StringIO() - nrecs = 0 - for term in oeb.metadata: - if term not in EXTH_CODES: continue - code = EXTH_CODES[term] - items = oeb.metadata[term] - if term == 'creator': - if self._prefer_author_sort: - creators = [normalize(unicode(c.file_as or c)) for c in items] - else: - creators = [normalize(unicode(c)) for c in items] - items = ['; '.join(creators)] - for item in items: - data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item))) - if term == 'identifier': - if data.lower().startswith('urn:isbn:'): - data = data[9:] - elif item.scheme.lower() == 'isbn': - pass - else: - continue - data = data.encode('utf-8') - exth.write(pack('>II', code, len(data) + 8)) - exth.write(data) - nrecs += 1 - if term == 'rights' : - try: - rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8') - except: - rights = 'Unknown' - exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8)) - exth.write(rights) - nrecs += 1 - - # Write UUID as ASIN - uuid = None - from calibre.ebooks.oeb.base import OPF - for x in oeb.metadata['identifier']: - if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'): - uuid = unicode(x).split(':')[-1] - break - if uuid is None: - from uuid import uuid4 - uuid = str(uuid4()) - - if isinstance(uuid, unicode): - uuid = uuid.encode('utf-8') - exth.write(pack('>II', 113, len(uuid) + 8)) - exth.write(uuid) - nrecs += 1 - - # Write cdetype - if not self.opts.mobi_periodical: - data = 'EBOK' - exth.write(pack('>II', 501, len(data)+8)) - exth.write(data) - nrecs += 1 - - # Add a publication date entry - if oeb.metadata['date'] != [] : - datestr = str(oeb.metadata['date'][0]) - elif oeb.metadata['timestamp'] != [] : - datestr = str(oeb.metadata['timestamp'][0]) - - if datestr is not None: - exth.write(pack('>II',EXTH_CODES['pubdate'], len(datestr) + 8)) - exth.write(datestr) - nrecs += 1 - else: - raise NotImplementedError("missing date or timestamp needed for mobi_periodical") - - if oeb.metadata.cover and \ - unicode(oeb.metadata.cover[0]) in oeb.manifest.ids: - id = unicode(oeb.metadata.cover[0]) - item = oeb.manifest.ids[id] - href = item.href - if href in self._images: - index = self._images[href] - 1 - exth.write(pack('>III', 0xc9, 0x0c, index)) - exth.write(pack('>III', 0xcb, 0x0c, 0)) - nrecs += 2 - index = self._add_thumbnail(item) - if index is not None: - exth.write(pack('>III', 0xca, 0x0c, index - 1)) - nrecs += 1 - - exth = exth.getvalue() - trail = len(exth) % 4 - pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte - exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad] - return ''.join(exth) - - def _add_thumbnail(self, item): - try: - data = rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN) - except IOError: - self._oeb.logger.warn('Bad image file %r' % item.href) - return None - manifest 
= self._oeb.manifest - id, href = manifest.generate('thumbnail', 'thumbnail.jpeg') - manifest.add(id, href, 'image/jpeg', data=data) - index = len(self._images) + 1 - self._images[href] = index - self._records.append(data) - return index - - def _write_header(self): - title = str(self._oeb.metadata.title[0]) - title = re.sub('[^-A-Za-z0-9]+', '_', title)[:31] - title = title + ('\0' * (32 - len(title))) - now = int(time.time()) - nrecords = len(self._records) - self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0), - 'BOOK', 'MOBI', pack('>IIH', (2*nrecords)-1, 0, nrecords)) - offset = self._tell() + (8 * nrecords) + 2 - for i, record in enumerate(self._records): - self._write(pack('>I', offset), '\0', pack('>I', 2*i)[1:]) - offset += len(record) - self._write('\0\0') - - def _write_content(self): - for record in self._records: - self._write(record) - - def _clean_text_value(self, text): - if text is not None and text.strip() : - text = text.strip() - if not isinstance(text, unicode): - text = text.decode('utf-8', 'replace') - text = normalize(text).encode('utf-8') - else : - text = "(none)".encode('utf-8') - return text - - def _compute_offset_length(self, i, node, entries) : - h = node.href - if h not in self._id_offsets: - self._oeb.log.warning('Could not find TOC entry:', node.title) - return -1, -1 - - offset = self._id_offsets[h] - length = None - # Calculate length based on next entry's offset - for sibling in entries[i+1:]: - h2 = sibling.href - if h2 in self._id_offsets: - offset2 = self._id_offsets[h2] - if offset2 > offset: - length = offset2 - offset - break - if length is None: - length = self._content_length - offset - return offset, length - - def _establish_document_structure(self) : - documentType = None - try : - klass = self._ctoc_map[0]['klass'] - except : - klass = None - - if klass == 'chapter' or klass == None : - documentType = 'book' - if self.opts.verbose > 2 : - self._oeb.logger.info("Adding a MobiBook to self._MobiDoc") - self._MobiDoc.documentStructure = MobiBook() - - elif klass == 'periodical' : - documentType = klass - if self.opts.verbose > 2 : - self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc") - self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode()) - self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle - else : - raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass) - return documentType - - # Index {{{ - - def _flatten_toc(self): - ''' - Flatten and re-order entries in TOC so that chapter to chapter jumping - never fails on the Kindle. - ''' - from calibre.ebooks.oeb.base import TOC - items = list(self._oeb.toc.iterdescendants()) - offsets = {i:self._id_offsets.get(i.href, -1) for i in items if i.href} - items = [i for i in items if offsets[i] > -1] - items.sort(key=lambda i:offsets[i]) - filt = [] - seen = set() - for i in items: - off = offsets[i] - if off in seen: continue - seen.add(off) - filt.append(i) - items = filt - newtoc = TOC() - for c, i in enumerate(items): - newtoc.add(i.title, i.href, play_order=c+1, id=str(c), - klass='chapter') - self._oeb.toc = newtoc - - def _generate_index(self): - self._oeb.log('Generating INDX ...') - self._primary_index_record = None - - # Build the NCXEntries and INDX - indxt, indxt_count, indices, last_name = self._generate_indxt() - - if last_name is None: - self._oeb.log.warn('Input document has no TOC. 
No index generated.') - return - - # Assemble the INDX0[0] and INDX1[0] output streams - indx1 = StringIO() - indx1.write('INDX'+pack('>I', 0xc0)) # header length - - # 0x8 - 0xb : Unknown - indx1.write('\0'*4) - - # 0xc - 0xf : Header type - indx1.write(pack('>I', 1)) - - # 0x10 - 0x13 : Unknown - indx1.write('\0'*4) - - # 0x14 - 0x17 : IDXT offset - # 0x18 - 0x1b : IDXT count - indx1.write(pack('>I', 0xc0+len(indxt))) - indx1.write(pack('>I', indxt_count + 1)) - - # 0x1c - 0x23 : Unknown - indx1.write('\xff'*8) - - # 0x24 - 0xbf - indx1.write('\0'*156) - indx1.write(indxt) - indx1.write(indices) - indx1 = indx1.getvalue() - - idxt0 = chr(len(last_name)) + last_name + pack('>H', indxt_count + 1) - idxt0 = align_block(idxt0) - indx0 = StringIO() - - if self._MobiDoc.mobiType == 0x002 : - tagx = TAGX['chapter'] - else : - tagx = TAGX['periodical'] - - tagx = align_block('TAGX' + pack('>I', 8 + len(tagx)) + tagx) - indx0_indices_pos = 0xc0 + len(tagx) + len(idxt0) - indx0_indices = align_block('IDXT' + pack('>H', 0xc0 + len(tagx))) - # Generate record header - header = StringIO() - - header.write('INDX') - header.write(pack('>I', 0xc0)) # header length - - # 0x08 - 0x0b : Unknown - header.write('\0'*4) - - # 0x0c - 0x0f : Header type - header.write(pack('>I', 0)) - - # 0x10 - 0x13 : Generator ID - # This value may impact the position of flagBits written in - # write_article_node(). Change with caution. - header.write(pack('>I', 6)) - - # 0x14 - 0x17 : IDXT offset - header.write(pack('>I', indx0_indices_pos)) - - # 0x18 - 0x1b : IDXT count - header.write(pack('>I', 1)) - - # 0x1c - 0x1f : Text encoding ? - # header.write(pack('>I', 650001)) - # GR: This needs to be either 0xFDE9 or 0x4E4 - header.write(pack('>I', 0xFDE9)) - - # 0x20 - 0x23 : Language code? - header.write(iana2mobi(str(self._oeb.metadata.language[0]))) - - # 0x24 - 0x27 : Number of TOC entries in INDX1 - header.write(pack('>I', indxt_count + 1)) - - # 0x28 - 0x2b : ORDT Offset - header.write('\0'*4) - - # 0x2c - 0x2f : LIGT offset - header.write('\0'*4) - - # 0x30 - 0x33 : Number of LIGT entries - header.write('\0'*4) - - # 0x34 - 0x37 : Number of ctoc[] blocks - header.write(pack('>I', len(self._ctoc_records))) - - # 0x38 - 0xb3 : Unknown (pad?) 
- header.write('\0'*124) - - # 0xb4 - 0xb7 : TAGX offset - header.write(pack('>I', 0xc0)) - - # 0xb8 - 0xbf : Unknown - header.write('\0'*8) - - header = header.getvalue() - - indx0.write(header) - indx0.write(tagx) - indx0.write(idxt0) - indx0.write(indx0_indices) - indx0 = indx0.getvalue() - - self._primary_index_record = len(self._records) - - # GR: handle multiple ctoc records - self._records.extend([indx0, indx1 ]) - for (i,ctoc_record) in enumerate(self._ctoc_records): - self._records.append(ctoc_record) - # print "adding %d of %d ctoc records" % (i+1, len(self._ctoc_records)) - - # Indexing for author/description fields in summary section - # Test for indexed periodical - only one that needs secondary index - if self._MobiDoc.mobiType > 0x100 : - # Write secondary index records - #tagx = TAGX['secondary_'+\ - # ('periodical' if self.opts.mobi_periodical else 'book')] - tagx = TAGX['secondary_'+'periodical'] - tagx_len = 8 + len(tagx) - - # generate secondary INDX0 - indx0 = StringIO() - indx0.write('INDX'+pack('>I', 0xc0)+'\0'*8) # header + 8x00 - indx0.write(pack('>I', 0x06)) # generator ID - indx0.write(pack('>I', 0xe8)) # IDXT offset - indx0.write(pack('>I', 1)) # IDXT entries - indx0.write(pack('>I', 65001)) # encoding - indx0.write('\xff'*4) # language - indx0.write(pack('>I', 4)) # IDXT Entries in INDX1 - indx0.write('\0'*4) # ORDT Offset - indx0.write('\0'*136) # everything up to TAGX offset - indx0.write(pack('>I', 0xc0)) # TAGX offset - indx0.write('\0'*8) # unknowns - indx0.write('TAGX'+pack('>I', tagx_len)+tagx) # TAGX - indx0.write('\x0D'+'mastheadImage' + '\x00\x04') # mastheadImage - indx0.write('IDXT'+'\x00\xd8\x00\x00') # offset plus pad - - # generate secondary INDX1 - indx1 = StringIO() - indx1.write('INDX' + pack('>I', 0xc0) + '\0'*4) # header + 4x00 - indx1.write(pack('>I', 1)) # blockType 1 - indx1.write(pack('>I', 0x00)) # unknown - indx1.write('\x00\x00\x00\xF0') # IDXT offset - indx1.write(pack('>I', 4)) # num of IDXT entries - indx1.write('\xff'*8) # encoding, language - indx1.write('\0'*(0xc0-indx1.tell())) # 00 to IDXT Entries @ 0xC0 - indx1.write('\0\x01\x80') # 1 - null - indx1.write('\x06'+'author' + '\x02\x80\x80\xc7') # author - indx1.write('\x0B'+'description' + '\x02\x80\x80\xc6') # description - indx1.write('\x0D'+'mastheadImage' + '\x02\x85\x80\xc5') # mastheadImage - indx1.write('IDXT'+'\x00\xc0\x00\xc3\x00\xce\x00\xde') # IDXT header - - # Write INDX0 and INDX1 to the stream - indx0, indx1 = indx0.getvalue(), indx1.getvalue() - self._records.extend((indx0, indx1)) - if self.opts.verbose > 3: - from tempfile import mkdtemp - import os - t = mkdtemp() - for i, n in enumerate(['sindx1', 'sindx0', 'ctoc', 'indx0', 'indx1']): - open(os.path.join(t, n+'.bin'), 'wb').write(self._records[-(i+1)]) - self._oeb.log.debug('Index records dumped to', t) - - # Index nodes {{{ - def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) : - pos = 0xc0 + indxt.tell() - indices.write(pack('>H', pos)) # Save the offset for IDXTIndices - name = "%04X"%count - indxt.write(chr(len(name)) + name) # Write the name - indxt.write(INDXT['periodical']) # entryType [0x0F | 0xDF | 0xFF | 0x3F] - indxt.write(chr(1)) # subType 1 - indxt.write(decint(offset, DECINT_FORWARD)) # offset - indxt.write(decint(length, DECINT_FORWARD)) # length - indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX - - indxt.write(decint(0, DECINT_FORWARD)) # unknown byte - - 
indxt.write(decint(self._ctoc_map[index]['classOffset'], DECINT_FORWARD)) # vwi title offset in CNCX - indxt.write(decint(firstSection, DECINT_FORWARD)) # first section in periodical - indxt.write(decint(lastSection, DECINT_FORWARD)) # first section in periodical - - indxt.write(decint(0, DECINT_FORWARD)) # 0x80 - - def _write_section_node(self, indxt, indices, myCtocMapIndex, index, offset, length, count, firstArticle, lastArticle, parentIndex) : - pos = 0xc0 + indxt.tell() - indices.write(pack('>H', pos)) # Save the offset for IDXTIndices - name = "%04X"%count - indxt.write(chr(len(name)) + name) # Write the name - indxt.write(INDXT['section']) # entryType [0x0F | 0xDF | 0xFF | 0x3F] - indxt.write(chr(0)) # subType 0 - indxt.write(decint(offset, DECINT_FORWARD)) # offset - indxt.write(decint(length, DECINT_FORWARD)) # length - indxt.write(decint(self._ctoc_map[myCtocMapIndex]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX - - indxt.write(decint(1, DECINT_FORWARD)) # unknown byte - - indxt.write(decint(self._ctoc_map[myCtocMapIndex]['classOffset'], DECINT_FORWARD)) # vwi title offset in CNCX - indxt.write(decint(parentIndex, DECINT_FORWARD)) # index of periodicalParent - indxt.write(decint(firstArticle, DECINT_FORWARD)) # first section in periodical - indxt.write(decint(lastArticle, DECINT_FORWARD)) # first section in periodical - - def _write_article_node(self, indxt, indices, index, offset, length, count, parentIndex) : - pos = 0xc0 + indxt.tell() - indices.write(pack('>H', pos)) # Save the offset for IDXTIndices - name = "%04X"%count - indxt.write(chr(len(name)) + name) # Write the name - indxt.write(INDXT['article']) # entryType [0x0F | 0xDF | 0xFF | 0x3F] - - hasAuthor = True if self._ctoc_map[index]['authorOffset'] else False - hasDescription = True if self._ctoc_map[index]['descriptionOffset'] else False - - # flagBits may be dependent upon the generatorID written at 0x10 in generate_index(). - # in INDX0. Mobigen uses a generatorID of 2 and writes these bits at positions 1 & 2; - # calibre uses a generatorID of 6 and writes the bits at positions 2 & 3. 
- flagBits = 0 - if hasAuthor : flagBits |= 0x4 - if hasDescription : flagBits |= 0x2 - indxt.write(pack('>B',flagBits)) # Author/description flags - indxt.write(decint(offset, DECINT_FORWARD)) # offset - - - indxt.write(decint(length, DECINT_FORWARD)) # length - indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX - - indxt.write(decint(2, DECINT_FORWARD)) # unknown byte - - indxt.write(decint(self._ctoc_map[index]['classOffset'], DECINT_FORWARD)) # vwi title offset in CNCX - indxt.write(decint(parentIndex, DECINT_FORWARD)) # index of periodicalParent - - # Optionally write the author and description fields - descriptionOffset = self._ctoc_map[index]['descriptionOffset'] - if descriptionOffset : - indxt.write(decint(descriptionOffset, DECINT_FORWARD)) - - authorOffset = self._ctoc_map[index]['authorOffset'] - if authorOffset : - indxt.write(decint(authorOffset, DECINT_FORWARD)) - - def _write_chapter_node(self, indxt, indices, index, offset, length, count): - # Writes an INDX1 NCXEntry of entryType 0x0F - chapter - if self.opts.verbose > 2: - # *** GR: Turn this off while I'm developing my code - #self._oeb.log.debug('Writing TOC node to IDXT:', node.title, 'href:', node.href) - pass - - pos = 0xc0 + indxt.tell() - indices.write(pack('>H', pos)) # Save the offset for IDXTIndices - name = "%04X"%count - indxt.write(chr(len(name)) + name) # Write the name - indxt.write(INDXT['chapter']) # entryType [0x0F | 0xDF | 0xFF | 0x3F] - indxt.write(decint(offset, DECINT_FORWARD)) # offset - indxt.write(decint(length, DECINT_FORWARD)) # length - indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX - indxt.write(decint(0, DECINT_FORWARD)) # unknown byte - - # }}} - - - def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc ) : - sectionTitles = list(child.iter())[1:] - sectionIndices = [] - sectionParents = [] - for (j, section) in enumerate(sectionTitles): - # iterate over just the sections - - if section.klass == 'periodical' : - # Write our index to the list - sectionIndices.append(currentSection) - - if self.opts.verbose > 3 : - self._oeb.logger.info("Periodical: %15.15s \tkls:%s \tdpt:%d ply:%03d" % \ - (section.title, section.klass, section.depth(), section.play_order) ) - - elif section.klass == 'section' : - # Add sections, save in list with original sequence number - myNewSection = myPeriodical.addSectionParent(myDoc, j) - sectionParents.append(myNewSection) - - # Bump the section # - currentSection += 1 - # Write our index to the list - sectionIndices.append(currentSection) - - if self.opts.verbose > 3 : - self._oeb.logger.info(" Section: %15.15s \tkls:%s \tdpt:%d ply:%03d \tindex:%d" % \ - (section.title, section.klass, section.depth(), section.play_order,j) ) - - elif section.klass == 'article' : - # Write our index to the list - sectionIndices.append(currentSection) - - else : - if self.opts.verbose > 3 : - self._oeb.logger.info( " Unrecognized class %s in structured document" % section.klass) - return sectionIndices, sectionParents - - def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents): - sectionArticles = list(section.iter())[1:] - # Iterate over the section's articles - - for (j, article) in enumerate(sectionArticles): - # Recompute offset and length for each article - offset, length = self._compute_offset_length(i, article, entries) - if self.opts.verbose > 2 : - self._oeb.logger.info( "article %02d: offset = 0x%06X 
length = 0x%06X" % (j, offset, length) ) - - ctoc_map_index = i + j + 1 - - #hasAuthor = self._ctoc_map[ctoc_map_index].get('authorOffset') - #hasDescription = self._ctoc_map[ctoc_map_index].get('descriptionOffset') - mySectionParent = sectionParents[sectionIndices[i-1]] - myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index ) - mySectionParent.addArticle( myNewArticle ) - - def _add_book_chapters(self, myDoc, indxt, indices): - chapterCount = myDoc.documentStructure.chapterCount() - if self.opts.verbose > 3 : - self._oeb.logger.info("Writing %d chapters for mobitype 0x%03X" % (chapterCount, myDoc.mobiType)) - - for (c, chapter) in enumerate(list(myDoc.documentStructure.chapters)) : - index = chapter.myCtocMapIndex - self._write_chapter_node(indxt, indices, index, chapter.startAddress, chapter.length, c) - - last_name = "%04X"%c # Returned when done - return last_name, c - - def _add_periodical_flat_articles(self, myDoc, indxt, indices): - sectionParent = myDoc.documentStructure.sectionParents[0] - articleCount = len(sectionParent.articles) - if self.opts.verbose > 3 : - self._oeb.logger.info("Writing %d articles for mobitype 0x%03X" % (articleCount, myDoc.mobiType)) - - # Singleton periodical - index = 0 - offset = myDoc.documentStructure.startAddress - length = myDoc.documentStructure.length - c = 0 - firstSection = myDoc.documentStructure.firstSectionIndex - lastSection = myDoc.documentStructure.lastSectionIndex - self._write_periodical_node(indxt, indices, index, offset, length, c, firstSection, lastSection) - - # Singleton section - index += 1 - offset = sectionParent.startAddress - length = sectionParent.sectionLength - c += 1 - firstArticle = sectionParent.firstArticleIndex - lastArticle = sectionParent.lastArticleIndex - parentIndex = sectionParent.parentIndex - self._write_section_node(indxt, indices, sectionParent.myCtocMapIndex, index, offset, length, c, firstArticle, lastArticle, parentIndex) - - # articles - for article in list(sectionParent.articles): - index = article.myCtocMapIndex - offset = article.startAddress - length = article.articleLength - c += 1 - parentIndex = article.sectionParentIndex - self._write_article_node(indxt, indices, index, offset, length, c, parentIndex) - - last_name = "%04X" % c - return last_name, c - - def _add_periodical_structured_articles(self, myDoc, indxt, indices): - # Write NCXEntries for Structured Periodical - # <periodical> - # <section> - # <section> ... - # <article> - # <article> ... 
- - if self.opts.verbose > 2 : - self._oeb.logger.info( "Writing NCXEntries for mobiType 0x%03X" % myDoc.mobiType) - - sectionParent = myDoc.documentStructure.sectionParents[0] - #articleCount = len(sectionParent.articles) - - # Write opening periodical 0xDF entry - index = 0 - offset = myDoc.documentStructure.startAddress - length = myDoc.documentStructure.length - c = 0 - firstSection = myDoc.documentStructure.firstSectionIndex - lastSection = myDoc.documentStructure.lastSectionIndex - self._write_periodical_node(indxt, indices, index, offset, length, c, firstSection, lastSection) - - # Write each section 0xFF entry - sectionCount = firstSection - while sectionCount <= lastSection : - # section - sectionParent = myDoc.documentStructure.sectionParents[sectionCount - 1] - #articleCount = len(sectionParent.articles) - #index += 1 - offset = sectionParent.startAddress - length = sectionParent.sectionLength - c += 1 - firstArticle = sectionParent.firstArticleIndex - lastArticle = sectionParent.lastArticleIndex - parentIndex = sectionParent.parentIndex - self._write_section_node(indxt, indices, sectionParent.myCtocMapIndex, sectionCount, offset, length, c, firstArticle, lastArticle, parentIndex) - sectionCount += 1 - - # Write each article 0x3F entry - sectionCount = firstSection - while sectionCount <= lastSection : - # section - sectionParent = myDoc.documentStructure.sectionParents[sectionCount - 1] -# articleCount = len(sectionParent.articles) -# index += 1 -# offset = sectionParent.startAddress -# length = sectionParent.sectionLength -# c += 1 -# firstArticle = sectionParent.firstArticleIndex -# lastArticle = sectionParent.lastArticleIndex -# parentIndex = sectionParent.parentIndex -# add_section_node(index, offset, length, c, firstArticle, lastArticle, parentIndex) - - last_name = "%04X"%c - - # articles - for (i, article) in enumerate(list(sectionParent.articles)) : - if self.opts.verbose > 3 : - self._oeb.logger.info( "Adding section:article %d:%02d" % \ - (sectionParent.myIndex, i)) - index = article.myCtocMapIndex - offset = article.startAddress - length = article.articleLength - c += 1 - parentIndex = article.sectionParentIndex - self._write_article_node(indxt, indices, index, offset, length, c, parentIndex) - - last_name = "%04X"%c - - sectionCount += 1 - - return last_name, c - - def _generate_indxt(self): - # Assumption: child.depth() represents nestedness of the TOC. 
- # A flat document (book) has a depth of 2: - # <navMap> child.depth() = 2 - # <navPoint> Chapter child.depth() = 1 - # <navPoint> Chapter etc - # -or- - # A structured document (periodical) has a depth of 4 (Mobigen-prepped) - # <navMap> child.depth() = 4 - # <navPoint> Periodical child.depth() = 3 - # <navPoint> Section 1 child.depth() = 2 - # <navPoint> Article child.depth() = 1 - # <navPoint> Article(s) child.depth() = 1 - # <navpoint> Section 2 - - sectionIndices = [] - sectionParents = [] - currentSection = 0 # Starting section number - toc = self._oeb.toc - indxt, indices, c = StringIO(), StringIO(), 0 - - indices.write('IDXT') - last_name = None - - # 'book', 'periodical' or None - documentType = self._establish_document_structure() - myDoc = self._MobiDoc - - nodes = list(toc.iter())[0:1] - for (i, child) in enumerate(nodes) : - - if documentType == "periodical" : - myPeriodical = myDoc.documentStructure - if self.opts.verbose > 3 : - self._oeb.logger.info("\nDocument: %s \tkls:%s \tdpt:%d ply:%03d" % \ - (child.title, child.klass, child.depth(), child.play_order) ) - sectionIndices, sectionParents = \ - self._generate_section_indices(child, currentSection, myPeriodical, myDoc) - - elif documentType == "book" : - myBook = myDoc.documentStructure - - if self.opts.verbose > 3 : - self._oeb.logger.info("\nBook: %-19.19s \tkls:%s \tdpt:%d ply:%03d" % \ - (child.title, child.klass, child.depth(), child.play_order) ) - else : - if self.opts.verbose > 3 : - self._oeb.logger.info("unknown document type %12.12s \tdepth:%d" % (child.title, child.depth()) ) - - # Original code starts here - # test first node for depth/class - entries = list(toc.iter())[1:] - for (i, child) in enumerate(entries): - if not child.title or not child.title.strip(): - continue - - offset, length = self._compute_offset_length(i, child, entries) - - if child.klass == 'chapter' or \ - (not self.opts.mobi_periodical and child.klass == 'article') : - # create chapter object - confirm i + 0 is correct!! 
- myNewChapter = MobiChapter(myDoc.getNextNode(), offset, length, i) - myBook.addChapter(myNewChapter) - - # Diagnostic - try : - if self.opts.verbose > 3 : - self._oeb.logger.info( " Chapter: %-14.14s \tcls:%s \tdpt:%d ply:%03d \toff:0x%X \t:len0x%X" % \ - (child.title, child.klass, child.depth(), child.play_order, offset, length) ) - except : - if self.opts.verbose > 3 : - self._oeb.logger.info( " Chapter: %-14.14s \tclass:%s \tdepth:%d playOrder:%03d \toff:0x%X \t:len0x%X" % \ - ("(bad string)", child.klass, child.depth(), child.play_order, offset, length)) - - elif child.klass == 'section' and self.opts.mobi_periodical : - if self.opts.verbose > 3 : - self._oeb.logger.info("\n Section: %-15.15s \tkls:%s \tdpt:%d ply:%03d" % \ - (child.title, child.klass, child.depth(), child.play_order)) - self._generate_section_article_indices(i, child, entries, sectionIndices, sectionParents) - - if self.opts.verbose > 3 : - self._oeb.logger.info("") - - mobiType = myDoc.mobiType - if self.opts.verbose > 3 : - self._MobiDoc.dumpInfo() - - if mobiType == 0x02 : - last_name, c = self._add_book_chapters(myDoc, indxt, indices) - - elif mobiType == 0x102 and myDoc.documentStructure.sectionCount() == 1 : - last_name, c = self._add_periodical_flat_articles(myDoc, indxt, indices) - - else : - last_name, c = self._add_periodical_structured_articles(myDoc, indxt, indices) - - return align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name - # }}} - - # CTOC {{{ - def _add_to_ctoc(self, ctoc_str, record_offset): - # Write vwilen + string to ctoc - # Return offset - # Is there enough room for this string in the current ctoc record? - if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str): - # flush this ctoc, start a new one - # print "closing ctoc_record at 0x%X" % self._ctoc.tell() - # print "starting new ctoc with '%-50.50s ...'" % ctoc_str - # pad with 00 - pad = 0xfbf8 - self._ctoc.tell() - # print "padding %d bytes of 00" % pad - self._ctoc.write('\0' * (pad)) - self._ctoc_records.append(self._ctoc.getvalue()) - self._ctoc.truncate(0) - self._ctoc_offset += 0x10000 - record_offset = self._ctoc_offset - - offset = self._ctoc.tell() + record_offset - self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str) - return offset - - def _add_flat_ctoc_node(self, node, ctoc, title=None): - # Process 'chapter' or 'article' nodes only, force either to 'chapter' - t = node.title if title is None else title - t = self._clean_text_value(t) - self._last_toc_entry = t - - # Create an empty dictionary for this node - ctoc_name_map = {} - - # article = chapter - if node.klass == 'article' : - ctoc_name_map['klass'] = 'chapter' - else : - ctoc_name_map['klass'] = node.klass - - # Add title offset to name map - ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset) - self._chapterCount += 1 - - # append this node's name_map to map - self._ctoc_map.append(ctoc_name_map) - - return - - def _add_structured_ctoc_node(self, node, ctoc, title=None): - # Process 'periodical', 'section' and 'article' - - # Fetch the offset referencing the current ctoc_record - if node.klass is None : - return - t = node.title if title is None else title - t = self._clean_text_value(t) - self._last_toc_entry = t - - # Create an empty dictionary for this node - ctoc_name_map = {} - - # Add the klass of this node - ctoc_name_map['klass'] = node.klass - - if node.klass == 'chapter': - # Add title offset to name map - ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset) - self._chapterCount += 1 - 
- elif node.klass == 'periodical' : - # Add title offset - ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset) - - # Look for existing class entry 'periodical' in _ctoc_map - for entry in self._ctoc_map: - if entry['klass'] == 'periodical': - # Use the pre-existing instance - ctoc_name_map['classOffset'] = entry['classOffset'] - break - else : - continue - else: - # class names should always be in CNCX 0 - no offset - ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0) - - self._periodicalCount += 1 - - elif node.klass == 'section' : - # Add title offset - ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset) - - # Look for existing class entry 'section' in _ctoc_map - for entry in self._ctoc_map: - if entry['klass'] == 'section': - # Use the pre-existing instance - ctoc_name_map['classOffset'] = entry['classOffset'] - break - else : - continue - else: - # class names should always be in CNCX 0 - no offset - ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0) - - self._sectionCount += 1 - - elif node.klass == 'article' : - # Add title offset/title - ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset) - - # Look for existing class entry 'article' in _ctoc_map - for entry in self._ctoc_map: - if entry['klass'] == 'article': - ctoc_name_map['classOffset'] = entry['classOffset'] - break - else : - continue - else: - # class names should always be in CNCX 0 - no offset - ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0) - - # Add description offset/description - if node.description : - d = self._clean_text_value(node.description) - ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset) - else : - ctoc_name_map['descriptionOffset'] = None - - # Add author offset/attribution - if node.author : - a = self._clean_text_value(node.author) - ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset) - else : - ctoc_name_map['authorOffset'] = None - - self._articleCount += 1 - - else : - raise NotImplementedError( \ - 'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \ - (node.title, node.klass, node.play_order)) - - # append this node's name_map to map - self._ctoc_map.append(ctoc_name_map) - - def _generate_ctoc(self): - # Generate the compiled TOC strings - # Each node has 1-4 CTOC entries: - # Periodical (0xDF) - # title, class - # Section (0xFF) - # title, class - # Article (0x3F) - # title, class, description, author - # Chapter (0x0F) - # title, class - # nb: Chapters don't actually have @class, so we synthesize it - # in reader._toc_from_navpoint - - toc = self._oeb.toc - reduced_toc = [] - self._ctoc_map = [] # per node dictionary of {class/title/desc/author} offsets - self._last_toc_entry = None - #ctoc = StringIO() - self._ctoc = StringIO() - - # Track the individual node types - self._periodicalCount = 0 - self._sectionCount = 0 - self._articleCount = 0 - self._chapterCount = 0 - - #first = True - - if self._conforming_periodical_toc : - self._oeb.logger.info('Generating structured CTOC ...') - for (child) in toc.iter(): - if self.opts.verbose > 2 : - self._oeb.logger.info(" %s" % child) - self._add_structured_ctoc_node(child, self._ctoc) - #first = False - - else : - self._oeb.logger.info('Generating flat CTOC ...') - previousOffset = -1 - currentOffset = 0 - for (i, child) in enumerate(toc.iterdescendants()): - # Only add chapters or articles at depth==1 - # no class defaults to 'chapter' - if child.klass is None : 
child.klass = 'chapter' - if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1 : - if self.opts.verbose > 2 : - self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \ - (child.klass, child.depth(), child) ) - - # Test to see if this child's offset is the same as the previous child's - # offset, skip it - h = child.href - - if h is None: - self._oeb.logger.warn(' Ignoring TOC entry with no href:', - child.title) - continue - if h not in self._id_offsets: - self._oeb.logger.warn(' Ignoring missing TOC entry:', - unicode(child)) - continue - - currentOffset = self._id_offsets[h] - # print "_generate_ctoc: child offset: 0x%X" % currentOffset - - if currentOffset != previousOffset : - self._add_flat_ctoc_node(child, self._ctoc) - reduced_toc.append(child) - previousOffset = currentOffset - else : - self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title)) - - else : - if self.opts.verbose > 2 : - self._oeb.logger.info("skipping class: %s depth %d at position %d" % \ - (child.klass, child.depth(),i)) - - # Update the TOC with our edited version - self._oeb.toc.nodes = reduced_toc - - # Instantiate a MobiDocument(mobitype) - if (not self._periodicalCount and not self._sectionCount and not self._articleCount) or \ - not self.opts.mobi_periodical : - mobiType = 0x002 - elif self._periodicalCount: - pt = None - if self._oeb.metadata.publication_type: - x = unicode(self._oeb.metadata.publication_type[0]).split(':') - if len(x) > 1: - pt = x[1] - mobiType = {'newspaper':0x101}.get(pt, 0x103) - else : - raise NotImplementedError('_generate_ctoc: Unrecognized document structured') - - self._MobiDoc = MobiDocument(mobiType) - - if self.opts.verbose > 2 : - structType = 'book' - if mobiType > 0x100 : - structType = 'flat periodical' if mobiType == 0x102 else 'structured periodical' - self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) ) - if mobiType > 0x100 : - self._oeb.logger.info("periodicalCount: %d sectionCount: %d articleCount: %d"% \ - (self._periodicalCount, self._sectionCount, self._articleCount) ) - else : - self._oeb.logger.info("chapterCount: %d" % self._chapterCount) - - # Apparently the CTOC must end with a null byte - self._ctoc.write('\0') - - ctoc = self._ctoc.getvalue() - rec_count = len(self._ctoc_records) - self._oeb.logger.info(" CNCX utilization: %d %s %.0f%% full" % \ - (rec_count + 1, 'records, last record' if rec_count else 'record,', - len(ctoc)/655) ) - - return align_block(ctoc) - - # }}} - -class HTMLRecordData(object): - """ A data structure containing indexing/navigation data for an HTML record """ - def __init__(self): - self._continuingNode = -1 - self._continuingNodeParent = -1 - self._openingNode = -1 - self._openingNodeParent = -1 - self._currentSectionNodeCount = -1 - self._nextSectionNumber = -1 - self._nextSectionOpeningNode = -1 - self._nextSectionNodeCount = -1 - - def getContinuingNode(self): - return self._continuingNode - def setContinuingNode(self, value): - self._continuingNode = value - continuingNode = property(getContinuingNode, setContinuingNode, None, None) - - def getContinuingNodeParent(self): - return self._continuingNodeParent - def setContinuingNodeParent(self, value): - self._continuingNodeParent = value - continuingNodeParent = property(getContinuingNodeParent, setContinuingNodeParent, None, None) - - def getOpeningNode(self): - return self._openingNode - def setOpeningNode(self, value): - self._openingNode = value - openingNode = 
property(getOpeningNode, setOpeningNode, None, None) - - def getOpeningNodeParent(self): - return self._openingNodeParent - def setOpeningNodeParent(self, value): - self._openingNodeParent = value - openingNodeParent = property(getOpeningNodeParent, setOpeningNodeParent, None, None) - - def getCurrentSectionNodeCount(self): - return self._currentSectionNodeCount - def setCurrentSectionNodeCount(self, value): - self._currentSectionNodeCount = value - currentSectionNodeCount = property(getCurrentSectionNodeCount, setCurrentSectionNodeCount, None, None) - - def getNextSectionNumber(self): - return self._nextSectionNumber - def setNextSectionNumber(self, value): - self._nextSectionNumber = value - nextSectionNumber = property(getNextSectionNumber, setNextSectionNumber, None, None) - - def getNextSectionOpeningNode(self): - return self._nextSectionOpeningNode - def setNextSectionOpeningNode(self, value): - self._nextSectionOpeningNode = value - nextSectionOpeningNode = property(getNextSectionOpeningNode, setNextSectionOpeningNode, None, None) - - def getNextSectionNodeCount(self): - return self._nextSectionNodeCount - def setNextSectionNodeCount(self, value): - self._nextSectionNodeCount = value - nextSectionNodeCount = property(getNextSectionNodeCount, setNextSectionNodeCount, None, None) - - def dumpData(self, recordNumber, oeb): - oeb.logger.info( "--- Summary of HTML Record 0x%x [%d] indexing ---" % (recordNumber, recordNumber) ) - oeb.logger.info( " continuingNode: %03d" % self.continuingNode ) - oeb.logger.info( " continuingNodeParent: %03d" % self.continuingNodeParent ) - oeb.logger.info( " openingNode: %03d" % self.openingNode ) - oeb.logger.info( " openingNodeParent: %03d" % self.openingNodeParent ) - oeb.logger.info( " currentSectionNodeCount: %03d" % self.currentSectionNodeCount ) - oeb.logger.info( " nextSectionNumber: %03d" % self.nextSectionNumber ) - oeb.logger.info( " nextSectionOpeningNode: %03d" % self.nextSectionOpeningNode ) - oeb.logger.info( " nextSectionNodeCount: %03d" % self.nextSectionNodeCount ) - -class MobiDocument(object): - """ Hierarchical description of a Mobi document """ - - # Counter to assign index values as new nodes are created - _nextNode = -1 - - def __init__(self, mobitype): - self._mobitype = mobitype - self._documentStructure = None # Assigned in _generate_indxt - - def getMobiType(self): - return self._mobitype - def setMobiType(self, value): - self._mobitype = value - mobiType = property(getMobiType, setMobiType, None, None) - - def getDocumentStructure(self): - return self._documentStructure - def setDocumentStructure(self, value): - self._documentStructure = value - documentStructure = property(getDocumentStructure, setDocumentStructure, None, None) - - def getNextNode(self): - self._nextNode += 1 - return self._nextNode - - def dumpInfo(self): - self._documentStructure.dumpInfo() - -class MobiBook(object): - """ A container for a flat chapter-to-chapter Mobi book """ - def __init__(self): - self._chapters = [] - - def chapterCount(self): - return len(self._chapters) - - def getChapters(self): - return self._chapters - def setChapters(self, value): - self._chapters = value - chapters = property(getChapters, setChapters, None, None) - - def addChapter(self, value): - self._chapters.append(value) - - def dumpInfo(self): - print "%20s:" % ("Book") - print "%20s: %d" % ("Number of chapters", len(self._chapters)) - for (count, chapter) in enumerate(self._chapters): - print "%20s: %d" % ("myCtocMapIndex",chapter.myCtocMapIndex) - print "%20s: %d" % 
("Chapter",count) - print "%20s: 0x%X" % ("startAddress", chapter.startAddress) - print "%20s: 0x%X" % ("length", chapter.length) - print - -class MobiChapter(object): - """ A container for Mobi chapters """ - def __init__(self, myIndex, startAddress, length, ctoc_map_index): - self._myIndex = myIndex - self._startAddress = startAddress - self._length = length - self._myCtocMapIndex = ctoc_map_index - - def getMyCtocMapIndex(self): - return self._myCtocMapIndex - def setMyCtocMapIndex(self, value): - self._myCtocMapIndex = value - myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None) - - def getMyIndex(self): - return self._myIndex - myIndex = property(getMyIndex, None, None, None) - - def getStartAddress(self): - return self._startAddress - def setStartAddress(self, value): - self._startAddress = value - startAddress = property(getStartAddress, setStartAddress, None, None) - - def getLength(self): - return self._length - def setLength(self, value): - self._length = value - length = property(getLength, setLength, None, None) - -class MobiPeriodical(object): - """ A container for a structured periodical """ - def __init__(self, myIndex): - self._myIndex = myIndex - self._sectionParents = [] - self._startAddress = 0xFFFFFFFF - self._length = 0xFFFFFFFF - self._firstSectionIndex = 0xFFFFFFFF - self._lastSectionIndex = 0xFFFFFFFF - self._myCtocMapIndex = 0 # Always first entry - - def getMyIndex(self): - return self._myIndex - def setMyIndex(self, value): - self._myIndex = value - myIndex = property(getMyIndex, setMyIndex, None, None) - - def getSectionParents(self): - return self._sectionParents - def setSectionParents(self, value): - self._sectionParents = value - sectionParents = property(getSectionParents, setSectionParents, None, None) - - def sectionCount(self): - return len(self._sectionParents) - - def getStartAddress(self): - return self._startAddress - def setStartAddress(self, value): - self._startAddress = value - startAddress = property(getStartAddress, setStartAddress, None, None) - - def getLength(self): - return self._length - def setLength(self, value): - self._length = value - length = property(getLength, setLength, None, None) - - def getFirstSectionIndex(self): - return self._firstSectionIndex - def setFirstSectionIndex(self, value): - self._firstSectionIndex = value - firstSectionIndex = property(getFirstSectionIndex, setFirstSectionIndex, None, None) - - def getLastSectionIndex(self): - return self._lastSectionIndex - def setLastSectionIndex(self, value): - self._lastSectionIndex = value - lastSectionIndex = property(getLastSectionIndex, setLastSectionIndex, None, None) - - def getMyCtocMapIndex(self): - return self._myCtocMapIndex - def setMyCtocMapIndex(self, value): - self._myCtocMapIndex = value - myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None) - - def addSectionParent(self, myIndex, ctoc_map_index): - # Create a new section parent - newSection = MobiSection(myIndex) - # Assign our index to the section - newSection.parentIndex = self._myIndex - # Assign section number - newSection.sectionIndex = len(self._sectionParents) - # Assign ctoc_map_index - newSection.myCtocMapIndex = ctoc_map_index - # Add it to the list - self._sectionParents.append(newSection) - return newSection - - def dumpInfo(self): - print "%20s:" % ("Periodical") - print "%20s: 0x%X" % ("myIndex", self.myIndex) - print "%20s: 0x%X" % ("startAddress", self.startAddress) - print "%20s: 0x%X" % ("length", self.length) - print "%20s: 0x%X" % 
("myCtocMapIndex", self.myCtocMapIndex) - print "%20s: 0x%X" % ("firstSectionIndex", self.firstSectionIndex) - print "%20s: 0x%X" % ("lastSectionIndex", self.lastSectionIndex) - print "%20s: %d" % ("Number of Sections", len(self._sectionParents)) - for (count, section) in enumerate(self._sectionParents): - print "\t%20s: %d" % ("Section",count) - print "\t%20s: 0x%X" % ("startAddress", section.startAddress) - print "\t%20s: 0x%X" % ("length", section.sectionLength) - print "\t%20s: 0x%X" % ("parentIndex", section.parentIndex) - print "\t%20s: 0x%X" % ("myIndex", section.myIndex) - print "\t%20s: 0x%X" % ("firstArticleIndex", section.firstArticleIndex) - print "\t%20s: 0x%X" % ("lastArticleIndex", section.lastArticleIndex) - print "\t%20s: 0x%X" % ("articles", len(section.articles) ) - print "\t%20s: 0x%X" % ("myCtocMapIndex", section.myCtocMapIndex ) - print - for (artCount, article) in enumerate(section.articles) : - print "\t\t%20s: %d" % ("Article",artCount) - print "\t\t%20s: 0x%X" % ("startAddress", article.startAddress) - print "\t\t%20s: 0x%X" % ("length", article.articleLength) - print "\t\t%20s: 0x%X" % ("sectionIndex", article.sectionParentIndex) - print "\t\t%20s: 0x%X" % ("myIndex", article.myIndex) - print "\t\t%20s: 0x%X" % ("myCtocMapIndex", article.myCtocMapIndex) - print - -class MobiSection(object): - """ A container for periodical sections """ - def __init__(self, myMobiDoc): - self._myMobiDoc = myMobiDoc - self._myIndex = myMobiDoc.getNextNode() - self._parentIndex = 0xFFFFFFFF - self._firstArticleIndex = 0x00 - self._lastArticleIndex = 0x00 - self._startAddress = 0xFFFFFFFF - self._sectionLength = 0xFFFFFFFF - self._articles = [] - self._myCtocMapIndex = -1 - - def getMyMobiDoc(self): - return self._myMobiDoc - def setMyMobiDoc(self, value): - self._myMobiDoc = value - myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None) - - def getMyIndex(self): - return self._myIndex - def setMyIndex(self, value): - self._myIndex = value - myIndex = property(getMyIndex, setMyIndex, None, None) - - def getParentIndex(self): - return self._parentIndex - def setParentIndex(self, value): - self._parentIndex = value - parenIndex = property(getParentIndex, setParentIndex, None, None) - - def getFirstArticleIndex(self): - return self._firstArticleIndex - def setFirstArticleIndex(self, value): - self._firstArticleIndex = value - firstArticleIndex = property(getFirstArticleIndex, setFirstArticleIndex, None, None) - - def getLastArticleIndex(self): - return self._lastArticleIndex - def setLastArticleIndex(self, value): - self._lastArticleIndex = value - lastArticleIndex = property(getLastArticleIndex, setLastArticleIndex, None, None) - - def getStartAddress(self): - return self._startAddress - def setStartAddress(self, value): - self._startAddress = value - startAddress = property(getStartAddress, setStartAddress, None, None) - - def getSectionLength(self): - return self._sectionLength - def setSectionLength(self, value): - self._sectionLength = value - sectionLength = property(getSectionLength, setSectionLength, None, None) - - def getArticles(self): - return self._articles - def setArticles(self, value): - self._articles = value - articles = property(getArticles, setArticles, None, None) - - def getMyCtocMapIndex(self): - return self._myCtocMapIndex - def setMyCtocMapIndex(self, value): - self._myCtocMapIndex = value - myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None) - - def addArticle(self, article): - self._articles.append(article) - - # Adjust the 
Periodical parameters - # If this is the first article of the first section, init the values - if self.myIndex == 1 and len(self.articles) == 1 : - self.myMobiDoc.documentStructure.firstSectionIndex = self.myIndex - self.myMobiDoc.documentStructure.lastSectionIndex = self.myIndex - self.myMobiDoc.documentStructure.length = article.articleLength + \ - ( article.startAddress - self.myMobiDoc.documentStructure.startAddress) - else: - self.myMobiDoc.documentStructure.length += article.articleLength - - # Always set the highest section index to myIndex - self.myMobiDoc.documentStructure.lastSectionIndex = self.myIndex - - # Adjust the Section parameters - if len(self.articles) == 1 : - self.firstArticleIndex = article.myIndex - - if len(self.myMobiDoc.documentStructure.sectionParents) == 1 : - self.startAddress = self.myMobiDoc.documentStructure.startAddress - self.sectionLength = article.articleLength + \ - ( article.startAddress - self.myMobiDoc.documentStructure.startAddress ) - - else : - self.startAddress = article.startAddress - self.sectionLength = article.articleLength - - self.lastArticleIndex = article.myIndex - else : - self.lastArticleIndex = article.myIndex - - # Adjust the Section length - if len(self.articles) > 1 : - self.sectionLength += article.articleLength - -class MobiArticle(object): - """ A container for periodical articles """ - def __init__(self, sectionParent, startAddress, length, ctocMapIndex): - self._mySectionParent = sectionParent - self._myMobiDoc = sectionParent.myMobiDoc - self._myIndex = sectionParent.myMobiDoc.getNextNode() - self._myCtocMapIndex = ctocMapIndex - self._sectionParentIndex = sectionParent.myIndex - self._startAddress = startAddress - self._articleLength = length - - def getMySectionParent(self): - return self._mySectionParent - def setMySectionParent(self, value): - self._mySectionParent = value - mySectionParent = property(getMySectionParent, setMySectionParent, None, None) - - def getMyMobiDoc(self): - return self._myMobiDoc - def setMyMobiDoc(self, value): - self._myMobiDoc = value - myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None) - - def getMyIndex(self): - return self._myIndex - def setMyIndex(self, value): - self._sectionIndex = value - myIndex = property(getMyIndex, setMyIndex, None, None) - - def getSectionParentIndex(self): - return self._sectionParentIndex - def setSectionParentIndex(self, value): - self._sectionParentIndex = value - sectionParentIndex = property(getSectionParentIndex, setSectionParentIndex, None, None) - - def getStartAddress(self): - return self._startAddress - def setStartAddress(self, value): - self._startAddress = value - startAddress = property(getStartAddress, setStartAddress, None, None) - - def getArticleLength(self): - return self._articleLength - def setArticleLength(self, value): - self._articleLength = value - articleLength = property(getArticleLength, setArticleLength, None, None) - - def getMyCtocMapIndex(self): - return self._myCtocMapIndex - def setMyCtocMapIndex(self, value): - self._myCtocMapIndex = value - myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None) - diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 6f0c2b56e9..27f8bec54d 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -21,6 +21,7 @@ from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE) from calibre.ebooks.mobi.utils import (rescale_image, encint, encode_trailing_data, 
         align_block, detect_periodical)
 from calibre.ebooks.mobi.writer2.indexer import Indexer
+from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
 
 EXTH_CODES = {
     'creator': 100,
@@ -46,9 +47,6 @@ EXTH_CODES = {
 
 # Disabled as I dont care about uncrossable breaks
 WRITE_UNCROSSABLE_BREAKS = False
 
-MAX_THUMB_SIZE = 16 * 1024
-MAX_THUMB_DIMEN = (180, 240)
-
 class MobiWriter(object):
     COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
diff --git a/src/calibre/gui2/convert/mobi_output.py b/src/calibre/gui2/convert/mobi_output.py
index cd1d0430ae..bff9598e6e 100644
--- a/src/calibre/gui2/convert/mobi_output.py
+++ b/src/calibre/gui2/convert/mobi_output.py
@@ -21,7 +21,7 @@ class PluginWidget(Widget, Ui_Form):
 
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent,
-                ['prefer_author_sort', 'rescale_images', 'toc_title',
+                ['prefer_author_sort', 'toc_title',
                 'mobi_ignore_margins', 'mobi_toc_at_start',
                 'dont_compress', 'no_inline_toc', 'share_not_sync',
                 'personal_doc']#, 'mobi_navpoints_only_deepest']
diff --git a/src/calibre/gui2/convert/mobi_output.ui b/src/calibre/gui2/convert/mobi_output.ui
index 68cd55ab95..dd3fdd49be 100644
--- a/src/calibre/gui2/convert/mobi_output.ui
+++ b/src/calibre/gui2/convert/mobi_output.ui
@@ -6,7 +6,7 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>521</width>
+    <width>588</width>
     <height>342</height>
    </rect>
   </property>
@@ -14,48 +14,7 @@
    <string>Form</string>
   </property>
   <layout class="QGridLayout" name="gridLayout">
-   <item row="1" column="0">
-    <widget class="QLabel" name="label">
-     <property name="text">
-      <string>&amp;Title for Table of Contents:</string>
-     </property>
-     <property name="buddy">
-      <cstring>opt_toc_title</cstring>
-     </property>
-    </widget>
-   </item>
-   <item row="1" column="1">
-    <widget class="QLineEdit" name="opt_toc_title"/>
-   </item>
-   <item row="4" column="0" colspan="2">
-    <widget class="QCheckBox" name="opt_rescale_images">
-     <property name="text">
-      <string>Rescale images for &amp;Palm devices</string>
-     </property>
-    </widget>
-   </item>
-   <item row="5" column="0" colspan="2">
-    <widget class="QCheckBox" name="opt_prefer_author_sort">
-     <property name="text">
-      <string>Use author &amp;sort for author</string>
-     </property>
-    </widget>
-   </item>
-   <item row="6" column="0">
-    <widget class="QCheckBox" name="opt_dont_compress">
-     <property name="text">
-      <string>Disable compression of the file contents</string>
-     </property>
-    </widget>
-   </item>
-   <item row="0" column="0">
-    <widget class="QCheckBox" name="opt_no_inline_toc">
-     <property name="text">
-      <string>Do not add Table of Contents to book</string>
-     </property>
-    </widget>
-   </item>
-   <item row="8" column="0" colspan="2">
+   <item row="7" column="0" colspan="2">
     <widget class="QGroupBox" name="groupBox">
      <property name="title">
       <string>Kindle options</string>
@@ -98,7 +57,7 @@
      </layout>
     </widget>
    </item>
-   <item row="9" column="0">
+   <item row="8" column="0">
     <spacer name="verticalSpacer_2">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -125,6 +84,40 @@
      </property>
     </widget>
    </item>
+   <item row="4" column="0" colspan="2">
+    <widget class="QCheckBox" name="opt_prefer_author_sort">
+     <property name="text">
+      <string>Use author &amp;sort for author</string>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="0">
+    <widget class="QLabel" name="label">
+     <property name="text">
+      <string>&amp;Title for Table of Contents:</string>
+     </property>
+     <property name="buddy">
+      <cstring>opt_toc_title</cstring>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="1">
+    <widget class="QLineEdit" name="opt_toc_title"/>
+   </item>
+   <item row="5" column="0">
+    <widget class="QCheckBox" name="opt_dont_compress">
+     <property name="text">
+      <string>Disable compression of the file contents</string>
+     </property>
+    </widget>
+   </item>
+   <item row="0" column="0">
+    <widget class="QCheckBox" name="opt_no_inline_toc">
+     <property name="text">
+      <string>Do not add Table of Contents to book</string>
+     </property>
+    </widget>
+   </item>
   </layout>
  </widget>
 <resources/>