mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	Start work on new MOBI indexing implementation
This commit is contained in:
		
							parent
							
								
									eab57e4f82
								
							
						
					
					
						commit
						60f1f24e66
					
				@ -82,26 +82,6 @@ class MOBIOutput(OutputFormatPlugin):
 | 
			
		||||
        else:
 | 
			
		||||
            self.oeb.log.debug('Using mastheadImage supplied in manifest...')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def dump_toc(self, toc):
        '''
        Diagnostic helper: write the contents of ``toc`` to ``self.log``.

        Logs the title and href of ``toc`` itself, then of every node in
        ``toc.nodes`` (labelled "periodical"), of every item yielded by
        iterating such a node (labelled "section") and of every item yielded
        by iterating a section (labelled "article"). Article titles are
        logged via ``repr()``.
        '''
        emit = self.log
        emit( "\n         >>> TOC contents <<<")
        emit( "     toc.title: %s" % toc.title)
        emit( "      toc.href: %s" % toc.href)
        for periodical in toc.nodes:
            emit( "\tperiodical title: %s" % periodical.title)
            emit( "\t            href: %s" % periodical.href)
            for section in periodical:
                emit( "\t\tsection title: %s" % section.title)
                emit( "\t\tfirst article: %s" % section.href)
                for article in section:
                    emit( "\t\t\tarticle title: %s" % repr(article.title))
                    emit( "\t\t\t         href: %s" % article.href)
 | 
			
		||||
 | 
			
		||||
    def dump_manifest(self):
        '''
        Diagnostic helper: write a banner followed by every href found in
        ``self.oeb.manifest.hrefs`` to ``self.log``, one tab-indented entry per
        call.
        '''
        emit = self.log
        emit( "\n         >>> Manifest entries <<<")
        for href in self.oeb.manifest.hrefs:
            emit("\t%s" % href)
 | 
			
		||||
 | 
			
		||||
    def periodicalize_toc(self):
 | 
			
		||||
        from calibre.ebooks.oeb.base import TOC
 | 
			
		||||
        toc = self.oeb.toc
 | 
			
		||||
@ -156,12 +136,6 @@ class MOBIOutput(OutputFormatPlugin):
 | 
			
		||||
            # Fix up the periodical href to point to first section href
 | 
			
		||||
            toc.nodes[0].href = toc.nodes[0].nodes[0].href
 | 
			
		||||
 | 
			
		||||
            # diagnostics
 | 
			
		||||
            if self.opts.verbose > 3:
 | 
			
		||||
                self.dump_toc(toc)
 | 
			
		||||
                self.dump_manifest()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def convert(self, oeb, output_path, input_plugin, opts, log):
 | 
			
		||||
        self.log, self.opts, self.oeb = log, opts, oeb
 | 
			
		||||
        from calibre.ebooks.mobi.mobiml import MobiMLizer
 | 
			
		||||
 | 
			
		||||
@ -177,3 +177,23 @@ def get_trailing_data(record, extra_data_flags):
 | 
			
		||||
            record = record[:-sz]
 | 
			
		||||
    return data, record
 | 
			
		||||
 | 
			
		||||
def encode_trailing_data(raw):
    '''
    Append a size field to the bytestring raw, returning a bytestring of the
    form

        <data><size>

    where size is a backwards encoded vwi whose value is the length of the
    entire returned bytestring (data plus the size field itself).

    This is the encoding used for trailing data entries at the end of text
    records. See get_trailing_data() for details.
    '''
    # The size field counts its own bytes, so guess its width and widen the
    # guess until the encoded field is exactly as long as assumed.
    width = 1
    size_field = encint(len(raw) + width, forward=False)
    while len(size_field) != width:
        width += 1
        size_field = encint(len(raw) + width, forward=False)
    return raw + size_field
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -12,4 +12,5 @@ UNCOMPRESSED = 1
 | 
			
		||||
PALMDOC = 2
 | 
			
		||||
HUFFDIC = 17480
 | 
			
		||||
PALM_MAX_IMAGE_SIZE = 63 * 1024
 | 
			
		||||
RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										116
									
								
								src/calibre/ebooks/mobi/writer2/indexer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								src/calibre/ebooks/mobi/writer2/indexer.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,116 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 | 
			
		||||
from __future__ import (unicode_literals, division, absolute_import,
 | 
			
		||||
                        print_function)
 | 
			
		||||
 | 
			
		||||
__license__   = 'GPL v3'
 | 
			
		||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 | 
			
		||||
__docformat__ = 'restructuredtext en'
 | 
			
		||||
 | 
			
		||||
from struct import pack
 | 
			
		||||
from cStringIO import StringIO
 | 
			
		||||
from collections import OrderedDict
 | 
			
		||||
 | 
			
		||||
from calibre.ebooks import normalize
 | 
			
		||||
from calibre.ebooks.mobi.utils import encint
 | 
			
		||||
 | 
			
		||||
def utf8_text(text):
    '''
    Encode a possibly null string as UTF-8 bytes.

    The input is stripped of surrounding whitespace; byte strings are decoded
    as UTF-8 (with replacement of undecodable bytes) before being passed
    through normalize() and encoded. When the input is empty, None or only
    whitespace, the translated string 'Unknown' is encoded and returned
    instead.
    '''
    stripped = text.strip() if text else text
    if not stripped:
        return _('Unknown').encode('utf-8')
    if not isinstance(stripped, unicode):
        stripped = stripped.decode('utf-8', 'replace')
    return normalize(stripped).encode('utf-8')
 | 
			
		||||
 | 
			
		||||
def align_block(raw, multiple=4, pad=b'\0'):
    '''
    Return raw with enough copies of pad appended to make its length a
    multiple of ``multiple`` (4 by default). raw is returned unchanged when
    its length is already aligned.
    '''
    remainder = len(raw) % multiple
    if remainder:
        raw = raw + pad*(multiple - remainder)
    return raw
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CNCX(object): # {{{

    '''
    Create the CNCX records. These are records containing all the strings from
    the NCX. Each record is of the form: <vwi string size><utf-8 encoded
    string>

    After construction, ``records`` is the list of record bytestrings and
    indexing the object with one of the original strings returns the offset
    that was assigned to it.
    '''

    MAX_STRING_LENGTH = 500

    def __init__(self, toc, opts):
        '''
        :param toc: Iterable of TOC items; each item must provide ``title``
                    and ``klass`` and, when ``opts.mobi_periodical`` is set,
                    ``description`` and ``author``.
        :param opts: Conversion options; only ``mobi_periodical`` is read.
        '''
        # Insertion ordered so that offsets are assigned in TOC order
        self.strings = OrderedDict()

        for item in toc:
            # Skip the root node if the iteration yields it
            if item is toc: continue
            label = item.title
            klass = item.klass
            if opts.mobi_periodical:
                if item.description:
                    self.strings[item.description] = 0
                if item.author:
                    self.strings[item.author] = 0
            self.strings[label] = self.strings[klass] = 0

        self.records = []

        offset = 0
        buf = StringIO()
        for key in tuple(self.strings.iterkeys()):
            # utf8_text() copes with empty/None values, but slicing None would
            # raise, so only truncate real strings.
            utf8 = utf8_text(key[:self.MAX_STRING_LENGTH] if key else key)
            l = len(utf8)
            sz_bytes = encint(l)
            raw = sz_bytes + utf8
            if 0xfbf8 - buf.tell() < 6 + len(raw):
                # Records in PDB files cannot be larger than 0x10000, so we
                # stop well before that.
                pad = 0xfbf8 - buf.tell()
                buf.write(b'\0' * pad)
                self.records.append(buf.getvalue())
                # Start the next record with an empty buffer; seek explicitly
                # so the write position does not depend on truncate().
                buf.seek(0)
                buf.truncate(0)
                offset = len(self.records) * 0x10000

            # Store the string in the current record and remember where it
            # starts.
            buf.write(raw)
            self.strings[key] = offset
            offset += len(raw)

        buf.write(b'\0') # CNCX must end with zero byte
        self.records.append(align_block(buf.getvalue()))

    def __getitem__(self, string):
        '''
        Return the offset assigned to ``string``. Raises KeyError if the
        string was not collected from the TOC.
        '''
        return self.strings[string]
# }}}
 | 
			
		||||
 | 
			
		||||
class Indexer(object):

    '''
    Holder for the state used while building index records.

    Construction stores the passed in objects, builds a CNCX from ``oeb.toc``
    and starts with an empty ``records`` list.
    '''

    def __init__(self, serializer, number_of_text_records, opts, oeb):
        self.oeb, self.opts = oeb, opts
        self.log = oeb.log
        self.serializer = serializer
        self.number_of_text_records = number_of_text_records

        self.cncx = CNCX(oeb.toc, opts)

        self.records = []

    def create_header(self):
        '''
        Write the first fields of an INDX header into a local buffer. The
        buffer is not returned or stored.
        '''
        header = StringIO()

        fields = (
            # Ident
            b'INDX',
            # Header length
            pack(b'>I', 192),
            # Index type: 0 - normal, 2 - inflection
            pack(b'>I', 2),
        )
        for field in fields:
            header.write(field)
 | 
			
		||||
@ -17,8 +17,9 @@ from calibre.ebooks.mobi.writer2.serializer import Serializer
 | 
			
		||||
from calibre.ebooks.compression.palmdoc import compress_doc
 | 
			
		||||
from calibre.ebooks.mobi.langcodes import iana2mobi
 | 
			
		||||
from calibre.utils.filenames import ascii_filename
 | 
			
		||||
from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED
 | 
			
		||||
from calibre.ebooks.mobi.utils import (rescale_image, encint)
 | 
			
		||||
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
 | 
			
		||||
from calibre.ebooks.mobi.utils import (rescale_image, encint,
 | 
			
		||||
        encode_trailing_data)
 | 
			
		||||
 | 
			
		||||
EXTH_CODES = {
 | 
			
		||||
    'creator': 100,
 | 
			
		||||
@ -39,9 +40,6 @@ EXTH_CODES = {
 | 
			
		||||
# Disabled as I don't care about uncrossable breaks
 | 
			
		||||
WRITE_UNCROSSABLE_BREAKS = False
 | 
			
		||||
 | 
			
		||||
RECORD_SIZE = 0x1000 # 4096
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
MAX_THUMB_SIZE = 16 * 1024
 | 
			
		||||
MAX_THUMB_DIMEN = (180, 240)
 | 
			
		||||
 | 
			
		||||
@ -53,6 +51,7 @@ class MobiWriter(object):
 | 
			
		||||
        self.write_page_breaks_after_item = write_page_breaks_after_item
 | 
			
		||||
        self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
 | 
			
		||||
        self.prefer_author_sort = opts.prefer_author_sort
 | 
			
		||||
        self.last_text_record_idx = 1
 | 
			
		||||
 | 
			
		||||
    def __call__(self, oeb, path_or_stream):
 | 
			
		||||
        if hasattr(path_or_stream, 'write'):
 | 
			
		||||
@ -79,9 +78,44 @@ class MobiWriter(object):
 | 
			
		||||
    def generate_content(self):
        '''
        Run the content generation steps in order: map image names, generate
        the text, generate the index, write the uncrossable breaks and finally
        generate the images.
        '''
        self.map_image_names()
        self.generate_text()
        # The index is generated right after the text
        self.generate_index()
        self.write_uncrossable_breaks()
        # Images are generated after the index
        self.generate_images()
 | 
			
		||||
 | 
			
		||||
    # Indexing {{{
 | 
			
		||||
    def generate_index(self):
        '''
        Reset ``primary_index_record_idx`` to None. No index records are
        produced here.
        '''
        self.primary_index_record_idx = None
 | 
			
		||||
    # }}}
 | 
			
		||||
 | 
			
		||||
    def write_uncrossable_breaks(self): # {{{
        '''
        Write information about uncrossable breaks (non linear items in
        the spine).

        Does nothing unless WRITE_UNCROSSABLE_BREAKS is set. Otherwise, for
        every record index from 1 to ``last_text_record_idx``, consumes the
        entries at the front of ``self.serializer.breaks`` that lie less than
        RECORD_SIZE past ``index * RECORD_SIZE`` and appends them, as a
        trailing data entry, to ``self.records[index]``.
        '''
        if WRITE_UNCROSSABLE_BREAKS:
            pending = self.serializer.breaks

            for idx in xrange(1, self.last_text_record_idx+1):
                base = idx * RECORD_SIZE
                cursor = base
                pieces = []

                while pending and (pending[0] - base) < RECORD_SIZE:
                    # Each break is stored as the distance from the previous
                    # position, in units of 8
                    delta = (pending.pop(0) - cursor) >> 3
                    pieces.append(encint(delta))
                    cursor += delta << 3

                self.records[idx] += encode_trailing_data(b''.join(pieces))
 | 
			
		||||
    # }}}
 | 
			
		||||
 | 
			
		||||
    # Images {{{
 | 
			
		||||
    def map_image_names(self):
 | 
			
		||||
        '''
 | 
			
		||||
        Map image names to record indices, ensuring that the masthead image if
 | 
			
		||||
@ -120,23 +154,38 @@ class MobiWriter(object):
 | 
			
		||||
            if self.first_image_record is None:
 | 
			
		||||
                self.first_image_record = len(self.records) - 1
 | 
			
		||||
 | 
			
		||||
    def add_thumbnail(self, item):
        '''
        Create a thumbnail from ``item.data`` and register it.

        The rescaled image is added to the manifest under a generated
        id/href, mapped in ``self.images`` and appended to ``self.records``.

        :return: The index stored in ``self.images`` for the thumbnail
                 (``len(self.images) + 1`` at the time of the call), or None
                 if rescaling raised an IOError.
        '''
        try:
            thumb = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
                    maxsizeb=MAX_THUMB_SIZE)
        except IOError:
            self.oeb.logger.warn('Bad image file %r' % item.href)
            return None

        manifest = self.oeb.manifest
        thumb_id, thumb_href = manifest.generate('thumbnail', 'thumbnail.jpeg')
        manifest.add(thumb_id, thumb_href, 'image/jpeg', data=thumb)

        record_index = len(self.images) + 1
        self.images[thumb_href] = record_index
        self.records.append(thumb)
        return record_index
 | 
			
		||||
 | 
			
		||||
    # }}}
 | 
			
		||||
 | 
			
		||||
    # Text {{{
 | 
			
		||||
 | 
			
		||||
    def generate_text(self):
 | 
			
		||||
        self.oeb.logger.info('Serializing markup content...')
 | 
			
		||||
        serializer = Serializer(self.oeb, self.images,
 | 
			
		||||
        self.serializer = Serializer(self.oeb, self.images,
 | 
			
		||||
                write_page_breaks_after_item=self.write_page_breaks_after_item)
 | 
			
		||||
        text = serializer()
 | 
			
		||||
        breaks = serializer.breaks
 | 
			
		||||
        self.anchor_offset_kindle = serializer.anchor_offset_kindle
 | 
			
		||||
        self.id_offsets = serializer.id_offsets
 | 
			
		||||
        text = self.serializer()
 | 
			
		||||
        self.content_length = len(text)
 | 
			
		||||
        self.text_length = len(text)
 | 
			
		||||
        text = StringIO(text)
 | 
			
		||||
        buf = []
 | 
			
		||||
        nrecords = 0
 | 
			
		||||
        offset = 0
 | 
			
		||||
 | 
			
		||||
        if self.compression != UNCOMPRESSED:
 | 
			
		||||
            self.oeb.logger.info('  Compressing markup content...')
 | 
			
		||||
 | 
			
		||||
        data, overlap = self.read_text_record(text)
 | 
			
		||||
 | 
			
		||||
        while len(data) > 0:
 | 
			
		||||
@ -146,39 +195,15 @@ class MobiWriter(object):
 | 
			
		||||
            record.write(data)
 | 
			
		||||
 | 
			
		||||
            self.records.append(record.getvalue())
 | 
			
		||||
            buf.append(self.records[-1])
 | 
			
		||||
            nrecords += 1
 | 
			
		||||
            offset += RECORD_SIZE
 | 
			
		||||
            data, overlap = self.read_text_record(text)
 | 
			
		||||
 | 
			
		||||
            # Write information about the multibyte character overlap, if any
 | 
			
		||||
            record.write(overlap)
 | 
			
		||||
            record.write(pack(b'>B', len(overlap)))
 | 
			
		||||
 | 
			
		||||
            # Write information about uncrossable breaks (non linear items in
 | 
			
		||||
            # the spine)
 | 
			
		||||
            if WRITE_UNCROSSABLE_BREAKS:
 | 
			
		||||
                nextra = 0
 | 
			
		||||
                pbreak = 0
 | 
			
		||||
                running = offset
 | 
			
		||||
 | 
			
		||||
                # Write information about every uncrossable break that occurs in
 | 
			
		||||
                # the next record.
 | 
			
		||||
                while breaks and (breaks[0] - offset) < RECORD_SIZE:
 | 
			
		||||
                    pbreak = (breaks.pop(0) - running) >> 3
 | 
			
		||||
                    encoded = encint(pbreak)
 | 
			
		||||
                    record.write(encoded)
 | 
			
		||||
                    running += pbreak << 3
 | 
			
		||||
                    nextra += len(encoded)
 | 
			
		||||
                lsize = 1
 | 
			
		||||
                while True:
 | 
			
		||||
                    size = encint(nextra + lsize, forward=False)
 | 
			
		||||
                    if len(size) == lsize:
 | 
			
		||||
                        break
 | 
			
		||||
                    lsize += 1
 | 
			
		||||
                record.write(size)
 | 
			
		||||
 | 
			
		||||
        self.text_nrecords = nrecords + 1
 | 
			
		||||
        self.last_text_record_idx = nrecords
 | 
			
		||||
 | 
			
		||||
    def read_text_record(self, text):
 | 
			
		||||
        '''
 | 
			
		||||
@ -230,25 +255,31 @@ class MobiWriter(object):
 | 
			
		||||
 | 
			
		||||
        return data, overlap
 | 
			
		||||
 | 
			
		||||
    def generate_end_records(self):
        '''
        Remember the index of the next record in ``flis_number`` and append
        the four byte end-of-file marker record to ``self.records``.
        '''
        records = self.records
        self.flis_number = len(records)
        records.append('\xE9\x8E\x0D\x0A')
 | 
			
		||||
    # }}}
 | 
			
		||||
 | 
			
		||||
    def generate_record0(self): # {{{
 | 
			
		||||
    def generate_record0(self): #  MOBI header {{{
 | 
			
		||||
        metadata = self.oeb.metadata
 | 
			
		||||
        exth = self.build_exth()
 | 
			
		||||
        last_content_record = len(self.records) - 1
 | 
			
		||||
 | 
			
		||||
        # EOF record
 | 
			
		||||
        self.records.append('\xE9\x8E\x0D\x0A')
 | 
			
		||||
 | 
			
		||||
        self.generate_end_records()
 | 
			
		||||
 | 
			
		||||
        record0 = StringIO()
 | 
			
		||||
        # The PalmDOC Header
 | 
			
		||||
        record0.write(pack(b'>HHIHHHH', self.compression, 0,
 | 
			
		||||
            self.text_length,
 | 
			
		||||
            self.text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
 | 
			
		||||
        # The MOBI Header
 | 
			
		||||
        record0.write(pack(b'>HHIHHHH',
 | 
			
		||||
            self.compression, # compression type # compression type
 | 
			
		||||
            0, # Unused
 | 
			
		||||
            self.text_length, # Text length
 | 
			
		||||
            self.last_text_record_idx, # Number of text records or last tr idx
 | 
			
		||||
            RECORD_SIZE, # Text record size
 | 
			
		||||
            0, # Unused
 | 
			
		||||
            0  # Unused
 | 
			
		||||
        )) # 0 - 15 (0x0 - 0xf)
 | 
			
		||||
        uid = random.randint(0, 0xffffffff)
 | 
			
		||||
        title = normalize(unicode(metadata.title[0])).encode('utf-8')
 | 
			
		||||
        # The MOBI Header
 | 
			
		||||
 | 
			
		||||
        # 0x0 - 0x3
 | 
			
		||||
        record0.write(b'MOBI')
 | 
			
		||||
@ -270,7 +301,6 @@ class MobiWriter(object):
 | 
			
		||||
        # 0x18 - 0x1f : Unknown
 | 
			
		||||
        record0.write(b'\xff' * 8)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        # 0x20 - 0x23 : Secondary index record
 | 
			
		||||
        record0.write(pack(b'>I', 0xffffffff))
 | 
			
		||||
 | 
			
		||||
@ -279,7 +309,7 @@ class MobiWriter(object):
 | 
			
		||||
 | 
			
		||||
        # 0x40 - 0x43 : Offset of first non-text record
 | 
			
		||||
        record0.write(pack(b'>I',
 | 
			
		||||
            self.text_nrecords + 1))
 | 
			
		||||
            self.last_text_record_idx + 1))
 | 
			
		||||
 | 
			
		||||
        # 0x44 - 0x4b : title offset, title length
 | 
			
		||||
        record0.write(pack(b'>II',
 | 
			
		||||
@ -289,7 +319,7 @@ class MobiWriter(object):
 | 
			
		||||
        record0.write(iana2mobi(
 | 
			
		||||
            str(metadata.language[0])))
 | 
			
		||||
 | 
			
		||||
        # 0x50 - 0x57 : Unknown
 | 
			
		||||
        # 0x50 - 0x57 : Input language and Output language
 | 
			
		||||
        record0.write(b'\0' * 8)
 | 
			
		||||
 | 
			
		||||
        # 0x58 - 0x5b : Format version
 | 
			
		||||
@ -348,19 +378,20 @@ class MobiWriter(object):
 | 
			
		||||
 | 
			
		||||
        # 0xe0 - 0xe3 : Extra record data
 | 
			
		||||
        # Extra record data flags:
 | 
			
		||||
        #   - 0x1: <extra multibyte bytes><size> (?)
 | 
			
		||||
        #   - 0x2: <TBS indexing description of this HTML record><size> GR
 | 
			
		||||
        #   - 0x4: <uncrossable breaks><size>
 | 
			
		||||
        # GR: Use 7 for indexed files, 5 for unindexed
 | 
			
		||||
        #   - 0b1  : <extra multibyte bytes><size>
 | 
			
		||||
        #   - 0b10 : <TBS indexing description of this HTML record><size>
 | 
			
		||||
        #   - 0b100: <uncrossable breaks><size>
 | 
			
		||||
        # Setting bit 2 (0x2) disables <guide><reference type="start"> functionality
 | 
			
		||||
 | 
			
		||||
        extra_data_flags = 0b1 # Has multibyte overlap bytes
 | 
			
		||||
        if self.primary_index_record_idx is not None:
 | 
			
		||||
            extra_data_flags |= 0b10
 | 
			
		||||
        if WRITE_UNCROSSABLE_BREAKS:
 | 
			
		||||
            extra_data_flags |= 0b100
 | 
			
		||||
        record0.write(pack(b'>I', extra_data_flags))
 | 
			
		||||
 | 
			
		||||
        # 0xe4 - 0xe7 : Primary index record
 | 
			
		||||
        record0.write(pack(b'>I', 0xffffffff))
 | 
			
		||||
        record0.write(pack(b'>I', 0xffffffff if self.primary_index_record_idx
 | 
			
		||||
            is None else self.primary_index_record_idx))
 | 
			
		||||
 | 
			
		||||
        record0.write(exth)
 | 
			
		||||
        record0.write(title)
 | 
			
		||||
@ -371,7 +402,7 @@ class MobiWriter(object):
 | 
			
		||||
        self.records[0] = record0
 | 
			
		||||
    # }}}
 | 
			
		||||
 | 
			
		||||
    def build_exth(self): # {{{
 | 
			
		||||
    def build_exth(self): # EXTH Header {{{
 | 
			
		||||
        oeb = self.oeb
 | 
			
		||||
        exth = StringIO()
 | 
			
		||||
        nrecs = 0
 | 
			
		||||
@ -467,22 +498,10 @@ class MobiWriter(object):
 | 
			
		||||
        return b''.join(exth)
 | 
			
		||||
    # }}}
 | 
			
		||||
 | 
			
		||||
    def add_thumbnail(self, item):
        '''
        Create a thumbnail from ``item.data`` and register it.

        The rescaled image is added to the manifest under a generated
        id/href, mapped in ``self.images`` and appended to ``self.records``.

        :return: The index stored in ``self.images`` for the thumbnail
                 (``len(self.images) + 1`` at the time of the call), or None
                 if rescaling raised an IOError.
        '''
        try:
            thumb = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
                    maxsizeb=MAX_THUMB_SIZE)
        except IOError:
            self.oeb.logger.warn('Bad image file %r' % item.href)
            return None

        manifest = self.oeb.manifest
        thumb_id, thumb_href = manifest.generate('thumbnail', 'thumbnail.jpeg')
        manifest.add(thumb_id, thumb_href, 'image/jpeg', data=thumb)

        record_index = len(self.images) + 1
        self.images[thumb_href] = record_index
        self.records.append(thumb)
        return record_index
 | 
			
		||||
 | 
			
		||||
    def write_header(self):
 | 
			
		||||
    def write_header(self): # PalmDB header {{{
 | 
			
		||||
        '''
 | 
			
		||||
        Write the PalmDB header
 | 
			
		||||
        '''
 | 
			
		||||
        title = ascii_filename(unicode(self.oeb.metadata.title[0]))
 | 
			
		||||
        title = title + (b'\0' * (32 - len(title)))
 | 
			
		||||
        now = int(time.time())
 | 
			
		||||
@ -494,6 +513,7 @@ class MobiWriter(object):
 | 
			
		||||
            self.write(pack(b'>I', offset), b'\0', pack(b'>I', 2*i)[1:])
 | 
			
		||||
            offset += len(record)
 | 
			
		||||
        self.write(b'\0\0')
 | 
			
		||||
    # }}}
 | 
			
		||||
 | 
			
		||||
    def write_content(self):
 | 
			
		||||
        for record in self.records:
 | 
			
		||||
 | 
			
		||||
@ -138,7 +138,7 @@ class Serializer(object):
 | 
			
		||||
        buf = self.buf
 | 
			
		||||
        self.anchor_offset = buf.tell()
 | 
			
		||||
        buf.write(b'<body>')
 | 
			
		||||
        self.anchor_offset_kindle = buf.tell()
 | 
			
		||||
        self.body_start_offset = buf.tell()
 | 
			
		||||
        spine = [item for item in self.oeb.spine if item.linear]
 | 
			
		||||
        spine.extend([item for item in self.oeb.spine if not item.linear])
 | 
			
		||||
        for item in spine:
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user