Start refactoring of MobiWriter

commit 4805fa7c77 (parent 48929a4dbd)
@@ -184,7 +184,12 @@ class MOBIOutput(OutputFormatPlugin):
         mobimlizer(oeb, opts)
         self.check_for_periodical()
         write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
-        from calibre.ebooks.mobi.writer import MobiWriter
+        from calibre.utils.config import tweaks
+        if tweaks.get('new_mobi_writer', False):
+            from calibre.ebooks.mobi.writer2.main import MobiWriter
+            MobiWriter
+        else:
+            from calibre.ebooks.mobi.writer import MobiWriter
         writer = MobiWriter(opts,
                         write_page_breaks_after_item=write_page_breaks_after_item)
         writer(oeb, output_path)
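
The new writer lives in calibre.ebooks.mobi.writer2 and is opt-in via a tweak,
so the stable writer in calibre.ebooks.mobi.writer stays the default (the bare
MobiWriter expression in the new branch reads like a placeholder while the
refactored class is stubbed in). A minimal sketch of opting in, assuming the
usual calibre tweaks mechanism in which user overrides from tweaks.py end up
in the calibre.utils.config.tweaks dict:

    from calibre.utils.config import tweaks
    # Any missing or falsey value keeps the old MobiWriter code path
    tweaks['new_mobi_writer'] = True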

src/calibre/ebooks/mobi/writer2/__init__.py (new file, 15 lines)
@@ -0,0 +1,15 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'


UNCOMPRESSED = 1
PALMDOC = 2
HUFFDIC = 17480

PALM_MAX_IMAGE_SIZE = 63 * 1024

src/calibre/ebooks/mobi/writer2/main.py (new file, 579 lines)
@@ -0,0 +1,579 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re, random, time
from cStringIO import StringIO
from struct import pack

from calibre.ebooks import normalize
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks.mobi.writer2.serializer import Serializer
from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED

EXTH_CODES = {
    'creator': 100,
    'publisher': 101,
    'description': 103,
    'identifier': 104,
    'subject': 105,
    'pubdate': 106,
    'date': 106,
    'review': 107,
    'contributor': 108,
    'rights': 109,
    'type': 111,
    'source': 112,
    'title': 503,
    }

# Disabled as I don't care about uncrossable breaks
WRITE_UNCROSSABLE_BREAKS = False

RECORD_SIZE = 0x1000 # 4096

IMAGE_MAX_SIZE = 10 * 1024 * 1024
MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)

# Almost like the one for MS LIT, but not quite.
DECINT_FORWARD = 0
DECINT_BACKWARD = 1

def decint(value, direction):
    '''
    Some parts of the Mobipocket format encode data as variable-width integers.
    These integers are represented big-endian with 7 bits per byte in bits 1-7.
    They may be either forward-encoded, in which case only the LSB has bit 8 set,
    or backward-encoded, in which case only the MSB has bit 8 set.
    For example, the number 0x11111 would be represented forward-encoded as:

        0x04 0x22 0x91

    And backward-encoded as:

        0x84 0x22 0x11

    This function encodes the integer ``value`` as a variable width integer and
    returns the bytestring corresponding to it.
    '''
    # Encode vwi
    byts = bytearray()
    while True:
        b = value & 0x7f
        value >>= 7
        byts.append(b)
        if value == 0:
            break
    if direction == DECINT_FORWARD:
        byts[0] |= 0x80
    elif direction == DECINT_BACKWARD:
        byts[-1] |= 0x80
    # Bytes are accumulated LSB first, so reverse them to get the big-endian
    # ordering the docstring describes
    byts.reverse()
    return bytes(byts)

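# Editorial sanity check of the examples in the docstring above (not part of
# the original file): the continuation marker lands on the LSB for forward
# encoding and on the MSB for backward encoding.
assert decint(0x11111, DECINT_FORWARD) == b'\x04\x22\x91'
assert decint(0x11111, DECINT_BACKWARD) == b'\x84\x22\x11'
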
def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
    '''
    Convert the image, setting all transparent pixels to white and changing
    the format to JPEG. Ensure the resultant image has a byte size less than
    maxsizeb.

    If dimen is not None, generate a thumbnail of width=dimen, height=dimen

    Returns the image as a bytestring
    '''
    if dimen is not None:
        data = thumbnail(data, width=dimen, height=dimen,
                compression_quality=90)[-1]
    else:
        # Replace transparent pixels with white pixels and convert to JPEG
        data = save_cover_data_to(data, 'img.jpg', return_data=True)
    if len(data) <= maxsizeb:
        return data
    orig_data = data
    img = Image()
    quality = 95

    img.load(data)
    while len(data) >= maxsizeb and quality >= 10:
        quality -= 5
        img.set_compression_quality(quality)
        data = img.export('jpg')
    if len(data) <= maxsizeb:
        return data
    orig_data = data

    scale = 0.9
    while len(data) >= maxsizeb and scale >= 0.05:
        img = Image()
        img.load(orig_data)
        w, h = img.size
        img.size = (int(scale*w), int(scale*h))
        img.set_compression_quality(quality)
        data = img.export('jpg')
        scale -= 0.05
    return data

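# Editorial usage sketch (not part of the original file): this is how the
# writer below uses rescale_image. Full-size images are only recompressed;
# thumbnails are additionally scaled down to a fixed dimension:
#
#   cover_jpeg = rescale_image(item.data)
#   thumb_jpeg = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
#                              maxsizeb=MAX_THUMB_SIZE)
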
class MobiWriter(object):
    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

    def __init__(self, opts, write_page_breaks_after_item=True):
        self.opts = opts
        self.write_page_breaks_after_item = write_page_breaks_after_item
        self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
        self.prefer_author_sort = opts.prefer_author_sort

    def __call__(self, oeb, path_or_stream):
        if hasattr(path_or_stream, 'write'):
            return self.dump_stream(oeb, path_or_stream)
        with open(path_or_stream, 'w+b') as stream:
            return self.dump_stream(oeb, stream)

    def write(self, *args):
        for datum in args:
            self.stream.write(datum)

    def tell(self):
        return self.stream.tell()

    def dump_stream(self, oeb, stream):
        self.oeb = oeb
        self.stream = stream
        self.records = [None]
        self.generate_content()
        self.generate_record0()
        self.write_header()
        self.write_content()

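    # Editorial note (not part of the original file): dump_stream is the whole
    # pipeline. self.records[0] is reserved for the combined PalmDOC/MOBI/EXTH
    # header, which generate_record0() can only build after the text and image
    # records exist, since it needs their counts and offsets.
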
    def generate_content(self):
        self.map_image_names()
        self.generate_text()
        # Image records come after text records
        self.generate_images()

    def map_image_names(self):
        '''
        Map image names to record indices, ensuring that the masthead image if
        present has index number 1.
        '''
        index = 1
        self.images = images = {}
        mh_href = None

        if 'masthead' in self.oeb.guide:
            mh_href = self.oeb.guide['masthead'].href
            images[mh_href] = 1
            index += 1

        for item in self.oeb.manifest.values():
            if item.media_type in OEB_RASTER_IMAGES:
                if item.href == mh_href: continue
                images[item.href] = index
                index += 1

    def generate_images(self):
        self.oeb.logger.info('Serializing images...')
        images = [(index, href) for href, index in self.images.iteritems()]
        images.sort()
        self.first_image_record = None
        for _, href in images:
            item = self.oeb.manifest.hrefs[href]
            try:
                data = rescale_image(item.data)
            except:
                self.oeb.logger.warn('Bad image file %r' % item.href)
                continue
            finally:
                item.unload_data_from_memory()
            self.records.append(data)
            if self.first_image_record is None:
                self.first_image_record = len(self.records) - 1

    def generate_text(self):
        self.oeb.logger.info('Serializing markup content...')
        serializer = Serializer(self.oeb, self.images,
                write_page_breaks_after_item=self.write_page_breaks_after_item)
        text = serializer()
        breaks = serializer.breaks
        self.anchor_offset_kindle = serializer.anchor_offset_kindle
        self.id_offsets = serializer.id_offsets
        self.content_length = len(text)
        self.text_length = len(text)
        text = StringIO(text)
        buf = []
        nrecords = 0
        offset = 0

        if self.compression != UNCOMPRESSED:
            self.oeb.logger.info(' Compressing markup content...')
        data, overlap = self.read_text_record(text)

        while len(data) > 0:
            if self.compression == PALMDOC:
                data = compress_doc(data)
            record = StringIO()
            record.write(data)

            # Write information about the multibyte character overlap, if any
            record.write(overlap)
            record.write(pack('>B', len(overlap)))

            # Write information about uncrossable breaks (non linear items in
            # the spine)
            if WRITE_UNCROSSABLE_BREAKS:
                nextra = 0
                pbreak = 0
                running = offset

                # Write information about every uncrossable break that occurs in
                # the next record.
                while breaks and (breaks[0] - offset) < RECORD_SIZE:
                    pbreak = (breaks.pop(0) - running) >> 3
                    encoded = decint(pbreak, DECINT_FORWARD)
                    record.write(encoded)
                    running += pbreak << 3
                    nextra += len(encoded)
                lsize = 1
                while True:
                    size = decint(nextra + lsize, DECINT_BACKWARD)
                    if len(size) == lsize:
                        break
                    lsize += 1
                record.write(size)

            # Only snapshot the record once all trailing data has been
            # written into it
            self.records.append(record.getvalue())
            buf.append(self.records[-1])
            nrecords += 1
            offset += RECORD_SIZE
            data, overlap = self.read_text_record(text)

        self.text_nrecords = nrecords + 1

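    # Editorial sketch of the layout of each text record built above (not in
    # the original file):
    #
    #   [<= RECORD_SIZE bytes of (optionally PalmDoc-compressed) text]
    #   [overlap: bytes completing a UTF-8 character split at the boundary]
    #   [1 byte: len(overlap)]
    #   [optional uncrossable-break trailing entries, when enabled]
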
    def read_text_record(self, text):
        '''
        Return a Palmdoc record of size RECORD_SIZE from the text file object.
        In case the record ends in the middle of a multibyte character return
        the overlap as well.

        Returns data, overlap: where both are byte strings. overlap is the
        extra bytes needed to complete the truncated multibyte character.
        '''
        opos = text.tell()
        text.seek(0, 2)
        # npos is the position of the next record
        npos = min((opos + RECORD_SIZE, text.tell()))
        # Number of bytes from the next record needed to complete the last
        # character in this record
        extra = 0

        last = b''
        while not last.decode('utf-8', 'ignore'):
            # last contains no valid utf-8 characters
            size = len(last) + 1
            text.seek(npos - size)
            last = text.read(size)

        # last now has one valid utf-8 char and possibly some bytes that belong
        # to a truncated char

        try:
            last.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            # There are some truncated bytes in last
            prev = len(last)
            while True:
                text.seek(npos - prev)
                last = text.read(len(last) + 1)
                try:
                    last.decode('utf-8')
                except UnicodeDecodeError:
                    pass
                else:
                    break
            extra = len(last) - prev

        text.seek(opos)
        data = text.read(RECORD_SIZE)
        overlap = text.read(extra)
        text.seek(npos)

        return data, overlap

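    # Editorial example (not in the original file): if the record boundary
    # falls between the two bytes of U+00E9 ('é', encoded as b'\xc3\xa9'),
    # this method returns data ending in b'\xc3' and overlap == b'\xa9'; the
    # caller appends the overlap plus a length byte so readers can reassemble
    # the character.
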
    def generate_end_records(self):
        self.flis_number = len(self.records)
        self.records.append(b'\xE9\x8E\x0D\x0A')

    def generate_record0(self): # {{{
        metadata = self.oeb.metadata
        exth = self.build_exth()
        last_content_record = len(self.records) - 1

        self.generate_end_records()

        record0 = StringIO()
        # The PalmDOC Header
        record0.write(pack('>HHIHHHH', self.compression, 0,
            self.text_length,
            self.text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
        uid = random.randint(0, 0xffffffff)
        title = normalize(unicode(metadata.title[0])).encode('utf-8')
        # The MOBI Header

        # 0x0 - 0x3
        record0.write(b'MOBI')

        # 0x4 - 0x7 : Length of header
        # 0x8 - 0xb : MOBI type
        #   type    meaning
        #   0x002   MOBI book (chapter - chapter navigation)
        #   0x101   News - Hierarchical navigation with sections and articles
        #   0x102   News feed - Flat navigation
        #   0x103   News magazine - same as 0x101
        # 0xC - 0xF   : Text encoding (65001 is utf-8)
        # 0x10 - 0x13 : UID
        # 0x14 - 0x17 : Generator version

        record0.write(pack('>IIIII',
            0xe8, 0x002, 65001, uid, 6))

        # 0x18 - 0x1f : Unknown
        record0.write(b'\xff' * 8)

        # 0x20 - 0x23 : Secondary index record
        record0.write(pack('>I', 0xffffffff))

        # 0x24 - 0x3f : Unknown
        record0.write(b'\xff' * 28)

        # 0x40 - 0x43 : Offset of first non-text record
        record0.write(pack('>I',
            self.text_nrecords + 1))

        # 0x44 - 0x4b : title offset, title length
        record0.write(pack('>II',
            0xe8 + 16 + len(exth), len(title)))

        # 0x4c - 0x4f : Language specifier
        record0.write(iana2mobi(
            str(metadata.language[0])))

        # 0x50 - 0x57 : Unknown
        record0.write(b'\0' * 8)

        # 0x58 - 0x5b : Format version
        # 0x5c - 0x5f : First image record number
        record0.write(pack('>II',
            6, self.first_image_record if self.first_image_record else 0))

        # 0x60 - 0x63 : First HUFF/CDIC record number
        # 0x64 - 0x67 : Number of HUFF/CDIC records
        # 0x68 - 0x6b : First DATP record number
        # 0x6c - 0x6f : Number of DATP records
        record0.write(b'\0' * 16)

        # 0x70 - 0x73 : EXTH flags
        record0.write(pack('>I', 0x50))

        # 0x74 - 0x93 : Unknown
        record0.write(b'\0' * 32)

        # 0x94 - 0x97 : DRM offset
        # 0x98 - 0x9b : DRM count
        # 0x9c - 0x9f : DRM size
        # 0xa0 - 0xa3 : DRM flags
        record0.write(pack('>IIII',
            0xffffffff, 0xffffffff, 0, 0))

        # 0xa4 - 0xaf : Unknown
        record0.write(b'\0'*12)

        # 0xb0 - 0xb1 : First content record number
        # 0xb2 - 0xb3 : last content record number
        # (Includes Image, DATP, HUFF, DRM)
        record0.write(pack('>HH', 1, last_content_record))

        # 0xb4 - 0xb7 : Unknown
        record0.write(b'\0\0\0\x01')

        # 0xb8 - 0xbb : FCIS record number
        record0.write(pack('>I', 0xffffffff))

        # 0xbc - 0xbf : Unknown (FCIS record count?)
        record0.write(pack('>I', 0xffffffff))

        # 0xc0 - 0xc3 : FLIS record number
        record0.write(pack('>I', 0xffffffff))

        # 0xc4 - 0xc7 : Unknown (FLIS record count?)
        record0.write(pack('>I', 1))

        # 0xc8 - 0xcf : Unknown
        record0.write(b'\0'*8)

        # 0xd0 - 0xdf : Unknown
        record0.write(pack('>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))

        # 0xe0 - 0xe3 : Extra record data
        # Extra record data flags:
        #   - 0x1: <extra multibyte bytes><size> (?)
        #   - 0x2: <TBS indexing description of this HTML record><size> GR
        #   - 0x4: <uncrossable breaks><size>
        # GR: Use 7 for indexed files, 5 for unindexed
        # Setting bit 2 (0x4) disables <guide><reference type="start">
        # functionality

        trailingDataFlags = 1
        if WRITE_UNCROSSABLE_BREAKS:
            trailingDataFlags |= 4
        record0.write(pack('>I', trailingDataFlags))

        # 0xe4 - 0xe7 : Primary index record
        record0.write(pack('>I', 0xffffffff))

        record0.write(exth)
        record0.write(title)
        record0 = record0.getvalue()
        # Add some buffer so that Amazon can add encryption information if this
        # MOBI is submitted for publication
        record0 += (b'\0' * (1024*8))
        self.records[0] = record0
    # }}}

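    # Editorial summary of record 0 as assembled above (not in the original
    # file):
    #
    #   16 bytes   PalmDOC header (compression, text length, record count)
    #   0xe8 bytes MOBI header ('MOBI', type, encoding, uid, offsets, flags)
    #   EXTH block (built by build_exth below)
    #   title bytes, at offset 0xe8 + 16 + len(exth) as written above
    #   8KB of zero padding reserved for Amazon encryption information
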
    def build_exth(self): # {{{
        oeb = self.oeb
        exth = StringIO()
        nrecs = 0
        for term in oeb.metadata:
            if term not in EXTH_CODES: continue
            code = EXTH_CODES[term]
            items = oeb.metadata[term]
            if term == 'creator':
                if self.prefer_author_sort:
                    creators = [normalize(unicode(c.file_as or c)) for c in items]
                else:
                    creators = [normalize(unicode(c)) for c in items]
                items = ['; '.join(creators)]
            for item in items:
                data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
                if term == 'identifier':
                    if data.lower().startswith('urn:isbn:'):
                        data = data[9:]
                    elif item.scheme.lower() == 'isbn':
                        pass
                    else:
                        continue
                data = data.encode('utf-8')
                exth.write(pack('>II', code, len(data) + 8))
                exth.write(data)
                nrecs += 1
            if term == 'rights':
                try:
                    rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
                except:
                    rights = b'Unknown'
                exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
                exth.write(rights)
                nrecs += 1

        # Write UUID as ASIN
        uuid = None
        from calibre.ebooks.oeb.base import OPF
        for x in oeb.metadata['identifier']:
            if (x.get(OPF('scheme'), None).lower() == 'uuid' or
                    unicode(x).startswith('urn:uuid:')):
                uuid = unicode(x).split(':')[-1]
                break
        if uuid is None:
            from uuid import uuid4
            uuid = str(uuid4())

        if isinstance(uuid, unicode):
            uuid = uuid.encode('utf-8')
        exth.write(pack('>II', 113, len(uuid) + 8))
        exth.write(uuid)
        nrecs += 1

        # Write cdetype
        if not self.opts.mobi_periodical:
            data = b'EBOK'
            exth.write(pack('>II', 501, len(data)+8))
            exth.write(data)
            nrecs += 1

        # Add a publication date entry
        datestr = None
        if oeb.metadata['date'] != []:
            datestr = str(oeb.metadata['date'][0])
        elif oeb.metadata['timestamp'] != []:
            datestr = str(oeb.metadata['timestamp'][0])

        if datestr is not None:
            exth.write(pack('>II', EXTH_CODES['pubdate'], len(datestr) + 8))
            exth.write(datestr)
            nrecs += 1
        else:
            raise NotImplementedError("missing date or timestamp needed for mobi_periodical")

        if (oeb.metadata.cover and
                unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
            id = unicode(oeb.metadata.cover[0])
            item = oeb.manifest.ids[id]
            href = item.href
            if href in self.images:
                index = self.images[href] - 1
                exth.write(pack('>III', 0xc9, 0x0c, index))
                exth.write(pack('>III', 0xcb, 0x0c, 0))
                nrecs += 2
                index = self.add_thumbnail(item)
                if index is not None:
                    exth.write(pack('>III', 0xca, 0x0c, index - 1))
                    nrecs += 1

        exth = exth.getvalue()
        trail = len(exth) % 4
        pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
        exth = [b'EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad]
        return b''.join(exth)
    # }}}

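    # Editorial sketch of the EXTH wire format produced above (not in the
    # original file). Each record is <code:u32><length:u32><data>, where
    # length includes the 8-byte record header, e.g. for a creator field:
    #
    #   pack('>II', EXTH_CODES['creator'], len(b'Kovid Goyal') + 8) + b'Kovid Goyal'
    #
    # The block as a whole is 'EXTH' + total length + record count + records,
    # padded to a four-byte boundary.
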
    def add_thumbnail(self, item):
        try:
            data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
                maxsizeb=MAX_THUMB_SIZE)
        except IOError:
            self.oeb.logger.warn('Bad image file %r' % item.href)
            return None
        manifest = self.oeb.manifest
        id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
        manifest.add(id, href, 'image/jpeg', data=data)
        index = len(self.images) + 1
        self.images[href] = index
        self.records.append(data)
        return index

    def write_header(self):
        title = ascii_filename(unicode(self.oeb.metadata.title[0]))
        # The PDB name field is 32 bytes, including the terminating NUL
        title = title[:31]
        title = title + (b'\0' * (32 - len(title)))
        now = int(time.time())
        nrecords = len(self.records)
        self.write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0),
                b'BOOK', b'MOBI', pack('>IIH', nrecords, 0, nrecords))
        offset = self.tell() + (8 * nrecords) + 2
        for i, record in enumerate(self.records):
            self.write(pack('>I', offset), b'\0', pack('>I', 2*i)[1:])
            offset += len(record)
        self.write(b'\0\0')

    def write_content(self):
        for record in self.records:
            self.write(record)

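    # Editorial summary of the PalmDB container written above (not in the
    # original file):
    #
    #   32 bytes  database name (the NUL-padded title)
    #   28 bytes  attributes, version, timestamps and list ids
    #    8 bytes  type + creator: 'BOOK' 'MOBI'
    #   10 bytes  unique id seed, next record list id, record count
    #    8 bytes  per record: u32 offset, 1 flag byte, 3-byte uid (2*i)
    #    2 bytes  zero padding, followed by the records back to back
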

src/calibre/ebooks/mobi/writer2/serializer.py (new file, 246 lines)
@@ -0,0 +1,246 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
        namespace, prefixname, urlnormalize)
from calibre.ebooks.mobi.mobiml import MBP_NS

from collections import defaultdict
from urlparse import urldefrag
from cStringIO import StringIO


class Serializer(object):
    NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}

    def __init__(self, oeb, images, write_page_breaks_after_item=True):
        '''
        Write all the HTML markup in oeb into a single in memory buffer
        containing a single html document with links replaced by offsets into
        the buffer.

        :param oeb: OEBBook object that encapsulates the document to be
                    processed.

        :param images: Mapping of image hrefs (urlnormalized) to image record
                       indices.

        :param write_page_breaks_after_item: If True a MOBIpocket pagebreak tag
                                             is written after every element of
                                             the spine in ``oeb``.
        '''
        self.oeb = oeb
        self.images = images
        self.logger = oeb.logger
        self.write_page_breaks_after_item = write_page_breaks_after_item

        # Mapping of hrefs (urlnormalized) to the offset in the buffer where
        # the resource pointed to by the href lives. Used at the end to fill in
        # the correct values into all filepos="..." links.
        self.id_offsets = {}

        # Mapping of hrefs (urlnormalized) to a list of offsets into the buffer
        # where filepos="..." elements are written corresponding to links that
        # point to the href. This is used at the end to fill in the correct
        # values.
        self.href_offsets = defaultdict(list)

        # List of offsets in the buffer of non linear items in the spine. These
        # become uncrossable breaks in the MOBI
        self.breaks = []

    def __call__(self):
        '''
        Return the document serialized as a single UTF-8 encoded bytestring.
        '''
        buf = self.buf = StringIO()
        buf.write(b'<html>')
        self.serialize_head()
        self.serialize_body()
        buf.write(b'</html>')
        self.fixup_links()
        return buf.getvalue()

    def serialize_head(self):
        buf = self.buf
        buf.write(b'<head>')
        if len(self.oeb.guide) > 0:
            self.serialize_guide()
        buf.write(b'</head>')

    def serialize_guide(self):
        '''
        The Kindle decides where to open a book based on the presence of
        an item in the guide that looks like
        <reference type="text" title="Start" href="chapter-one.xhtml"/>

        Similarly an item with type="toc" controls where the Goto Table of
        Contents operation on the Kindle goes.
        '''

        buf = self.buf
        hrefs = self.oeb.manifest.hrefs
        buf.write(b'<guide>')
        for ref in self.oeb.guide.values():
            path = urldefrag(ref.href)[0]
            if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
                continue

            buf.write(b'<reference type="')
            if ref.type.startswith('other.'):
                self.serialize_text(ref.type.replace('other.',''), quot=True)
            else:
                self.serialize_text(ref.type, quot=True)
            buf.write(b'" ')
            if ref.title is not None:
                buf.write(b'title="')
                self.serialize_text(ref.title, quot=True)
                buf.write(b'" ')
            self.serialize_href(ref.href)
            # Space required or won't work, I kid you not
            buf.write(b' />')

        buf.write(b'</guide>')

    def serialize_href(self, href, base=None):
        '''
        Serialize the href attribute of an <a> or <reference> tag. It is
        serialized as filepos="0000000000" and a pointer to its location is
        stored in self.href_offsets so that the correct value can be filled in
        at the end.
        '''
        hrefs = self.oeb.manifest.hrefs
        path, frag = urldefrag(urlnormalize(href))
        if path and base:
            path = base.abshref(path)
        if path and path not in hrefs:
            return False
        buf = self.buf
        item = hrefs[path] if path else None
        if item and item.spine_position is None:
            return False
        path = item.href if item else base.href
        href = '#'.join((path, frag)) if frag else path
        buf.write(b'filepos=')
        self.href_offsets[href].append(buf.tell())
        buf.write(b'0000000000')
        return True

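    # Editorial example (not in the original file): a link to 'c1.html#top'
    # is written out as filepos=0000000000, and the position of that
    # placeholder is remembered in href_offsets. Once the whole buffer is
    # built, fixup_links() rewrites it as e.g. filepos=0000421337, the
    # 10-digit decimal offset recorded for 'c1.html#top' in id_offsets.
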
    def serialize_body(self):
        '''
        Serialize all items in the spine of the document. Non linear items are
        moved to the end.
        '''
        buf = self.buf
        self.anchor_offset = buf.tell()
        buf.write(b'<body>')
        self.anchor_offset_kindle = buf.tell()
        spine = [item for item in self.oeb.spine if item.linear]
        spine.extend([item for item in self.oeb.spine if not item.linear])
        for item in spine:
            self.serialize_item(item)
        buf.write(b'</body>')

    def serialize_item(self, item):
        '''
        Serialize an individual item from the spine of the input document.
        The offset of this item is stored in self.id_offsets
        '''
        buf = self.buf
        if not item.linear:
            self.breaks.append(buf.tell() - 1)
        self.id_offsets[urlnormalize(item.href)] = buf.tell()
        # Kindle periodical articles are contained in a <div> tag
        buf.write(b'<div>')
        for elem in item.data.find(XHTML('body')):
            self.serialize_elem(elem, item)
        # Kindle periodical article end marker
        buf.write(b'<div></div>')
        if self.write_page_breaks_after_item:
            buf.write(b'<mbp:pagebreak/>')
        buf.write(b'</div>')
        self.anchor_offset = None

    def serialize_elem(self, elem, item, nsrmap=NSRMAP):
        buf = self.buf
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) not in nsrmap:
            return
        tag = prefixname(elem.tag, nsrmap)
        # Previous layers take care of @name
        id_ = elem.attrib.pop('id', None)
        if id_:
            href = '#'.join((item.href, id_))
            offset = self.anchor_offset or buf.tell()
            self.id_offsets[urlnormalize(href)] = offset
        if self.anchor_offset is not None and \
           tag == 'a' and not elem.attrib and \
           not len(elem) and not elem.text:
            return
        self.anchor_offset = buf.tell()
        buf.write(b'<')
        buf.write(tag.encode('utf-8'))
        if elem.attrib:
            for attr, val in elem.attrib.items():
                if namespace(attr) not in nsrmap:
                    continue
                attr = prefixname(attr, nsrmap)
                buf.write(b' ')
                if attr == 'href':
                    if self.serialize_href(val, item):
                        continue
                elif attr == 'src':
                    href = urlnormalize(item.abshref(val))
                    if href in self.images:
                        index = self.images[href]
                        buf.write(b'recindex="%05d"' % index)
                        continue
                buf.write(attr.encode('utf-8'))
                buf.write(b'="')
                self.serialize_text(val, quot=True)
                buf.write(b'"')
        buf.write(b'>')
        if elem.text or len(elem) > 0:
            if elem.text:
                self.anchor_offset = None
                self.serialize_text(elem.text)
            for child in elem:
                self.serialize_elem(child, item)
                if child.tail:
                    self.anchor_offset = None
                    self.serialize_text(child.tail)
        buf.write(b'</%s>' % tag.encode('utf-8'))

    def serialize_text(self, text, quot=False):
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
        text = text.replace(u'\u00AD', '') # Soft-hyphen
        if quot:
            text = text.replace('"', '&quot;')
        self.buf.write(text.encode('utf-8'))

    def fixup_links(self):
        '''
        Fill in the correct values for all filepos="..." links with the offsets
        of the linked to content (as stored in id_offsets).
        '''
        buf = self.buf
        id_offsets = self.id_offsets
        for href, hoffs in self.href_offsets.items():
            # Iterate over all filepos items
            if href not in id_offsets:
                self.logger.warn('Hyperlink target %r not found' % href)
                # Link to the top of the document, better than just ignoring
                href, _ = urldefrag(href)
            if href in self.id_offsets:
                ioff = self.id_offsets[href]
                for hoff in hoffs:
                    buf.seek(hoff)
                    buf.write(b'%010d' % ioff)
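
# Editorial usage sketch (not part of the original file); this mirrors how
# MobiWriter.generate_text() in main.py drives the class above:
#
#   serializer = Serializer(oeb, images)
#   raw = serializer()            # single UTF-8 bytestring, links fixed up
#   breaks = serializer.breaks    # offsets of non-linear spine items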