mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
KF8 Output: Can now generate standalone KF8 files. There are still bugs that need to be fixed; the produced files are not usable.
This commit is contained in:
parent
fe1e290820
commit
57b01c645d
@ -164,7 +164,8 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
from calibre.ebooks.mobi.writer2.resources import Resources
|
||||
self.log, self.opts, self.oeb = log, opts, oeb
|
||||
|
||||
create_kf8 = tweaks.get('create_kf8', False)
|
||||
mobi_type = tweaks.get('test_mobi_output_type', 'old')
|
||||
create_kf8 = mobi_type in ('new', 'both')
|
||||
|
||||
self.remove_html_cover()
|
||||
resources = Resources(oeb, opts, self.is_periodical,
|
||||
@ -172,13 +173,17 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
self.check_for_periodical()
|
||||
|
||||
kf8 = self.create_kf8(resources) if create_kf8 else None
|
||||
if mobi_type == 'new':
|
||||
kf8.write(output_path)
|
||||
self.extract_mobi(output_path, opts)
|
||||
return
|
||||
|
||||
self.log('Creating MOBI 6 output')
|
||||
self.write_mobi(input_plugin, output_path, kf8, resources)
|
||||
|
||||
def create_kf8(self, resources):
|
||||
from calibre.ebooks.mobi.writer8.main import KF8Writer
|
||||
return KF8Writer(self.oeb, self.opts, resources)
|
||||
from calibre.ebooks.mobi.writer8.main import create_kf8_book
|
||||
return create_kf8_book(self.oeb, self.opts, resources)
|
||||
|
||||
def write_mobi(self, input_plugin, output_path, kf8, resources):
|
||||
from calibre.ebooks.mobi.mobiml import MobiMLizer
|
||||
@ -209,7 +214,9 @@ class MOBIOutput(OutputFormatPlugin):
|
||||
writer = MobiWriter(opts, resources, kf8,
|
||||
write_page_breaks_after_item=write_page_breaks_after_item)
|
||||
writer(oeb, output_path)
|
||||
self.extract_mobi(output_path, opts)
|
||||
|
||||
def extract_mobi(self, output_path, opts):
|
||||
if opts.extract_to is not None:
|
||||
from calibre.ebooks.mobi.debug.main import inspect_mobi
|
||||
ddir = opts.extract_to
|
||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, random, time
|
||||
import random, time
|
||||
from cStringIO import StringIO
|
||||
from struct import pack
|
||||
|
||||
@ -21,32 +21,10 @@ from calibre.ebooks.mobi.utils import (encint, encode_trailing_data,
|
||||
align_block, detect_periodical, RECORD_SIZE, create_text_record)
|
||||
from calibre.ebooks.mobi.writer2.indexer import Indexer
|
||||
|
||||
EXTH_CODES = {
|
||||
'creator': 100,
|
||||
'publisher': 101,
|
||||
'description': 103,
|
||||
'identifier': 104,
|
||||
'subject': 105,
|
||||
'pubdate': 106,
|
||||
'review': 107,
|
||||
'contributor': 108,
|
||||
'rights': 109,
|
||||
'type': 111,
|
||||
'source': 112,
|
||||
'versionnumber': 114,
|
||||
'startreading': 116,
|
||||
'coveroffset': 201,
|
||||
'thumboffset': 202,
|
||||
'hasfakecover': 203,
|
||||
'lastupdatetime': 502,
|
||||
'title': 503,
|
||||
}
|
||||
|
||||
# Disabled as I dont care about uncrossable breaks
|
||||
WRITE_UNCROSSABLE_BREAKS = False
|
||||
|
||||
class MobiWriter(object):
|
||||
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
||||
|
||||
def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True):
|
||||
self.opts = opts
|
||||
@ -210,7 +188,15 @@ class MobiWriter(object):
|
||||
# header as well
|
||||
bt = 0x103 if self.indexer.is_flat_periodical else 0x101
|
||||
|
||||
exth = self.build_exth(bt)
|
||||
from calibre.ebooks.mobi.writer8.exth import build_exth
|
||||
exth = build_exth(metadata,
|
||||
prefer_author_sort=self.opts.prefer_author_sort,
|
||||
is_periodical=self.is_periodical,
|
||||
share_not_sync=self.opts.share_not_sync,
|
||||
cover_offset=self.cover_offset,
|
||||
thumbnail_offset=self.thumbnail_offset,
|
||||
start_offset=self.serializer.start_offset, mobi_doctype=bt
|
||||
)
|
||||
first_image_record = None
|
||||
if self.resources:
|
||||
used_images = self.serializer.used_images
|
||||
@ -379,127 +365,6 @@ class MobiWriter(object):
|
||||
self.records[0] = align_block(record0)
|
||||
# }}}
|
||||
|
||||
def build_exth(self, mobi_doctype): # EXTH Header {{{
|
||||
oeb = self.oeb
|
||||
exth = StringIO()
|
||||
nrecs = 0
|
||||
for term in oeb.metadata:
|
||||
if term not in EXTH_CODES: continue
|
||||
code = EXTH_CODES[term]
|
||||
items = oeb.metadata[term]
|
||||
if term == 'creator':
|
||||
if self.prefer_author_sort:
|
||||
creators = [normalize(unicode(c.file_as or c)) for c in
|
||||
items][:1]
|
||||
else:
|
||||
creators = [normalize(unicode(c)) for c in items]
|
||||
items = ['; '.join(creators)]
|
||||
for item in items:
|
||||
data = normalize(unicode(item))
|
||||
if term != 'description':
|
||||
data = self.COLLAPSE_RE.sub(' ', data)
|
||||
if term == 'identifier':
|
||||
if data.lower().startswith('urn:isbn:'):
|
||||
data = data[9:]
|
||||
elif item.scheme.lower() == 'isbn':
|
||||
pass
|
||||
else:
|
||||
continue
|
||||
data = data.encode('utf-8')
|
||||
exth.write(pack(b'>II', code, len(data) + 8))
|
||||
exth.write(data)
|
||||
nrecs += 1
|
||||
if term == 'rights' :
|
||||
try:
|
||||
rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
|
||||
except:
|
||||
rights = b'Unknown'
|
||||
exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
|
||||
exth.write(rights)
|
||||
nrecs += 1
|
||||
|
||||
# Write UUID as ASIN
|
||||
uuid = None
|
||||
from calibre.ebooks.oeb.base import OPF
|
||||
for x in oeb.metadata['identifier']:
|
||||
if (x.get(OPF('scheme'), None).lower() == 'uuid' or
|
||||
unicode(x).startswith('urn:uuid:')):
|
||||
uuid = unicode(x).split(':')[-1]
|
||||
break
|
||||
if uuid is None:
|
||||
from uuid import uuid4
|
||||
uuid = str(uuid4())
|
||||
|
||||
if isinstance(uuid, unicode):
|
||||
uuid = uuid.encode('utf-8')
|
||||
if not self.opts.share_not_sync:
|
||||
exth.write(pack(b'>II', 113, len(uuid) + 8))
|
||||
exth.write(uuid)
|
||||
nrecs += 1
|
||||
|
||||
# Write cdetype
|
||||
if not self.is_periodical:
|
||||
if not self.opts.share_not_sync:
|
||||
exth.write(pack(b'>II', 501, 12))
|
||||
exth.write(b'EBOK')
|
||||
nrecs += 1
|
||||
else:
|
||||
ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
|
||||
if ids:
|
||||
exth.write(pack(b'>II', 501, 12))
|
||||
exth.write(ids)
|
||||
nrecs += 1
|
||||
|
||||
# Add a publication date entry
|
||||
if oeb.metadata['date']:
|
||||
datestr = str(oeb.metadata['date'][0])
|
||||
elif oeb.metadata['timestamp']:
|
||||
datestr = str(oeb.metadata['timestamp'][0])
|
||||
|
||||
if datestr is None:
|
||||
raise ValueError("missing date or timestamp")
|
||||
|
||||
datestr = bytes(datestr)
|
||||
exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
|
||||
exth.write(datestr)
|
||||
nrecs += 1
|
||||
if self.is_periodical:
|
||||
exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
|
||||
exth.write(datestr)
|
||||
nrecs += 1
|
||||
|
||||
if self.is_periodical:
|
||||
# Pretend to be amazon's super secret periodical generator
|
||||
vals = {204:201, 205:2, 206:0, 207:101}
|
||||
else:
|
||||
# Pretend to be kindlegen 1.2
|
||||
vals = {204:201, 205:1, 206:2, 207:33307}
|
||||
for code, val in vals.iteritems():
|
||||
exth.write(pack(b'>III', code, 12, val))
|
||||
nrecs += 1
|
||||
|
||||
if self.cover_offset is not None:
|
||||
exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
|
||||
self.cover_offset))
|
||||
exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
|
||||
nrecs += 2
|
||||
if self.thumbnail_offset is not None:
|
||||
exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
|
||||
self.thumbnail_offset))
|
||||
nrecs += 1
|
||||
|
||||
if self.serializer.start_offset is not None:
|
||||
exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
|
||||
self.serializer.start_offset))
|
||||
nrecs += 1
|
||||
|
||||
exth = exth.getvalue()
|
||||
trail = len(exth) % 4
|
||||
pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
|
||||
exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
|
||||
return b''.join(exth)
|
||||
# }}}
|
||||
|
||||
def write_header(self): # PalmDB header {{{
|
||||
'''
|
||||
Write the PalmDB header
|
||||
|
176
src/calibre/ebooks/mobi/writer8/exth.py
Normal file
176
src/calibre/ebooks/mobi/writer8/exth.py
Normal file
@ -0,0 +1,176 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
from struct import pack
|
||||
from io import BytesIO
|
||||
|
||||
from calibre.ebooks.mobi.utils import utf8_text
|
||||
|
||||
# Maps a metadata term name (or the name of a numeric header value) to the
# EXTH record type code that build_exth() writes for it.
EXTH_CODES = {
    'creator': 100,
    'publisher': 101,
    'description': 103,
    'identifier': 104,
    'subject': 105,
    'pubdate': 106,
    'review': 107,
    'contributor': 108,
    'rights': 109,
    'type': 111,
    'source': 112,
    'versionnumber': 114,
    'startreading': 116,
    'num_of_resources': 125,
    'kf8_unknown_count': 131,
    'coveroffset': 201,
    'thumboffset': 202,
    'hasfakecover': 203,
    'lastupdatetime': 502,
    'title': 503,
}

# Matches runs of whitespace; build_exth() replaces each run with a single
# space in every metadata value except the description.
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
||||
|
||||
def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
        share_not_sync=True, cover_offset=None, thumbnail_offset=None,
        start_offset=None, mobi_doctype=2, num_of_resources=None,
        kf8_unknown_count=0, be_kindlegen2=False):
    '''
    Serialize book metadata into a complete EXTH header.

    :param metadata: The OEB metadata object. It is iterated for its terms,
        indexed by term name and also accessed as ``metadata.rights``.
    :param prefer_author_sort: If True, only the first creator is written,
        using its ``file_as`` value when it has one.
    :param is_periodical: Selects the periodical cdetype, the extra
        ``lastupdatetime`` record and the periodical generator values.
    :param share_not_sync: If True, neither the ASIN (113) record nor the
        ``EBOK`` cdetype (501) record is written.
    :param cover_offset: If not None, written as the ``coveroffset`` record
        together with a ``hasfakecover`` record of 0.
    :param thumbnail_offset: If not None, written as ``thumboffset``.
    :param start_offset: If not None, written as ``startreading``.
    :param mobi_doctype: For periodicals, 0x101 and 0x103 select the ``NWPR``
        and ``MAGZ`` cdetypes respectively; other values write no cdetype.
    :param num_of_resources: If not None, written as ``num_of_resources``.
    :param kf8_unknown_count: If not None, written as ``kf8_unknown_count``.
    :param be_kindlegen2: If True, use the kindlegen 2 generator values for
        records 204-207 (takes precedence over ``is_periodical``).

    :return: A bytestring: ``b'EXTH'``, the header length and record count,
        the records, and 1 to 4 NUL padding bytes.
    :raises ValueError: If the metadata has neither a date nor a timestamp.
    '''
    exth = BytesIO()
    nrecs = 0

    for term in metadata:
        if term not in EXTH_CODES:
            continue
        code = EXTH_CODES[term]
        items = metadata[term]
        if term == 'creator':
            if prefer_author_sort:
                creators = [unicode(c.file_as or c) for c in items][:1]
            else:
                creators = [unicode(c) for c in items]
            # All creators are written as a single record
            items = ['; '.join(creators)]
        for item in items:
            data = unicode(item)
            if term != 'description':
                data = COLLAPSE_RE.sub(' ', data)
            if term == 'identifier':
                # Only ISBN identifiers are written
                if data.lower().startswith('urn:isbn:'):
                    data = data[9:]
                elif (getattr(item, 'scheme', None) or '').lower() != 'isbn':
                    # Identifiers without a scheme are skipped rather than
                    # crashing on None.lower()
                    continue
            data = utf8_text(data)
            exth.write(pack(b'>II', code, len(data) + 8))
            exth.write(data)
            nrecs += 1
        if term == 'rights':
            # NOTE(review): this writes the first rights value a second time,
            # after the loop above has already written every rights item.
            # Kept as is to preserve the existing output -- confirm intent.
            try:
                rights = utf8_text(unicode(metadata.rights[0]))
            except Exception:
                rights = b'Unknown'
            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
            exth.write(rights)
            nrecs += 1

    # Write UUID as ASIN
    uuid = None
    from calibre.ebooks.oeb.base import OPF
    for x in metadata['identifier']:
        # The scheme attribute may be absent, in which case get() returns None
        scheme = x.get(OPF('scheme'), None) or ''
        if (scheme.lower() == 'uuid' or
                unicode(x).startswith('urn:uuid:')):
            uuid = unicode(x).split(':')[-1]
            break
    if uuid is None:
        from uuid import uuid4
        uuid = str(uuid4())

    if isinstance(uuid, unicode):
        uuid = uuid.encode('utf-8')
    # The ASIN is written under the same condition as the EBOK cdetype below
    # (and as in the MOBI 6 writer this function was factored out of)
    if not share_not_sync:
        exth.write(pack(b'>II', 113, len(uuid) + 8))
        exth.write(uuid)
        nrecs += 1

    # Write cdetype
    if not is_periodical:
        if not share_not_sync:
            exth.write(pack(b'>II', 501, 12))
            exth.write(b'EBOK')
            nrecs += 1
    else:
        ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
        if ids:
            exth.write(pack(b'>II', 501, 12))
            exth.write(ids)
            nrecs += 1

    # Add a publication date entry
    datestr = None  # Must be bound before the check below
    if metadata['date']:
        datestr = str(metadata['date'][0])
    elif metadata['timestamp']:
        datestr = str(metadata['timestamp'][0])

    if datestr is None:
        raise ValueError("missing date or timestamp")

    datestr = bytes(datestr)
    exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
    exth.write(datestr)
    nrecs += 1
    if is_periodical:
        exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
        exth.write(datestr)
        nrecs += 1

    if be_kindlegen2:
        vals = {204:201, 205:2, 206:2, 207:35621}
    elif is_periodical:
        # Pretend to be amazon's super secret periodical generator
        vals = {204:201, 205:2, 206:0, 207:101}
    else:
        # Pretend to be kindlegen 1.2
        vals = {204:201, 205:1, 206:2, 207:33307}
    for code, val in vals.iteritems():
        exth.write(pack(b'>III', code, 12, val))
        nrecs += 1

    if cover_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
            cover_offset))
        exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
        nrecs += 2
    if thumbnail_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
            thumbnail_offset))
        nrecs += 1

    if start_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
            start_offset))
        nrecs += 1

    if num_of_resources is not None:
        exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
            num_of_resources))
        nrecs += 1

    if kf8_unknown_count is not None:
        exth.write(pack(b'>III', EXTH_CODES['kf8_unknown_count'], 12,
            kf8_unknown_count))
        nrecs += 1

    exth = exth.getvalue()
    trail = len(exth) % 4
    pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
    # The stored length covers the 12 byte preamble and the records, but not
    # the padding
    exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
    return b''.join(exth)
|
||||
|
||||
|
@ -7,6 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import random
|
||||
from io import BytesIO
|
||||
from collections import OrderedDict
|
||||
from struct import pack
|
||||
@ -16,6 +17,7 @@ from calibre.ebooks.mobi.utils import align_block
|
||||
NULL = 0xffffffff
|
||||
zeroes = lambda x: b'\0'*x
|
||||
nulls = lambda x: b'\xff'*x
|
||||
short = lambda x: pack(b'>H', x)
|
||||
|
||||
class Header(OrderedDict):
|
||||
|
||||
@ -25,7 +27,9 @@ class Header(OrderedDict):
|
||||
'''
|
||||
|
||||
ALIGN_BLOCK = False
|
||||
POSITIONS = {}
|
||||
POSITIONS = {} # Mapping of position field to field whose position should
|
||||
# be stored in the position field
|
||||
SHORT_FIELDS = set()
|
||||
|
||||
def __init__(self):
|
||||
OrderedDict.__init__(self)
|
||||
@ -36,13 +40,17 @@ class Header(OrderedDict):
|
||||
name, val = [x.strip() for x in line.partition('=')[0::2]]
|
||||
if val:
|
||||
val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None,
|
||||
'nulls':nulls})
|
||||
'nulls':nulls, 'short':short, 'random':random})
|
||||
else:
|
||||
val = 0
|
||||
if name in self:
|
||||
raise ValueError('Duplicate field in definition: %r'%name)
|
||||
self[name] = val
|
||||
|
||||
@property
|
||||
def dynamic_fields(self):
|
||||
return tuple(k for k, v in self.iteritems() if v is None)
|
||||
|
||||
def __call__(self, **kwargs):
|
||||
positions = {}
|
||||
for name, val in kwargs.iteritems():
|
||||
@ -58,7 +66,8 @@ class Header(OrderedDict):
|
||||
if val is None:
|
||||
raise ValueError('Dynamic field %r not set'%name)
|
||||
if isinstance(val, (int, long)):
|
||||
val = pack(b'>I', val)
|
||||
fmt = 'H' if name in self.SHORT_FIELDS else 'I'
|
||||
val = pack(b'>'+fmt, val)
|
||||
buf.write(val)
|
||||
|
||||
for pos_field, field in self.POSITIONS.iteritems():
|
||||
|
@ -182,7 +182,7 @@ class Index(object): # {{{
|
||||
if len(body) + self.HEADER_LENGTH >= 0x10000:
|
||||
raise too_large
|
||||
header = b'INDX'
|
||||
buf.truncate(0)
|
||||
buf.seek(0), buf.truncate(0)
|
||||
buf.write(pack(b'>I', self.HEADER_LENGTH))
|
||||
buf.write(b'\0'*4) # Unknown
|
||||
buf.write(pack(b'>I', 1)) # Header type? Or index record number?
|
||||
|
@ -26,6 +26,7 @@ from calibre.ebooks.oeb.parse_utils import barename
|
||||
from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
|
||||
from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
|
||||
ChunkIndex, GuideIndex)
|
||||
from calibre.ebooks.mobi.writer8.mobi import KF8Book
|
||||
|
||||
XML_DOCS = OEB_DOCS | {SVG_MIME}
|
||||
|
||||
@ -42,7 +43,7 @@ class KF8Writer(object):
|
||||
self.used_images = set()
|
||||
self.resources = resources
|
||||
self.flows = [None] # First flow item is reserved for the text
|
||||
self.records = []
|
||||
self.records = [None] # Placeholder for zeroth record
|
||||
|
||||
self.log('\tGenerating KF8 markup...')
|
||||
self.dup_data()
|
||||
@ -266,9 +267,10 @@ class KF8Writer(object):
|
||||
start = 0 if i == 0 else self.fdst_table[-1].end
|
||||
self.fdst_table.append(FDST(start, start + len(flow)))
|
||||
entries.extend(self.fdst_table[-1])
|
||||
rec = (b'FDST' + pack(b'>LL', len(self.fdst_table), 12) +
|
||||
rec = (b'FDST' + pack(b'>LL', 12, len(self.fdst_table)) +
|
||||
pack(b'>%dL'%len(entries), *entries))
|
||||
self.fdst_records = [rec]
|
||||
self.fdst_count = len(self.fdst_table)
|
||||
|
||||
def create_indices(self):
|
||||
self.skel_records = SkelIndex(self.skel_table)()
|
||||
@ -347,3 +349,7 @@ class KF8Writer(object):
|
||||
if self.guide_table:
|
||||
self.guide_records = GuideIndex(self.guide_table)()
|
||||
|
||||
def create_kf8_book(oeb, opts, resources):
    ''' Construct a KF8Writer from the arguments and return a KF8Book built
    from it. '''
    return KF8Book(KF8Writer(oeb, opts, resources))
|
||||
|
||||
|
302
src/calibre/ebooks/mobi/writer8/mobi.py
Normal file
302
src/calibre/ebooks/mobi/writer8/mobi.py
Normal file
@ -0,0 +1,302 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import time
|
||||
from struct import pack
|
||||
|
||||
from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text
|
||||
from calibre.ebooks.mobi.writer8.header import Header
|
||||
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED)
|
||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||
from calibre.ebooks.mobi.writer8.exth import build_exth
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
|
||||
NULL_INDEX = 0xffffffff
|
||||
|
||||
class MOBIHeader(Header): # {{{
    '''
    Represents the first record in a MOBI file, contains all the metadata about
    the file.
    '''

    # Substituted into DEFINITION below as both file_version and min_version
    FILE_VERSION = 8

    # One field per line, as ``name = value`` or a bare name. Fields set to
    # DYN are supplied by the caller (KF8Book.record0 in this module passes
    # them as keyword arguments). The number in each comment is the byte
    # offset of the field.
    DEFINITION = '''
    # 0: Compression
    compression = DYN

    # 2: Unused
    unused1 = zeroes(2)

    # 4: Text length
    text_length = DYN

    # 8: Last text record
    last_text_record = DYN

    # 10: Text record size
    record_size = {record_size}

    # 12: Unused
    unused2

    # 16: Ident
    ident = b'MOBI'

    # 20: Header length
    header_length = 248

    # 24: Book Type (0x2 - Book, 0x101 - News hierarchical, 0x102 - News
    # (flat), 0x103 - News magazine same as 0x101)
    book_type = DYN

    # 28: Text encoding (utf-8 = 65001)
    encoding = 65001

    # 32: UID
    uid = random.randint(0, 0xffffffff)

    # 36: File version
    file_version = {file_version}

    # 40: Meta orth record (Chunk table index in KF8)
    meta_orth_record = DYN

    # 44: Meta infl index
    meta_infl_index = NULL

    # 48: Extra indices
    extra_index0 = NULL
    extra_index1 = NULL
    extra_index2 = NULL
    extra_index3 = NULL
    extra_index4 = NULL
    extra_index5 = NULL
    extra_index6 = NULL
    extra_index7 = NULL

    # 80: First non text record
    first_non_text_record = DYN

    # 84: Title offset
    title_offset

    # 88: Title Length
    title_length = DYN

    # 92: Language code
    language_code = DYN

    # 96: Dictionary in and out languages
    in_lang
    out_lang

    # 104: Min version
    min_version = {file_version}

    # 108: First resource record
    first_resource_record = DYN

    # 112: Huff/CDIC compression
    huff_first_record
    huff_count

    # 120: DATP records
    datp_first_record
    datp_count

    # 128: EXTH flags
    exth_flags = DYN

    # 132: Unknown
    unknown = zeroes(32)

    # 164: DRM
    drm_offset = NULL
    drm_count = NULL
    drm_size
    drm_flags

    # 180: Unknown
    unknown2 = zeroes(12)

    # 192: FDST
    fdst_record = DYN
    fdst_count = DYN

    # 200: FCI
    fcis_record = NULL
    fcis_count

    # 208: FLIS
    flis_record = NULL
    flis_count

    # 216: Unknown
    unknown3 = zeroes(8)

    # 224: SRCS
    srcs_record = NULL
    srcs_count

    # 232: Unknown
    unknown4 = nulls(8)

    # 240: Extra data flags
    # 0b1 - extra multibyte bytes after text records
    # 0b10 - TBS indexing data (only used in MOBI 6)
    # 0b100 - uncrossable breaks only used in MOBI 6
    extra_data_flags = 1

    # 244: KF8 Indices
    ncx_index = DYN
    chunk_index = DYN
    skel_index = DYN
    datp_index = NULL
    guide_index = DYN

    # 264: EXTH
    exth = DYN

    # Full title
    full_title = DYN

    # Padding to allow amazon's DTP service to add data
    padding = zeroes(8192)
    '''.format(record_size=RECORD_SIZE, file_version=FILE_VERSION)

    # Integer fields that occupy two bytes instead of four
    SHORT_FIELDS = {'compression', 'last_text_record', 'record_size'}
    # NOTE(review): the Header base class declares ALIGN_BLOCK, not ALIGN --
    # confirm that this attribute is actually read anywhere
    ALIGN = True
    # title_offset is filled in with the position of the full_title field
    POSITIONS = {'title_offset':'full_title'}

    def format_value(self, name, val):
        ''' Convert the truth value given for ``compression`` into the
        PALMDOC or UNCOMPRESSED constant, then defer to the base class for
        all fields. '''
        if name == 'compression':
            val = PALMDOC if val else UNCOMPRESSED
        return super(MOBIHeader, self).format_value(name, val)

# }}}
|
||||
|
||||
# Fields that need to be set in the MOBI Header are
|
||||
|
||||
class KF8Book(object):

    '''
    Collects the records produced by a KF8 writer, together with the values
    needed for the MOBI header, and writes them out as a standalone file.

    The zeroth record (MOBI header plus EXTH header) is not stored; it is
    generated on demand by the :attr:`record0` property.
    '''

    def __init__(self, writer):
        self.build_records(writer)

    def build_records(self, writer):
        '''
        Assemble ``self.records`` from the text, index, resource and FDST
        records of ``writer`` and remember the record index at which each
        group starts.

        Note that ``self.records`` is the same list object as
        ``writer.records``, so the writer's list is extended in place.
        '''
        metadata = writer.oeb.metadata
        # The text records
        for x in ('last_text_record_idx', 'first_non_text_record_idx'):
            # Stored without the _idx suffix, to match the header field names
            setattr(self, x.rpartition('_')[0], getattr(writer, x))
        self.records = writer.records
        self.text_length = writer.text_length

        # KF8 Indices
        self.chunk_index = self.meta_orth_record = len(self.records)
        self.records.extend(writer.chunk_records)
        self.skel_index = len(self.records)
        self.records.extend(writer.skel_records)
        self.guide_index = NULL_INDEX
        if writer.guide_records:
            self.guide_index = len(self.records)
            self.records.extend(writer.guide_records)
        self.ncx_index = NULL_INDEX
        if writer.ncx_records:
            self.ncx_index = len(self.records)
            self.records.extend(writer.ncx_records)

        # Resources
        resources = writer.resources
        for x in ('cover_offset', 'thumbnail_offset', 'masthead_offset'):
            setattr(self, x, getattr(resources, x))

        # The index must be taken before the resource records are appended.
        # It stays NULL_INDEX when there are no resources.
        self.first_resource_record = NULL_INDEX
        if resources.records:
            self.first_resource_record = len(self.records)
            self.records.extend(resources.records)

        self.num_of_resources = len(resources.records)

        # FDST
        self.fdst_count = writer.fdst_count
        self.fdst_record = len(self.records)
        self.records.extend(writer.fdst_records)

        # EOF
        self.records.append(b'\xe9\x8e\r\n') # EOF record

        # Miscellaneous header fields
        self.compression = writer.compress
        self.book_type = 0x101 if writer.opts.mobi_periodical else 2
        self.full_title = utf8_text(unicode(metadata.title[0]))
        self.title_length = len(self.full_title)

        self.language_code = iana2mobi(str(metadata.language[0]))
        self.exth_flags = 0b1010000
        if writer.opts.mobi_periodical:
            self.exth_flags |= 0b1000

        self.opts = writer.opts
        self.start_offset = writer.start_offset
        self.metadata = metadata

    @property
    def record0(self):
        ''' We generate the EXTH header and record0 dynamically, to allow other
        code to customize various values after build_records() has been
        called'''
        opts = self.opts
        # No kf8_unknown_count record is written when there are no resources
        kuc = 0 if self.num_of_resources > 0 else None
        self.exth = build_exth(self.metadata,
                prefer_author_sort=opts.prefer_author_sort,
                is_periodical=opts.mobi_periodical,
                share_not_sync=opts.share_not_sync,
                cover_offset=self.cover_offset,
                thumbnail_offset=self.thumbnail_offset,
                num_of_resources=self.num_of_resources,
                kf8_unknown_count=kuc, be_kindlegen2=True,
                start_offset=self.start_offset, mobi_doctype=self.book_type)

        # Values for the dynamic header fields, read from this object
        kwargs = {field:getattr(self, field) for field in
                ('compression', 'text_length', 'last_text_record',
                    'book_type', 'meta_orth_record', 'first_non_text_record',
                    'title_length', 'language_code', 'first_resource_record',
                    'exth_flags', 'fdst_record', 'fdst_count', 'ncx_index',
                    'chunk_index', 'skel_index', 'guide_index', 'exth',
                    'full_title')}
        return MOBIHeader()(**kwargs)

    def write(self, outpath):
        '''
        Write the book to the file at ``outpath``: the PalmDB header, the
        record offset table and then every record.
        '''
        # self.records[0] is skipped and replaced by the generated record0
        records = [self.record0] + self.records[1:]

        with open(outpath, 'wb') as f:

            # Write PalmDB Header

            # The name field is 32 bytes: at most 31 characters, NUL padded
            title = ascii_filename(self.full_title.decode('utf-8')).replace(
                    ' ', '_')[:31]
            title += (b'\0' * (32 - len(title)))
            now = int(time.time())
            nrecords = len(records)
            f.write(title)
            f.write(pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0))
            f.write(b'BOOKMOBI')
            f.write(pack(b'>IIH', (2*nrecords)-1, 0, nrecords))
            # Record data starts after the offset table (8 bytes per record)
            # and the 2 bytes written after it
            offset = f.tell() + (8 * nrecords) + 2
            for i, record in enumerate(records):
                f.write(pack(b'>I', offset))
                # One zero byte followed by the low three bytes of 2*i
                f.write(b'\0' + pack(b'>I', 2*i)[1:])
                offset += len(record)
            f.write(b'\0\0')

            for rec in records:
                f.write(rec)
|
||||
|
Loading…
x
Reference in New Issue
Block a user