KF8 Output: Can now generate standalone KF8 files. There are still bugs that need to be fixed; the produced files are not yet usable.
commit 57b01c645d (parent fe1e290820)
@@ -164,7 +164,8 @@ class MOBIOutput(OutputFormatPlugin):
         from calibre.ebooks.mobi.writer2.resources import Resources
         self.log, self.opts, self.oeb = log, opts, oeb

-        create_kf8 = tweaks.get('create_kf8', False)
+        mobi_type = tweaks.get('test_mobi_output_type', 'old')
+        create_kf8 = mobi_type in ('new', 'both')

         self.remove_html_cover()
         resources = Resources(oeb, opts, self.is_periodical,
@@ -172,13 +173,17 @@ class MOBIOutput(OutputFormatPlugin):
         self.check_for_periodical()

         kf8 = self.create_kf8(resources) if create_kf8 else None
+        if mobi_type == 'new':
+            kf8.write(output_path)
+            self.extract_mobi(output_path, opts)
+            return

         self.log('Creating MOBI 6 output')
         self.write_mobi(input_plugin, output_path, kf8, resources)

     def create_kf8(self, resources):
-        from calibre.ebooks.mobi.writer8.main import KF8Writer
-        return KF8Writer(self.oeb, self.opts, resources)
+        from calibre.ebooks.mobi.writer8.main import create_kf8_book
+        return create_kf8_book(self.oeb, self.opts, resources)

     def write_mobi(self, input_plugin, output_path, kf8, resources):
         from calibre.ebooks.mobi.mobiml import MobiMLizer
@@ -209,7 +214,9 @@ class MOBIOutput(OutputFormatPlugin):
         writer = MobiWriter(opts, resources, kf8,
                         write_page_breaks_after_item=write_page_breaks_after_item)
         writer(oeb, output_path)
+        self.extract_mobi(output_path, opts)

+    def extract_mobi(self, output_path, opts):
         if opts.extract_to is not None:
             from calibre.ebooks.mobi.debug.main import inspect_mobi
             ddir = opts.extract_to
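
Note on the hunks above: MOBI output selection is now driven by the test_mobi_output_type tweak ('old' is the default, 'new' writes a standalone KF8 file and returns early, 'both' builds the KF8 data and hands it to the MOBI 6 writer). A condensed sketch of that dispatch, restating the diff in one place (simplified, not the actual plugin code; plugin, oeb, opts, resources and output_path are assumed context):

    # Simplified restatement of MOBIOutput.convert() after this commit; not the real code.
    from calibre.utils.config import tweaks

    def convert_sketch(plugin, oeb, opts, resources, output_path):
        mobi_type = tweaks.get('test_mobi_output_type', 'old')  # 'old' | 'new' | 'both'
        create_kf8 = mobi_type in ('new', 'both')

        kf8 = plugin.create_kf8(resources) if create_kf8 else None
        if mobi_type == 'new':
            kf8.write(output_path)  # standalone KF8, no MOBI 6 records at all
            plugin.extract_mobi(output_path, opts)
            return
        # 'old' and 'both': go through the MOBI 6 writer, embedding kf8 when present
        plugin.write_mobi(None, output_path, kf8, resources)
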
@@ -7,7 +7,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import re, random, time
+import random, time
 from cStringIO import StringIO
 from struct import pack

@@ -21,32 +21,10 @@ from calibre.ebooks.mobi.utils import (encint, encode_trailing_data,
     align_block, detect_periodical, RECORD_SIZE, create_text_record)
 from calibre.ebooks.mobi.writer2.indexer import Indexer

-EXTH_CODES = {
-    'creator': 100,
-    'publisher': 101,
-    'description': 103,
-    'identifier': 104,
-    'subject': 105,
-    'pubdate': 106,
-    'review': 107,
-    'contributor': 108,
-    'rights': 109,
-    'type': 111,
-    'source': 112,
-    'versionnumber': 114,
-    'startreading': 116,
-    'coveroffset': 201,
-    'thumboffset': 202,
-    'hasfakecover': 203,
-    'lastupdatetime': 502,
-    'title': 503,
-}
-
 # Disabled as I dont care about uncrossable breaks
 WRITE_UNCROSSABLE_BREAKS = False

 class MobiWriter(object):
-    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

     def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True):
         self.opts = opts
@@ -210,7 +188,15 @@ class MobiWriter(object):
         # header as well
         bt = 0x103 if self.indexer.is_flat_periodical else 0x101

-        exth = self.build_exth(bt)
+        from calibre.ebooks.mobi.writer8.exth import build_exth
+        exth = build_exth(metadata,
+                prefer_author_sort=self.opts.prefer_author_sort,
+                is_periodical=self.is_periodical,
+                share_not_sync=self.opts.share_not_sync,
+                cover_offset=self.cover_offset,
+                thumbnail_offset=self.thumbnail_offset,
+                start_offset=self.serializer.start_offset, mobi_doctype=bt
+                )
         first_image_record = None
         if self.resources:
             used_images = self.serializer.used_images
@@ -379,127 +365,6 @@ class MobiWriter(object):
         self.records[0] = align_block(record0)
     # }}}

-    def build_exth(self, mobi_doctype): # EXTH Header {{{
-        oeb = self.oeb
-        exth = StringIO()
-        nrecs = 0
-        for term in oeb.metadata:
-            if term not in EXTH_CODES: continue
-            code = EXTH_CODES[term]
-            items = oeb.metadata[term]
-            if term == 'creator':
-                if self.prefer_author_sort:
-                    creators = [normalize(unicode(c.file_as or c)) for c in
-                            items][:1]
-                else:
-                    creators = [normalize(unicode(c)) for c in items]
-                items = ['; '.join(creators)]
-            for item in items:
-                data = normalize(unicode(item))
-                if term != 'description':
-                    data = self.COLLAPSE_RE.sub(' ', data)
-                if term == 'identifier':
-                    if data.lower().startswith('urn:isbn:'):
-                        data = data[9:]
-                    elif item.scheme.lower() == 'isbn':
-                        pass
-                    else:
-                        continue
-                data = data.encode('utf-8')
-                exth.write(pack(b'>II', code, len(data) + 8))
-                exth.write(data)
-                nrecs += 1
-            if term == 'rights' :
-                try:
-                    rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
-                except:
-                    rights = b'Unknown'
-                exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
-                exth.write(rights)
-                nrecs += 1
-
-        # Write UUID as ASIN
-        uuid = None
-        from calibre.ebooks.oeb.base import OPF
-        for x in oeb.metadata['identifier']:
-            if (x.get(OPF('scheme'), None).lower() == 'uuid' or
-                    unicode(x).startswith('urn:uuid:')):
-                uuid = unicode(x).split(':')[-1]
-                break
-        if uuid is None:
-            from uuid import uuid4
-            uuid = str(uuid4())
-
-        if isinstance(uuid, unicode):
-            uuid = uuid.encode('utf-8')
-        if not self.opts.share_not_sync:
-            exth.write(pack(b'>II', 113, len(uuid) + 8))
-            exth.write(uuid)
-            nrecs += 1
-
-        # Write cdetype
-        if not self.is_periodical:
-            if not self.opts.share_not_sync:
-                exth.write(pack(b'>II', 501, 12))
-                exth.write(b'EBOK')
-                nrecs += 1
-        else:
-            ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
-            if ids:
-                exth.write(pack(b'>II', 501, 12))
-                exth.write(ids)
-                nrecs += 1
-
-        # Add a publication date entry
-        if oeb.metadata['date']:
-            datestr = str(oeb.metadata['date'][0])
-        elif oeb.metadata['timestamp']:
-            datestr = str(oeb.metadata['timestamp'][0])
-
-        if datestr is None:
-            raise ValueError("missing date or timestamp")
-
-        datestr = bytes(datestr)
-        exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
-        exth.write(datestr)
-        nrecs += 1
-        if self.is_periodical:
-            exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
-            exth.write(datestr)
-            nrecs += 1
-
-        if self.is_periodical:
-            # Pretend to be amazon's super secret periodical generator
-            vals = {204:201, 205:2, 206:0, 207:101}
-        else:
-            # Pretend to be kindlegen 1.2
-            vals = {204:201, 205:1, 206:2, 207:33307}
-        for code, val in vals.iteritems():
-            exth.write(pack(b'>III', code, 12, val))
-            nrecs += 1
-
-        if self.cover_offset is not None:
-            exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
-                self.cover_offset))
-            exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
-            nrecs += 2
-        if self.thumbnail_offset is not None:
-            exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
-                self.thumbnail_offset))
-            nrecs += 1
-
-        if self.serializer.start_offset is not None:
-            exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
-                self.serializer.start_offset))
-            nrecs += 1
-
-        exth = exth.getvalue()
-        trail = len(exth) % 4
-        pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
-        exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
-        return b''.join(exth)
-    # }}}
-
     def write_header(self): # PalmDB header {{{
         '''
         Write the PalmDB header

src/calibre/ebooks/mobi/writer8/exth.py (new file, 176 lines)
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re
+from struct import pack
+from io import BytesIO
+
+from calibre.ebooks.mobi.utils import utf8_text
+
+EXTH_CODES = {
+    'creator': 100,
+    'publisher': 101,
+    'description': 103,
+    'identifier': 104,
+    'subject': 105,
+    'pubdate': 106,
+    'review': 107,
+    'contributor': 108,
+    'rights': 109,
+    'type': 111,
+    'source': 112,
+    'versionnumber': 114,
+    'startreading': 116,
+    'num_of_resources': 125,
+    'kf8_unknown_count': 131,
+    'coveroffset': 201,
+    'thumboffset': 202,
+    'hasfakecover': 203,
+    'lastupdatetime': 502,
+    'title': 503,
+}
+
+COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
+
+def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
+        share_not_sync=True, cover_offset=None, thumbnail_offset=None,
+        start_offset=None, mobi_doctype=2, num_of_resources=None,
+        kf8_unknown_count=0, be_kindlegen2=False):
+    exth = BytesIO()
+    nrecs = 0
+
+    for term in metadata:
+        if term not in EXTH_CODES: continue
+        code = EXTH_CODES[term]
+        items = metadata[term]
+        if term == 'creator':
+            if prefer_author_sort:
+                creators = [unicode(c.file_as or c) for c in
+                        items][:1]
+            else:
+                creators = [unicode(c) for c in items]
+            items = ['; '.join(creators)]
+        for item in items:
+            data = unicode(item)
+            if term != 'description':
+                data = COLLAPSE_RE.sub(' ', data)
+            if term == 'identifier':
+                if data.lower().startswith('urn:isbn:'):
+                    data = data[9:]
+                elif item.scheme.lower() == 'isbn':
+                    pass
+                else:
+                    continue
+            data = utf8_text(data)
+            exth.write(pack(b'>II', code, len(data) + 8))
+            exth.write(data)
+            nrecs += 1
+        if term == 'rights' :
+            try:
+                rights = utf8_text(unicode(metadata.rights[0]))
+            except:
+                rights = b'Unknown'
+            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
+            exth.write(rights)
+            nrecs += 1
+
+    # Write UUID as ASIN
+    uuid = None
+    from calibre.ebooks.oeb.base import OPF
+    for x in metadata['identifier']:
+        if (x.get(OPF('scheme'), None).lower() == 'uuid' or
+                unicode(x).startswith('urn:uuid:')):
+            uuid = unicode(x).split(':')[-1]
+            break
+    if uuid is None:
+        from uuid import uuid4
+        uuid = str(uuid4())
+
+    if isinstance(uuid, unicode):
+        uuid = uuid.encode('utf-8')
+    if share_not_sync:
+        exth.write(pack(b'>II', 113, len(uuid) + 8))
+        exth.write(uuid)
+        nrecs += 1
+
+    # Write cdetype
+    if not is_periodical:
+        if not share_not_sync:
+            exth.write(pack(b'>II', 501, 12))
+            exth.write(b'EBOK')
+            nrecs += 1
+    else:
+        ids = {0x101:b'NWPR', 0x103:b'MAGZ'}.get(mobi_doctype, None)
+        if ids:
+            exth.write(pack(b'>II', 501, 12))
+            exth.write(ids)
+            nrecs += 1
+
+    # Add a publication date entry
+    if metadata['date']:
+        datestr = str(metadata['date'][0])
+    elif metadata['timestamp']:
+        datestr = str(metadata['timestamp'][0])
+
+    if datestr is None:
+        raise ValueError("missing date or timestamp")
+
+    datestr = bytes(datestr)
+    exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
+    exth.write(datestr)
+    nrecs += 1
+    if is_periodical:
+        exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
+        exth.write(datestr)
+        nrecs += 1
+
+    if be_kindlegen2:
+        vals = {204:201, 205:2, 206:2, 207:35621}
+    elif is_periodical:
+        # Pretend to be amazon's super secret periodical generator
+        vals = {204:201, 205:2, 206:0, 207:101}
+    else:
+        # Pretend to be kindlegen 1.2
+        vals = {204:201, 205:1, 206:2, 207:33307}
+    for code, val in vals.iteritems():
+        exth.write(pack(b'>III', code, 12, val))
+        nrecs += 1
+
+    if cover_offset is not None:
+        exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
+            cover_offset))
+        exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
+        nrecs += 2
+    if thumbnail_offset is not None:
+        exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
+            thumbnail_offset))
+        nrecs += 1
+
+    if start_offset is not None:
+        exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
+            start_offset))
+        nrecs += 1
+
+    if num_of_resources is not None:
+        exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
+            num_of_resources))
+        nrecs += 1
+
+    if kf8_unknown_count is not None:
+        exth.write(pack(b'>III', EXTH_CODES['kf8_unknown_count'], 12,
+            kf8_unknown_count))
+        nrecs += 1
+
+    exth = exth.getvalue()
+    trail = len(exth) % 4
+    pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
+    exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
+    return b''.join(exth)
+
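
For reference, the block returned by build_exth() above is self-describing: a b'EXTH' magic, a 4-byte length (records plus the 12-byte header, excluding the trailing padding), a 4-byte record count, then one (code, size, data) triple per record, where size includes its own 8 header bytes. A minimal sketch of a reader for that layout, written against the structure visible above (not an API that exists in calibre):

    from struct import unpack_from

    def iter_exth_records(raw):
        # raw is the bytestring returned by build_exth()
        if raw[:4] != b'EXTH':
            raise ValueError('Not an EXTH block')
        length, nrecs = unpack_from(b'>II', raw, 4)  # length excludes the padding
        pos = 12
        for _ in range(nrecs):
            code, size = unpack_from(b'>II', raw, pos)  # size includes the 8 header bytes
            yield code, raw[pos + 8:pos + size]
            pos += size
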
@@ -7,6 +7,7 @@ __license__ = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

+import random
 from io import BytesIO
 from collections import OrderedDict
 from struct import pack
@@ -16,6 +17,7 @@ from calibre.ebooks.mobi.utils import align_block
 NULL = 0xffffffff
 zeroes = lambda x: b'\0'*x
 nulls = lambda x: b'\xff'*x
+short = lambda x: pack(b'>H', x)

 class Header(OrderedDict):

@@ -25,7 +27,9 @@ class Header(OrderedDict):
     '''

     ALIGN_BLOCK = False
-    POSITIONS = {}
+    POSITIONS = {} # Mapping of position field to field whose position should
+                   # be stored in the position field
+    SHORT_FIELDS = set()

     def __init__(self):
         OrderedDict.__init__(self)
@@ -36,13 +40,17 @@ class Header(OrderedDict):
             name, val = [x.strip() for x in line.partition('=')[0::2]]
             if val:
                 val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None,
-                    'nulls':nulls})
+                    'nulls':nulls, 'short':short, 'random':random})
             else:
                 val = 0
             if name in self:
                 raise ValueError('Duplicate field in definition: %r'%name)
             self[name] = val

+    @property
+    def dynamic_fields(self):
+        return tuple(k for k, v in self.iteritems() if v is None)
+
     def __call__(self, **kwargs):
         positions = {}
         for name, val in kwargs.iteritems():
@@ -58,7 +66,8 @@ class Header(OrderedDict):
             if val is None:
                 raise ValueError('Dynamic field %r not set'%name)
             if isinstance(val, (int, long)):
-                val = pack(b'>I', val)
+                fmt = 'H' if name in self.SHORT_FIELDS else 'I'
+                val = pack(b'>'+fmt, val)
             buf.write(val)

         for pos_field, field in self.POSITIONS.iteritems():
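
The Header changes above extend the small DEFINITION vocabulary (zeroes, nulls, NULL, DYN) with short and random, add SHORT_FIELDS for 2-byte fields, and document POSITIONS as back-patching one field with another field's byte offset. Purely to illustrate that DSL, a hypothetical subclass (not code from this commit; the real example is MOBIHeader in writer8/mobi.py below):

    class ExampleHeader(Header):
        # Hypothetical illustration of the DEFINITION DSL only.
        SHORT_FIELDS = {'flags'}               # packed as >H instead of >I
        POSITIONS = {'title_offset': 'title'}  # title_offset <- byte offset of title

        DEFINITION = '''
        # A fixed four byte magic
        ident = b'DEMO'

        # Two-byte field (listed in SHORT_FIELDS above)
        flags = 1

        # Filled via the eval() vocabulary (random, zeroes, NULL)
        uid = random.randint(0, 0xffffffff)
        padding = zeroes(16)
        missing = NULL

        # Back-patched from POSITIONS; no value needed here
        title_offset

        # DYN fields must be supplied when the header is rendered,
        # e.g. ExampleHeader()(title=b'My Book')
        title = DYN
        '''
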
@@ -182,7 +182,7 @@ class Index(object): # {{{
         if len(body) + self.HEADER_LENGTH >= 0x10000:
             raise too_large
         header = b'INDX'
-        buf.truncate(0)
+        buf.seek(0), buf.truncate(0)
         buf.write(pack(b'>I', self.HEADER_LENGTH))
         buf.write(b'\0'*4) # Unknown
         buf.write(pack(b'>I', 1)) # Header type? Or index record number?
@@ -26,6 +26,7 @@ from calibre.ebooks.oeb.parse_utils import barename
 from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
 from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
         ChunkIndex, GuideIndex)
+from calibre.ebooks.mobi.writer8.mobi import KF8Book

 XML_DOCS = OEB_DOCS | {SVG_MIME}

@@ -42,7 +43,7 @@ class KF8Writer(object):
         self.used_images = set()
         self.resources = resources
         self.flows = [None] # First flow item is reserved for the text
-        self.records = []
+        self.records = [None] # Placeholder for zeroth record

         self.log('\tGenerating KF8 markup...')
         self.dup_data()
@@ -266,9 +267,10 @@ class KF8Writer(object):
             start = 0 if i == 0 else self.fdst_table[-1].end
             self.fdst_table.append(FDST(start, start + len(flow)))
             entries.extend(self.fdst_table[-1])
-        rec = (b'FDST' + pack(b'>LL', len(self.fdst_table), 12) +
+        rec = (b'FDST' + pack(b'>LL', 12, len(self.fdst_table)) +
                 pack(b'>%dL'%len(entries), *entries))
         self.fdst_records = [rec]
+        self.fdst_count = len(self.fdst_table)

     def create_indices(self):
         self.skel_records = SkelIndex(self.skel_table)()
@@ -347,3 +349,7 @@ class KF8Writer(object):
         if self.guide_table:
             self.guide_records = GuideIndex(self.guide_table)()

+def create_kf8_book(oeb, opts, resources):
+    writer = KF8Writer(oeb, opts, resources)
+    return KF8Book(writer)
+
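
Together with the MOBIOutput changes earlier in this commit, the standalone KF8 path now reduces to this helper. A rough usage sketch, assuming oeb, opts, resources and output_path are prepared exactly as MOBIOutput.convert() prepares them:

    from calibre.ebooks.mobi.writer8.main import create_kf8_book

    kf8 = create_kf8_book(oeb, opts, resources)  # returns a KF8Book
    kf8.write(output_path)                       # serialize the PalmDB container to disk
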

src/calibre/ebooks/mobi/writer8/mobi.py (new file, 302 lines)
@@ -0,0 +1,302 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import time
+from struct import pack
+
+from calibre.ebooks.mobi.utils import RECORD_SIZE, utf8_text
+from calibre.ebooks.mobi.writer8.header import Header
+from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED)
+from calibre.ebooks.mobi.langcodes import iana2mobi
+from calibre.ebooks.mobi.writer8.exth import build_exth
+from calibre.utils.filenames import ascii_filename
+
+NULL_INDEX = 0xffffffff
+
+class MOBIHeader(Header): # {{{
+    '''
+    Represents the first record in a MOBI file, contains all the metadata about
+    the file.
+    '''
+
+    FILE_VERSION = 8
+
+    DEFINITION = '''
+    # 0: Compression
+    compression = DYN
+
+    # 2: Unused
+    unused1 = zeroes(2)
+
+    # 4: Text length
+    text_length = DYN
+
+    # 8: Last text record
+    last_text_record = DYN
+
+    # 10: Text record size
+    record_size = {record_size}
+
+    # 12: Unused
+    unused2
+
+    # 16: Ident
+    ident = b'MOBI'
+
+    # 20: Header length
+    header_length = 248
+
+    # 24: Book Type (0x2 - Book, 0x101 - News hierarchical, 0x102 - News
+    # (flat), 0x103 - News magazine same as 0x101)
+    book_type = DYN
+
+    # 28: Text encoding (utf-8 = 65001)
+    encoding = 65001
+
+    # 32: UID
+    uid = random.randint(0, 0xffffffff)
+
+    # 36: File version
+    file_version = {file_version}
+
+    # 40: Meta orth record (Chunk table index in KF8)
+    meta_orth_record = DYN
+
+    # 44: Meta infl index
+    meta_infl_index = NULL
+
+    # 48: Extra indices
+    extra_index0 = NULL
+    extra_index1 = NULL
+    extra_index2 = NULL
+    extra_index3 = NULL
+    extra_index4 = NULL
+    extra_index5 = NULL
+    extra_index6 = NULL
+    extra_index7 = NULL
+
+    # 80: First non text record
+    first_non_text_record = DYN
+
+    # 84: Title offset
+    title_offset
+
+    # 88: Title Length
+    title_length = DYN
+
+    # 92: Language code
+    language_code = DYN
+
+    # 96: Dictionary in and out languages
+    in_lang
+    out_lang
+
+    # 104: Min version
+    min_version = {file_version}
+
+    # 108: First resource record
+    first_resource_record = DYN
+
+    # 112: Huff/CDIC compression
+    huff_first_record
+    huff_count
+
+    # 120: DATP records
+    datp_first_record
+    datp_count
+
+    # 128: EXTH flags
+    exth_flags = DYN
+
+    # 132: Unknown
+    unknown = zeroes(32)
+
+    # 164: DRM
+    drm_offset = NULL
+    drm_count = NULL
+    drm_size
+    drm_flags
+
+    # 180: Unknown
+    unknown2 = zeroes(12)
+
+    # 192: FDST
+    fdst_record = DYN
+    fdst_count = DYN
+
+    # 200: FCI
+    fcis_record = NULL
+    fcis_count
+
+    # 208: FLIS
+    flis_record = NULL
+    flis_count
+
+    # 216: Unknown
+    unknown3 = zeroes(8)
+
+    # 224: SRCS
+    srcs_record = NULL
+    srcs_count
+
+    # 232: Unknown
+    unknown4 = nulls(8)
+
+    # 240: Extra data flags
+    # 0b1 - extra multibyte bytes after text records
+    # 0b10 - TBS indexing data (only used in MOBI 6)
+    # 0b100 - uncrossable breaks only used in MOBI 6
+    extra_data_flags = 1
+
+    # 244: KF8 Indices
+    ncx_index = DYN
+    chunk_index = DYN
+    skel_index = DYN
+    datp_index = NULL
+    guide_index = DYN
+
+    # 264: EXTH
+    exth = DYN
+
+    # Full title
+    full_title = DYN
+
+    # Padding to allow amazon's DTP service to add data
+    padding = zeroes(8192)
+    '''.format(record_size=RECORD_SIZE, file_version=FILE_VERSION)
+
+    SHORT_FIELDS = {'compression', 'last_text_record', 'record_size'}
+    ALIGN = True
+    POSITIONS = {'title_offset':'full_title'}
+
+    def format_value(self, name, val):
+        if name == 'compression':
+            val = PALMDOC if val else UNCOMPRESSED
+        return super(MOBIHeader, self).format_value(name, val)
+
+# }}}
+
+# Fields that need to be set in the MOBI Header are
+
+class KF8Book(object):
+
+    def __init__(self, writer):
+        self.build_records(writer)
+
+    def build_records(self, writer):
+        metadata = writer.oeb.metadata
+        # The text records
+        for x in ('last_text_record_idx', 'first_non_text_record_idx'):
+            setattr(self, x.rpartition('_')[0], getattr(writer, x))
+        self.records = writer.records
+        self.text_length = writer.text_length
+
+        # KF8 Indices
+        self.chunk_index = self.meta_orth_record = len(self.records)
+        self.records.extend(writer.chunk_records)
+        self.skel_index = len(self.records)
+        self.records.extend(writer.skel_records)
+        self.guide_index = NULL_INDEX
+        if writer.guide_records:
+            self.guide_index = len(self.records)
+            self.records.extend(writer.guide_records)
+        self.ncx_index = NULL_INDEX
+        if writer.ncx_records:
+            self.ncx_index = len(self.records)
+            self.records.extend(writer.ncx_records)
+
+        # Resources
+        resources = writer.resources
+        for x in ('cover_offset', 'thumbnail_offset', 'masthead_offset'):
+            setattr(self, x, getattr(resources, x))
+
+        self.first_resource_record = NULL_INDEX
+        if resources.records:
+            self.first_resource_record = len(self.records)
+            self.records.extend(resources.records)
+
+        self.first_resource_record = len(self.records)
+        self.num_of_resources = len(resources.records)
+
+        # FDST
+        self.fdst_count = writer.fdst_count
+        self.fdst_record = len(self.records)
+        self.records.extend(writer.fdst_records)
+
+        # EOF
+        self.records.append(b'\xe9\x8e\r\n') # EOF record
+
+
+        # Miscellaneous header fields
+        self.compression = writer.compress
+        self.book_type = 0x101 if writer.opts.mobi_periodical else 2
+        self.full_title = utf8_text(unicode(metadata.title[0]))
+        self.title_length = len(self.full_title)
+
+        self.language_code = iana2mobi(str(metadata.language[0]))
+        self.exth_flags = 0b1010000
+        if writer.opts.mobi_periodical:
+            self.exth_flags |= 0b1000
+
+        self.opts = writer.opts
+        self.start_offset = writer.start_offset
+        self.metadata = metadata
+
+    @property
+    def record0(self):
+        ''' We generate the EXTH header and record0 dynamically, to allow other
+        code to customize various values after build_record() has been
+        called'''
+        opts = self.opts
+        kuc = 0 if self.num_of_resources > 0 else None
+        self.exth = build_exth(self.metadata,
+                prefer_author_sort=opts.prefer_author_sort,
+                is_periodical=opts.mobi_periodical,
+                share_not_sync=opts.share_not_sync,
+                cover_offset=self.cover_offset,
+                thumbnail_offset=self.thumbnail_offset,
+                num_of_resources=self.num_of_resources,
+                kf8_unknown_count=kuc, be_kindlegen2=True,
+                start_offset=self.start_offset, mobi_doctype=self.book_type)
+
+        kwargs = {field:getattr(self, field) for field in
+                ('compression', 'text_length', 'last_text_record',
+                'book_type', 'meta_orth_record', 'first_non_text_record',
+                'title_length', 'language_code', 'first_resource_record',
+                'exth_flags', 'fdst_record', 'fdst_count', 'ncx_index',
+                'chunk_index', 'skel_index', 'guide_index', 'exth',
+                'full_title')}
+        return MOBIHeader()(**kwargs)
+
+    def write(self, outpath):
+        records = [self.record0] + self.records[1:]
+
+        with open(outpath, 'wb') as f:
+
+            # Write PalmDB Header
+
+            title = ascii_filename(self.full_title.decode('utf-8')).replace(
+                    ' ', '_')[:31]
+            title += (b'\0' * (32 - len(title)))
+            now = int(time.time())
+            nrecords = len(records)
+            f.write(title)
+            f.write(pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0))
+            f.write(b'BOOKMOBI')
+            f.write(pack(b'>IIH', (2*nrecords)-1, 0, nrecords))
+            offset = f.tell() + (8 * nrecords) + 2
+            for i, record in enumerate(records):
+                f.write(pack(b'>I', offset))
+                f.write(b'\0' + pack(b'>I', 2*i)[1:])
+                offset += len(record)
+            f.write(b'\0\0')
+
+            for rec in records:
+                f.write(rec)
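
A closing note on KF8Book.write() above: the PalmDB container is the 32-byte name, 46 further header bytes ending in the record count, an 8-byte list entry per record (a 4-byte offset plus an attribute byte and a 3-byte unique id, here 2*i), two pad bytes, and then the record payloads, so the first record lands at f.tell() + 8*nrecords + 2. A small worked check of that arithmetic (illustrative only, with dummy records):

    # Illustrative check of the record-offset arithmetic in KF8Book.write() above.
    records = [b'\0' * 10, b'\0' * 25, b'\xe9\x8e\r\n']  # dummy record payloads
    nrecords = len(records)

    # Bytes written before the record list: 32 (name) + 28 ('>HHIIIIII')
    # + 8 ('BOOKMOBI') + 10 ('>IIH') = 78
    first_offset = 78 + 8*nrecords + 2  # 8 bytes per list entry, then 2 pad bytes

    offsets, pos = [], first_offset
    for rec in records:
        offsets.append(pos)
        pos += len(rec)
    print(offsets)  # [104, 114, 139] for the dummy records above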