mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
KF8 Output: Create NCX and Guide records
This commit is contained in:
parent
0db1fcb103
commit
e4a55aae56
@ -599,4 +599,8 @@ class CNCX(object): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
def is_guide_ref_start(ref):
    """Return True if the guide reference *ref* marks the start of the book.

    A reference counts as a start reference when its title is ``start`` or
    its type is one of ``start``, ``other.start`` or ``text`` (both compared
    case-insensitively).

    :param ref: Object exposing ``title`` and ``type`` attributes. Either
        attribute may be ``None`` or empty; a missing value simply never
        matches instead of raising ``AttributeError``.
    :return: ``True`` or ``False``.
    """
    # Guard against a missing title: callers fall back to a placeholder
    # title only *after* asking whether the reference is a start reference.
    title = (ref.title or '').lower()
    if title == 'start':
        return True
    ref_type = (ref.type or '').lower()
    return ref_type in {'start', 'other.start', 'text'}
|
||||
|
||||
|
@ -12,6 +12,7 @@ import re
|
||||
from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
|
||||
namespace, prefixname, urlnormalize)
|
||||
from calibre.ebooks.mobi.mobiml import MBP_NS
|
||||
from calibre.ebooks.mobi.utils import is_guide_ref_start
|
||||
|
||||
from collections import defaultdict
|
||||
from urlparse import urldefrag
|
||||
@ -161,9 +162,7 @@ class Serializer(object):
|
||||
buf.write(b'title="')
|
||||
self.serialize_text(ref.title, quot=True)
|
||||
buf.write(b'" ')
|
||||
if (ref.title.lower() == 'start' or
|
||||
(ref.type and ref.type.lower() in {'start',
|
||||
'other.start', 'text'})):
|
||||
if is_guide_ref_start(ref):
|
||||
self._start_href = ref.href
|
||||
self.serialize_href(ref.href)
|
||||
# Space required or won't work, I kid you not
|
||||
|
@ -15,9 +15,10 @@ from io import BytesIO
|
||||
from calibre.ebooks.mobi.utils import CNCX, encint, align_block
|
||||
from calibre.ebooks.mobi.writer8.header import Header
|
||||
|
||||
TagMeta = namedtuple('TagMeta',
|
||||
TagMeta_ = namedtuple('TagMeta',
|
||||
'name number values_per_entry bitmask end_flag')
|
||||
EndTagTable = TagMeta('eof', 0, 0, 0, 1)
|
||||
TagMeta = lambda x:TagMeta_(*x)
|
||||
EndTagTable = TagMeta(('eof', 0, 0, 0, 1))
|
||||
|
||||
# map of mask to number of shifts needed, works with 1 bit and two-bit wide masks
|
||||
# could also be extended to 4 bit wide ones as well
|
||||
@ -118,7 +119,10 @@ class Index(object): # {{{
|
||||
cbs.append(ans)
|
||||
ans = 0
|
||||
continue
|
||||
nvals = len(tags.get(name, ()))
|
||||
try:
|
||||
nvals = len(tags.get(name, ()))
|
||||
except TypeError:
|
||||
nvals = 1
|
||||
nentries = nvals // vpe
|
||||
shifts = mask_to_bit_shifts[mask]
|
||||
ans |= mask & (nentries << shifts)
|
||||
@ -132,36 +136,51 @@ class Index(object): # {{{
|
||||
self.entries)
|
||||
|
||||
rendered_entries = []
|
||||
offset = 0
|
||||
index, idxt, buf = BytesIO(), BytesIO(), BytesIO()
|
||||
IndexEntry = namedtuple('IndexEntry', 'offset length raw')
|
||||
last_lead_text = b''
|
||||
too_large = ValueError('Index has too many entries, calibre does not'
|
||||
' support generating multiple index records at this'
|
||||
' time.')
|
||||
|
||||
for i, x in enumerate(self.entries):
|
||||
control_bytes = self.control_bytes[i]
|
||||
leading_text, tags = x
|
||||
buf.truncate(0)
|
||||
buf.seek(0), buf.truncate(0)
|
||||
leading_text = (leading_text.encode('utf-8') if
|
||||
isinstance(leading_text, unicode) else leading_text)
|
||||
raw = bytearray(leading_text)
|
||||
raw.insert(0, len(leading_text))
|
||||
buf.write(bytes(raw))
|
||||
buf.write(control_bytes)
|
||||
buf.write(bytes(bytearray(control_bytes)))
|
||||
for tag in self.tag_types:
|
||||
values = tags.get(tag.name, None)
|
||||
if values is None: continue
|
||||
try:
|
||||
len(values)
|
||||
except TypeError:
|
||||
values = [values]
|
||||
if values:
|
||||
for val in values:
|
||||
buf.write(encint(val))
|
||||
try:
|
||||
buf.write(encint(val))
|
||||
except ValueError:
|
||||
raise ValueError('Invalid values for %r: %r'%(
|
||||
tag, values))
|
||||
raw = buf.getvalue()
|
||||
offset = index.tell()
|
||||
if offset + self.HEADER_LENGTH >= 0x10000:
|
||||
raise too_large
|
||||
rendered_entries.append(IndexEntry(offset, len(raw), raw))
|
||||
idxt.write(pack(b'>H', self.HEADER_LENGTH+offset))
|
||||
offset += len(raw)
|
||||
index.write(raw)
|
||||
last_lead_text = leading_text
|
||||
|
||||
index_block = align_block(index.getvalue())
|
||||
idxt_block = align_block(b'IDXT' + idxt.getvalue())
|
||||
body = index_block + idxt_block
|
||||
if len(body) + self.HEADER_LENGTH >= 0x10000:
|
||||
raise ValueError('Index has too many entries, calibre does not'
|
||||
' support generating multiple index records at this'
|
||||
' time.')
|
||||
|
||||
raise too_large
|
||||
header = b'INDX'
|
||||
buf.truncate(0)
|
||||
buf.write(pack(b'>I', self.HEADER_LENGTH))
|
||||
@ -185,10 +204,15 @@ class Index(object): # {{{
|
||||
tagx = self.generate_tagx()
|
||||
idxt = (b'IDXT' + pack(b'>H', IndexHeader.HEADER_LENGTH + len(tagx)) +
|
||||
b'\0')
|
||||
# Last index
|
||||
idx = bytes(bytearray([len(last_lead_text)])) + last_lead_text
|
||||
idx += pack(b'>H', len(rendered_entries))
|
||||
|
||||
header = {
|
||||
'num_of_entries': len(rendered_entries),
|
||||
'num_of_cncx': len(self.cncx),
|
||||
'tagx':tagx,
|
||||
'last_index':align_block(idx),
|
||||
'idxt':idxt
|
||||
}
|
||||
header = IndexHeader()(**header)
|
||||
@ -235,6 +259,74 @@ class ChunkIndex(Index):
|
||||
'file_number':c.file_number,
|
||||
'sequence_number':c.sequence_number,
|
||||
'geometry':(c.start_pos, c.length),
|
||||
}) for s in chunk_table
|
||||
}) for c in chunk_table
|
||||
]
|
||||
|
||||
class GuideIndex(Index):
    """Index whose entries are built from a table of guide references.

    Each entry is keyed by the reference type and carries two tags: the
    CNCX lookup result for the reference title, and its ``pos_fid`` pair.
    """

    tag_types = tuple(TagMeta(spec) for spec in (
        ('title', 1, 1, 1, 0),
        ('pos_fid', 6, 2, 2, 0),
        EndTagTable,
    ))

    def __init__(self, guide_table):
        # Every title goes into the CNCX so it can be looked up below.
        titles = (ref.title for ref in guide_table)
        self.cncx = CNCX(titles)

        built = []
        for ref in guide_table:
            tags = {
                'title': self.cncx[ref.title],
                'pos_fid': ref.pos_fid,
            }
            built.append((ref.type, tags))
        self.entries = built
|
||||
|
||||
|
||||
class NCXIndex(Index):
    """Index whose entries are built from a flattened table of ToC entries.

    ``toc_table`` is a sequence of dicts. Each dict must have ``index`` and
    ``label`` keys and may have any of the other field names listed in
    ``tag_types``.
    """

    control_byte_count = 2
    tag_types = tuple(TagMeta(spec) for spec in (
        ('offset', 1, 1, 1, 0),
        ('length', 2, 1, 2, 0),
        ('label', 3, 1, 4, 0),
        ('depth', 4, 1, 8, 0),
        ('parent', 21, 1, 16, 0),
        ('first_child', 22, 1, 32, 0),
        ('last_child', 23, 1, 64, 0),
        ('pos_fid', 6, 2, 128, 0),
        EndTagTable,
        ('image', 69, 1, 1, 0),
        ('description', 70, 1, 2, 0),
        ('author', 71, 1, 4, 0),
        ('caption', 72, 1, 8, 0),
        ('attribution', 73, 1, 16, 0),
        EndTagTable,
    ))

    def __init__(self, toc_table):
        # Collect every string that has to live in the CNCX: the label of
        # each entry, plus its author and description when non-empty.
        cncx_strings = []
        for row in toc_table:
            cncx_strings.append(row['label'])
            for optional in ('author', 'description'):
                text = row.get(optional, None)
                if text:
                    cncx_strings.append(text)
        self.cncx = CNCX(cncx_strings)

        # Fields copied through unchanged.
        plain_fields = ('offset', 'length', 'depth', 'pos_fid', 'parent',
                        'first_child', 'last_child')
        # Fields stored as the result of a CNCX lookup.
        string_fields = ('label', 'description', 'author')

        def to_entry(row):
            tags = {}
            for name in plain_fields:
                if name in row:
                    tags[name] = row[name]
            for name in string_fields:
                if name in row:
                    tags[name] = self.cncx[row[name]]
            # The entry key is the entry's index as two-digit lowercase hex.
            return ('%02x' % row['index'], tags)

        self.entries = [to_entry(row) for row in toc_table]
|
||||
|
||||
|
||||
|
||||
|
@ -17,12 +17,15 @@ import cssutils
|
||||
from lxml import etree
|
||||
|
||||
from calibre import isbytestring, force_unicode
|
||||
from calibre.ebooks.mobi.utils import create_text_record, to_base
|
||||
from calibre.ebooks.mobi.utils import (create_text_record, to_base,
|
||||
is_guide_ref_start)
|
||||
from calibre.ebooks.compression.palmdoc import compress_doc
|
||||
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
|
||||
extract, XHTML, urlnormalize)
|
||||
from calibre.ebooks.oeb.parse_utils import barename
|
||||
from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
|
||||
from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
|
||||
ChunkIndex, GuideIndex)
|
||||
|
||||
XML_DOCS = OEB_DOCS | {SVG_MIME}
|
||||
|
||||
@ -38,11 +41,11 @@ class KF8Writer(object):
|
||||
self.log.info('Creating KF8 output')
|
||||
self.used_images = set()
|
||||
self.resources = resources
|
||||
self.dup_data()
|
||||
self.flows = [None] # First flow item is reserved for the text
|
||||
self.records = []
|
||||
self.fdst_table = []
|
||||
|
||||
self.log('\tGenerating KF8 markup...')
|
||||
self.dup_data()
|
||||
self.replace_resource_links()
|
||||
self.extract_css_into_flows()
|
||||
self.extract_svg_into_flows()
|
||||
@ -52,7 +55,10 @@ class KF8Writer(object):
|
||||
# Dump the cloned data as it is no longer needed
|
||||
del self._data_cache
|
||||
self.create_text_records()
|
||||
self.create_fdst_table()
|
||||
self.log('\tCreating indices...')
|
||||
self.create_fdst_records()
|
||||
self.create_indices()
|
||||
self.create_guide()
|
||||
|
||||
def dup_data(self):
|
||||
''' Duplicate data so that any changes we make to markup/CSS only
|
||||
@ -231,7 +237,7 @@ class KF8Writer(object):
|
||||
records_size = 0
|
||||
|
||||
if self.compress:
|
||||
self.oeb.logger.info(' Compressing markup content...')
|
||||
self.oeb.logger.info('\tCompressing markup...')
|
||||
|
||||
while text.tell() < self.text_length:
|
||||
data, overlap = create_text_record(text)
|
||||
@ -252,9 +258,90 @@ class KF8Writer(object):
|
||||
self.records.append(b'\x00'*(records_size % 4))
|
||||
self.first_non_text_record_idx += 1
|
||||
|
||||
def create_fdst_table(self):
|
||||
def create_fdst_records(self):
    """Build the flow table and the single FDST record from ``self.flows``.

    ``self.fdst_table`` becomes a list of ``(start, end)`` named tuples, one
    per flow, where each flow starts where the previous one ended.
    ``self.fdst_records`` becomes a one-element list holding the packed
    record: ``b'FDST'``, the flow count, the constant 12, then every start
    and end value as big-endian unsigned 32-bit integers.
    """
    FDST = namedtuple('Flow', 'start end')
    self.fdst_table = table = []
    boundaries = []
    cursor = 0
    for flow in self.flows:
        span = FDST(cursor, cursor + len(flow))
        table.append(span)
        boundaries.extend(span)
        cursor = span.end

    header = pack(b'>LL', len(table), 12)
    payload = pack(b'>%dL' % len(boundaries), *boundaries)
    self.fdst_records = [b'FDST' + header + payload]
|
||||
|
||||
def create_indices(self):
    """Build the skeleton, chunk and NCX index records.

    ``self.skel_records`` and ``self.chunk_records`` are always set.
    ``self.ncx_records`` stays an empty list, after a warning is logged,
    when the ToC reports fewer than two items. Otherwise the ToC is
    flattened into a list of entry dicts and handed to ``NCXIndex``.
    """
    self.skel_records = SkelIndex(self.skel_table)()
    self.chunk_records = ChunkIndex(self.chunk_table)()
    self.ncx_records = []
    toc = self.oeb.toc
    max_depth = toc.depth()
    is_periodical = self.opts.mobi_periodical
    if toc.count() < 2:
        self.log.warn('Document has no ToC, MOBI will have no NCX index')
        return

    # Flatten the ToC into a depth first list
    entries = []
    # Maps a parent's index to [first child index, last child index]. It is
    # filled while flattening so that no second scan over all entries is
    # needed per entry to find its children.
    child_span = {}
    depth_adjust = 0 if is_periodical else 1
    fl = toc.iter() if is_periodical else toc.iterdescendants()
    for i, item in enumerate(fl):
        entry = {
            'index': i,
            'depth': max_depth - item.depth() - depth_adjust,
            'href': item.href,
            'label': (item.title or _('Unknown')),
        }
        entries.append(entry)
        # Tag the direct children so they can find this entry when the
        # iteration reaches them.
        for child in item:
            child.ncx_parent = entry
        p = getattr(item, 'ncx_parent', None)
        if p is not None:
            parent_index = p['index']
            entry['parent'] = parent_index
            # The first child seen fixes the start of the span; every
            # later child moves the end of the span forward.
            span = child_span.setdefault(parent_index, [i, i])
            span[1] = i
        if is_periodical:
            if item.author:
                entry['author'] = item.author
            if item.description:
                entry['description'] = item.description

    for entry in entries:
        span = child_span.get(entry['index'])
        if span is not None:
            entry['first_child'], entry['last_child'] = span
        href, frag = entry.pop('href').partition('#')[0::2]
        # Prefer the exact anchor, then fall back to the top of the file.
        aid = self.id_map.get((href, frag), None)
        if aid is None:
            aid = self.id_map.get((href, ''), None)
        if aid is None:
            pos, fid = 0, 0
        else:
            pos, fid = self.aid_offset_map[aid]
        chunk = self.chunk_table[pos]
        entry['pos_fid'] = (pos, fid)
        entry['offset'] = chunk.insert_pos + fid
        entry['length'] = chunk.length

    self.ncx_records = NCXIndex(entries)()
|
||||
|
||||
def create_guide(self):
    """Build the guide table and guide index records from ``self.oeb.guide``.

    References whose target cannot be resolved through ``self.id_map`` are
    skipped. ``self.start_offset`` is set to the ``pos`` value of a
    reference accepted by ``is_guide_ref_start`` and otherwise stays
    ``None``. ``self.guide_records`` stays an empty list when no reference
    could be resolved.
    """
    GuideRef = namedtuple('GuideRef', 'title type pos_fid')
    self.start_offset = None
    self.guide_table = []
    self.guide_records = []

    guide = self.oeb.guide
    for key in guide:
        ref = guide[key]
        parts = ref.href.partition('#')
        href, frag = parts[0], parts[2]
        # Prefer the exact anchor, then fall back to the top of the file.
        aid = self.id_map.get((href, frag), None)
        if aid is None:
            aid = self.id_map.get((href, ''))
            if aid is None:
                continue
        pos, fid = self.aid_offset_map[aid]
        if is_guide_ref_start(ref):
            self.start_offset = pos
        title = ref.title or _('Unknown')
        self.guide_table.append(GuideRef(title, ref.type, (pos, fid)))

    if self.guide_table:
        self.guide_records = GuideIndex(self.guide_table)()
|
||||
|
||||
|
@ -359,14 +359,14 @@ class Chunker(object):
|
||||
if pos_fid is None:
|
||||
raise ValueError('Could not find chunk for aid: %r'%
|
||||
match.group(1))
|
||||
aid_map[match.group(1)] = (to_base(chunk.sequence_number,
|
||||
base=32, min_num_digits=4),
|
||||
to_href(offset-chunk.insert_pos))
|
||||
aid_map[match.group(1)] = pos_fid
|
||||
|
||||
self.aid_offset_map = aid_map
|
||||
|
||||
def to_placeholder(aid):
|
||||
return bytes(':'.join(aid_map[aid]))
|
||||
pos, fid = aid_map[aid]
|
||||
pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
|
||||
return bytes(':'.join((pos, fid)))
|
||||
|
||||
placeholder_map = {bytes(k):to_placeholder(v) for k, v in
|
||||
self.placeholder_map.iteritems()}
|
||||
|
Loading…
x
Reference in New Issue
Block a user