mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
KF8 Output: Create NCX and Guide records
This commit is contained in:
parent
0db1fcb103
commit
e4a55aae56
@ -599,4 +599,8 @@ class CNCX(object): # {{{
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
def is_guide_ref_start(ref):
    '''
    Return True if the guide reference ``ref`` marks the start of the book.

    A reference is a start reference when its title is ``start`` or its type
    is one of ``start``, ``other.start`` or ``text``. Both comparisons are
    case-insensitive.

    A missing (None or empty) title or type never matches. Previously a
    reference with a None title raised AttributeError on ``.lower()``.

    :param ref: object with ``title`` and ``type`` attributes, each either a
                string or None
    :return: True or False
    '''
    title = (ref.title or '').lower()
    rtype = (ref.type or '').lower()
    return title == 'start' or rtype in {'start', 'other.start', 'text'}
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@ import re
|
|||||||
from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
|
from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
|
||||||
namespace, prefixname, urlnormalize)
|
namespace, prefixname, urlnormalize)
|
||||||
from calibre.ebooks.mobi.mobiml import MBP_NS
|
from calibre.ebooks.mobi.mobiml import MBP_NS
|
||||||
|
from calibre.ebooks.mobi.utils import is_guide_ref_start
|
||||||
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from urlparse import urldefrag
|
from urlparse import urldefrag
|
||||||
@ -161,9 +162,7 @@ class Serializer(object):
|
|||||||
buf.write(b'title="')
|
buf.write(b'title="')
|
||||||
self.serialize_text(ref.title, quot=True)
|
self.serialize_text(ref.title, quot=True)
|
||||||
buf.write(b'" ')
|
buf.write(b'" ')
|
||||||
if (ref.title.lower() == 'start' or
|
if is_guide_ref_start(ref):
|
||||||
(ref.type and ref.type.lower() in {'start',
|
|
||||||
'other.start', 'text'})):
|
|
||||||
self._start_href = ref.href
|
self._start_href = ref.href
|
||||||
self.serialize_href(ref.href)
|
self.serialize_href(ref.href)
|
||||||
# Space required or won't work, I kid you not
|
# Space required or won't work, I kid you not
|
||||||
|
@ -15,9 +15,10 @@ from io import BytesIO
|
|||||||
from calibre.ebooks.mobi.utils import CNCX, encint, align_block
|
from calibre.ebooks.mobi.utils import CNCX, encint, align_block
|
||||||
from calibre.ebooks.mobi.writer8.header import Header
|
from calibre.ebooks.mobi.writer8.header import Header
|
||||||
|
|
||||||
TagMeta = namedtuple('TagMeta',
|
TagMeta_ = namedtuple('TagMeta',
|
||||||
'name number values_per_entry bitmask end_flag')
|
'name number values_per_entry bitmask end_flag')
|
||||||
EndTagTable = TagMeta('eof', 0, 0, 0, 1)
|
TagMeta = lambda x:TagMeta_(*x)
|
||||||
|
EndTagTable = TagMeta(('eof', 0, 0, 0, 1))
|
||||||
|
|
||||||
# map of mask to number of shifts needed, works with 1 bit and two-bit wide masks
|
# map of mask to number of shifts needed, works with 1 bit and two-bit wide masks
|
||||||
# could also be extended to 4 bit wide ones as well
|
# could also be extended to 4 bit wide ones as well
|
||||||
@ -118,7 +119,10 @@ class Index(object): # {{{
|
|||||||
cbs.append(ans)
|
cbs.append(ans)
|
||||||
ans = 0
|
ans = 0
|
||||||
continue
|
continue
|
||||||
nvals = len(tags.get(name, ()))
|
try:
|
||||||
|
nvals = len(tags.get(name, ()))
|
||||||
|
except TypeError:
|
||||||
|
nvals = 1
|
||||||
nentries = nvals // vpe
|
nentries = nvals // vpe
|
||||||
shifts = mask_to_bit_shifts[mask]
|
shifts = mask_to_bit_shifts[mask]
|
||||||
ans |= mask & (nentries << shifts)
|
ans |= mask & (nentries << shifts)
|
||||||
@ -132,36 +136,51 @@ class Index(object): # {{{
|
|||||||
self.entries)
|
self.entries)
|
||||||
|
|
||||||
rendered_entries = []
|
rendered_entries = []
|
||||||
offset = 0
|
|
||||||
index, idxt, buf = BytesIO(), BytesIO(), BytesIO()
|
index, idxt, buf = BytesIO(), BytesIO(), BytesIO()
|
||||||
IndexEntry = namedtuple('IndexEntry', 'offset length raw')
|
IndexEntry = namedtuple('IndexEntry', 'offset length raw')
|
||||||
|
last_lead_text = b''
|
||||||
|
too_large = ValueError('Index has too many entries, calibre does not'
|
||||||
|
' support generating multiple index records at this'
|
||||||
|
' time.')
|
||||||
|
|
||||||
for i, x in enumerate(self.entries):
|
for i, x in enumerate(self.entries):
|
||||||
control_bytes = self.control_bytes[i]
|
control_bytes = self.control_bytes[i]
|
||||||
leading_text, tags = x
|
leading_text, tags = x
|
||||||
buf.truncate(0)
|
buf.seek(0), buf.truncate(0)
|
||||||
|
leading_text = (leading_text.encode('utf-8') if
|
||||||
|
isinstance(leading_text, unicode) else leading_text)
|
||||||
raw = bytearray(leading_text)
|
raw = bytearray(leading_text)
|
||||||
raw.insert(0, len(leading_text))
|
raw.insert(0, len(leading_text))
|
||||||
buf.write(bytes(raw))
|
buf.write(bytes(raw))
|
||||||
buf.write(control_bytes)
|
buf.write(bytes(bytearray(control_bytes)))
|
||||||
for tag in self.tag_types:
|
for tag in self.tag_types:
|
||||||
values = tags.get(tag.name, None)
|
values = tags.get(tag.name, None)
|
||||||
|
if values is None: continue
|
||||||
|
try:
|
||||||
|
len(values)
|
||||||
|
except TypeError:
|
||||||
|
values = [values]
|
||||||
if values:
|
if values:
|
||||||
for val in values:
|
for val in values:
|
||||||
buf.write(encint(val))
|
try:
|
||||||
|
buf.write(encint(val))
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError('Invalid values for %r: %r'%(
|
||||||
|
tag, values))
|
||||||
raw = buf.getvalue()
|
raw = buf.getvalue()
|
||||||
|
offset = index.tell()
|
||||||
|
if offset + self.HEADER_LENGTH >= 0x10000:
|
||||||
|
raise too_large
|
||||||
rendered_entries.append(IndexEntry(offset, len(raw), raw))
|
rendered_entries.append(IndexEntry(offset, len(raw), raw))
|
||||||
idxt.write(pack(b'>H', self.HEADER_LENGTH+offset))
|
idxt.write(pack(b'>H', self.HEADER_LENGTH+offset))
|
||||||
offset += len(raw)
|
|
||||||
index.write(raw)
|
index.write(raw)
|
||||||
|
last_lead_text = leading_text
|
||||||
|
|
||||||
index_block = align_block(index.getvalue())
|
index_block = align_block(index.getvalue())
|
||||||
idxt_block = align_block(b'IDXT' + idxt.getvalue())
|
idxt_block = align_block(b'IDXT' + idxt.getvalue())
|
||||||
body = index_block + idxt_block
|
body = index_block + idxt_block
|
||||||
if len(body) + self.HEADER_LENGTH >= 0x10000:
|
if len(body) + self.HEADER_LENGTH >= 0x10000:
|
||||||
raise ValueError('Index has too many entries, calibre does not'
|
raise too_large
|
||||||
' support generating multiple index records at this'
|
|
||||||
' time.')
|
|
||||||
|
|
||||||
header = b'INDX'
|
header = b'INDX'
|
||||||
buf.truncate(0)
|
buf.truncate(0)
|
||||||
buf.write(pack(b'>I', self.HEADER_LENGTH))
|
buf.write(pack(b'>I', self.HEADER_LENGTH))
|
||||||
@ -185,10 +204,15 @@ class Index(object): # {{{
|
|||||||
tagx = self.generate_tagx()
|
tagx = self.generate_tagx()
|
||||||
idxt = (b'IDXT' + pack(b'>H', IndexHeader.HEADER_LENGTH + len(tagx)) +
|
idxt = (b'IDXT' + pack(b'>H', IndexHeader.HEADER_LENGTH + len(tagx)) +
|
||||||
b'\0')
|
b'\0')
|
||||||
|
# Last index
|
||||||
|
idx = bytes(bytearray([len(last_lead_text)])) + last_lead_text
|
||||||
|
idx += pack(b'>H', len(rendered_entries))
|
||||||
|
|
||||||
header = {
|
header = {
|
||||||
'num_of_entries': len(rendered_entries),
|
'num_of_entries': len(rendered_entries),
|
||||||
'num_of_cncx': len(self.cncx),
|
'num_of_cncx': len(self.cncx),
|
||||||
'tagx':tagx,
|
'tagx':tagx,
|
||||||
|
'last_index':align_block(idx),
|
||||||
'idxt':idxt
|
'idxt':idxt
|
||||||
}
|
}
|
||||||
header = IndexHeader()(**header)
|
header = IndexHeader()(**header)
|
||||||
@ -235,6 +259,74 @@ class ChunkIndex(Index):
|
|||||||
'file_number':c.file_number,
|
'file_number':c.file_number,
|
||||||
'sequence_number':c.sequence_number,
|
'sequence_number':c.sequence_number,
|
||||||
'geometry':(c.start_pos, c.length),
|
'geometry':(c.start_pos, c.length),
|
||||||
}) for s in chunk_table
|
}) for c in chunk_table
|
||||||
]
|
]
|
||||||
|
|
||||||
|
class GuideIndex(Index):
    '''
    Index of the guide references.

    Every entry uses the reference type as its leading text and carries two
    tags: ``title`` (the result of looking the reference title up in the
    CNCX) and ``pos_fid`` (taken unchanged from the reference).
    '''

    tag_types = tuple(TagMeta(spec) for spec in (
        ('title', 1, 1, 1, 0),
        ('pos_fid', 6, 2, 2, 0),
        EndTagTable,
    ))

    def __init__(self, guide_table):
        # The CNCX has to be populated with every title before the entries
        # are built, because each entry looks its title up in it
        self.cncx = CNCX(ref.title for ref in guide_table)

        entries = []
        for ref in guide_table:
            tags = {
                'title': self.cncx[ref.title],
                'pos_fid': ref.pos_fid,
            }
            entries.append((ref.type, tags))
        self.entries = entries
|
||||||
|
|
||||||
|
|
||||||
|
class NCXIndex(Index):
    '''
    Index built from the flattened table of contents.

    ``toc_table`` is a sequence of dicts. Each must have ``index`` and
    ``label`` keys; the other keys named in ``__init__`` are copied into the
    entry only when present. Text values (label, description, author) are
    stored as the result of looking them up in the CNCX, all other values are
    stored as-is.
    '''

    control_byte_count = 2

    tag_types = tuple(TagMeta(spec) for spec in (
        ('offset', 1, 1, 1, 0),
        ('length', 2, 1, 2, 0),
        ('label', 3, 1, 4, 0),
        ('depth', 4, 1, 8, 0),
        ('parent', 21, 1, 16, 0),
        ('first_child', 22, 1, 32, 0),
        ('last_child', 23, 1, 64, 0),
        ('pos_fid', 6, 2, 128, 0),
        EndTagTable,
        ('image', 69, 1, 1, 0),
        ('description', 70, 1, 2, 0),
        ('author', 71, 1, 4, 0),
        ('caption', 72, 1, 8, 0),
        ('attribution', 73, 1, 16, 0),
        EndTagTable,
    ))

    def __init__(self, toc_table):
        # Collect every string that needs a CNCX record: the label always,
        # author and description only when they are non-empty
        strings = []
        for entry in toc_table:
            strings.append(entry['label'])
            for key in ('author', 'description'):
                text = entry.get(key, None)
                if text:
                    strings.append(text)
        self.cncx = CNCX(strings)

        plain_fields = ('offset', 'length', 'depth', 'pos_fid', 'parent',
                'first_child', 'last_child')
        text_fields = ('label', 'description', 'author')

        def to_entry(x):
            # Plain values are copied, text values are replaced by their
            # CNCX lookup. The leading text is the index as two hex digits.
            ans = {f: x[f] for f in plain_fields if f in x}
            for f in text_fields:
                if f in x:
                    ans[f] = self.cncx[x[f]]
            return ('%02x' % x['index'], ans)

        self.entries = [to_entry(x) for x in toc_table]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -17,12 +17,15 @@ import cssutils
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre import isbytestring, force_unicode
|
from calibre import isbytestring, force_unicode
|
||||||
from calibre.ebooks.mobi.utils import create_text_record, to_base
|
from calibre.ebooks.mobi.utils import (create_text_record, to_base,
|
||||||
|
is_guide_ref_start)
|
||||||
from calibre.ebooks.compression.palmdoc import compress_doc
|
from calibre.ebooks.compression.palmdoc import compress_doc
|
||||||
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
|
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
|
||||||
extract, XHTML, urlnormalize)
|
extract, XHTML, urlnormalize)
|
||||||
from calibre.ebooks.oeb.parse_utils import barename
|
from calibre.ebooks.oeb.parse_utils import barename
|
||||||
from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
|
from calibre.ebooks.mobi.writer8.skeleton import Chunker, aid_able_tags, to_href
|
||||||
|
from calibre.ebooks.mobi.writer8.index import (NCXIndex, SkelIndex,
|
||||||
|
ChunkIndex, GuideIndex)
|
||||||
|
|
||||||
XML_DOCS = OEB_DOCS | {SVG_MIME}
|
XML_DOCS = OEB_DOCS | {SVG_MIME}
|
||||||
|
|
||||||
@ -38,11 +41,11 @@ class KF8Writer(object):
|
|||||||
self.log.info('Creating KF8 output')
|
self.log.info('Creating KF8 output')
|
||||||
self.used_images = set()
|
self.used_images = set()
|
||||||
self.resources = resources
|
self.resources = resources
|
||||||
self.dup_data()
|
|
||||||
self.flows = [None] # First flow item is reserved for the text
|
self.flows = [None] # First flow item is reserved for the text
|
||||||
self.records = []
|
self.records = []
|
||||||
self.fdst_table = []
|
|
||||||
|
|
||||||
|
self.log('\tGenerating KF8 markup...')
|
||||||
|
self.dup_data()
|
||||||
self.replace_resource_links()
|
self.replace_resource_links()
|
||||||
self.extract_css_into_flows()
|
self.extract_css_into_flows()
|
||||||
self.extract_svg_into_flows()
|
self.extract_svg_into_flows()
|
||||||
@ -52,7 +55,10 @@ class KF8Writer(object):
|
|||||||
# Dump the cloned data as it is no longer needed
|
# Dump the cloned data as it is no longer needed
|
||||||
del self._data_cache
|
del self._data_cache
|
||||||
self.create_text_records()
|
self.create_text_records()
|
||||||
self.create_fdst_table()
|
self.log('\tCreating indices...')
|
||||||
|
self.create_fdst_records()
|
||||||
|
self.create_indices()
|
||||||
|
self.create_guide()
|
||||||
|
|
||||||
def dup_data(self):
|
def dup_data(self):
|
||||||
''' Duplicate data so that any changes we make to markup/CSS only
|
''' Duplicate data so that any changes we make to markup/CSS only
|
||||||
@ -231,7 +237,7 @@ class KF8Writer(object):
|
|||||||
records_size = 0
|
records_size = 0
|
||||||
|
|
||||||
if self.compress:
|
if self.compress:
|
||||||
self.oeb.logger.info(' Compressing markup content...')
|
self.oeb.logger.info('\tCompressing markup...')
|
||||||
|
|
||||||
while text.tell() < self.text_length:
|
while text.tell() < self.text_length:
|
||||||
data, overlap = create_text_record(text)
|
data, overlap = create_text_record(text)
|
||||||
@ -252,9 +258,90 @@ class KF8Writer(object):
|
|||||||
self.records.append(b'\x00'*(records_size % 4))
|
self.records.append(b'\x00'*(records_size % 4))
|
||||||
self.first_non_text_record_idx += 1
|
self.first_non_text_record_idx += 1
|
||||||
|
|
||||||
def create_fdst_table(self):
|
def create_fdst_records(self):
|
||||||
FDST = namedtuple('Flow', 'start end')
|
FDST = namedtuple('Flow', 'start end')
|
||||||
|
entries = []
|
||||||
|
self.fdst_table = []
|
||||||
for i, flow in enumerate(self.flows):
|
for i, flow in enumerate(self.flows):
|
||||||
start = 0 if i == 0 else self.fdst_table[-1].end
|
start = 0 if i == 0 else self.fdst_table[-1].end
|
||||||
self.fdst_table.append(FDST(start, start + len(flow)))
|
self.fdst_table.append(FDST(start, start + len(flow)))
|
||||||
|
entries.extend(self.fdst_table[-1])
|
||||||
|
rec = (b'FDST' + pack(b'>LL', len(self.fdst_table), 12) +
|
||||||
|
pack(b'>%dL'%len(entries), *entries))
|
||||||
|
self.fdst_records = [rec]
|
||||||
|
|
||||||
|
def create_indices(self):
    '''
    Build the skeleton, chunk and NCX index records.

    The results are stored in self.skel_records, self.chunk_records and
    self.ncx_records. If ``toc.count()`` is less than 2, a warning is logged
    and self.ncx_records is left as an empty list.

    Note that an ``ncx_parent`` attribute is set on the child nodes of every
    ToC item visited, and is not removed afterwards.
    '''
    self.skel_records = SkelIndex(self.skel_table)()
    self.chunk_records = ChunkIndex(self.chunk_table)()
    self.ncx_records = []
    toc = self.oeb.toc
    max_depth = toc.depth()
    entries = []
    is_periodical = self.opts.mobi_periodical
    if toc.count() < 2:
        self.log.warn('Document has no ToC, MOBI will have no NCX index')
        return

    # Flatten the ToC into a depth first list
    fl = toc.iter() if is_periodical else toc.iterdescendants()
    for i, item in enumerate(fl):
        entry = {'index':i, 'depth': max_depth - item.depth() - (0 if
            is_periodical else 1), 'href':item.href, 'label':(item.title or
                _('Unknown'))}
        entries.append(entry)
        # Remember the parent entry on each child node, so that the child
        # can record its parent's index when it is visited later
        for child in item:
            child.ncx_parent = entry
        p = getattr(item, 'ncx_parent', None)
        if p is not None:
            entry['parent'] = p['index']
        if is_periodical:
            if item.author:
                entry['author'] = item.author
            if item.description:
                entry['description'] = item.description

    # Group the child indices by parent index in a single pass, instead of
    # rescanning the whole entry list for every entry. The lists keep the
    # order of entries, so first/last child are the same as a full rescan
    # would give.
    children_of = {}
    for entry in entries:
        if 'parent' in entry:
            children_of.setdefault(entry['parent'], []).append(
                    entry['index'])

    for entry in entries:
        children = children_of.get(entry['index'])
        if children:
            entry['first_child'] = children[0]
            entry['last_child'] = children[-1]

        href = entry.pop('href')
        href, frag = href.partition('#')[0::2]
        # Try the exact anchor first, then fall back to the top of the file
        aid = self.id_map.get((href, frag), None)
        if aid is None:
            aid = self.id_map.get((href, ''), None)
        if aid is None:
            pos, fid = 0, 0
        else:
            pos, fid = self.aid_offset_map[aid]
        chunk = self.chunk_table[pos]
        offset = chunk.insert_pos + fid
        length = chunk.length
        entry['pos_fid'] = (pos, fid)
        entry['offset'] = offset
        entry['length'] = length

    self.ncx_records = NCXIndex(entries)()
|
||||||
|
|
||||||
|
def create_guide(self):
    '''
    Build the guide table and, if it is not empty, the guide index records.

    Sets self.start_offset (None unless a start reference is found),
    self.guide_table and self.guide_records. References whose target cannot
    be found in self.id_map are skipped.
    '''
    self.start_offset = None
    self.guide_table = []
    self.guide_records = []
    GuideRef = namedtuple('GuideRef', 'title type pos_fid')

    for key in self.oeb.guide:
        ref = self.oeb.guide[key]
        href, frag = ref.href.partition('#')[0::2]

        # Exact anchor first, then the file itself, otherwise give up on
        # this reference
        aid = self.id_map.get((href, frag), None)
        if aid is None:
            aid = self.id_map.get((href, ''))
            if aid is None:
                continue

        pos, fid = self.aid_offset_map[aid]
        if is_guide_ref_start(ref):
            self.start_offset = pos
        title = ref.title or _('Unknown')
        self.guide_table.append(GuideRef(title, ref.type, (pos, fid)))

    if self.guide_table:
        self.guide_records = GuideIndex(self.guide_table)()
|
||||||
|
|
||||||
|
@ -359,14 +359,14 @@ class Chunker(object):
|
|||||||
if pos_fid is None:
|
if pos_fid is None:
|
||||||
raise ValueError('Could not find chunk for aid: %r'%
|
raise ValueError('Could not find chunk for aid: %r'%
|
||||||
match.group(1))
|
match.group(1))
|
||||||
aid_map[match.group(1)] = (to_base(chunk.sequence_number,
|
aid_map[match.group(1)] = pos_fid
|
||||||
base=32, min_num_digits=4),
|
|
||||||
to_href(offset-chunk.insert_pos))
|
|
||||||
|
|
||||||
self.aid_offset_map = aid_map
|
self.aid_offset_map = aid_map
|
||||||
|
|
||||||
def to_placeholder(aid):
|
def to_placeholder(aid):
|
||||||
return bytes(':'.join(aid_map[aid]))
|
pos, fid = aid_map[aid]
|
||||||
|
pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
|
||||||
|
return bytes(':'.join((pos, fid)))
|
||||||
|
|
||||||
placeholder_map = {bytes(k):to_placeholder(v) for k, v in
|
placeholder_map = {bytes(k):to_placeholder(v) for k, v in
|
||||||
self.placeholder_map.iteritems()}
|
self.placeholder_map.iteritems()}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user