mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-06-23 15:30:45 -04:00)
...
commit e61b86cd24 (parent 919011f8e5)
@@ -957,15 +957,17 @@ class TBSIndexing(object): # {{{
             return str({bin4(k):v for k, v in extra.iteritems()})
 
         tbs_type = 0
+        is_periodical = self.doc_type in (257, 258, 259)
         if len(byts):
-            outermost_index, extra, consumed = decode_tbs(byts)
+            outermost_index, extra, consumed = decode_tbs(byts, flag_size=4 if
+                    is_periodical else 3)
             byts = byts[consumed:]
             for k in extra:
                 tbs_type |= k
             ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type)))
             ans.append('Outermost index: %d'%outermost_index)
             ans.append('Unknown extra start bytes: %s'%repr_extra(extra))
-            if self.doc_type in (257, 259): # Hierarchical periodical
+            if is_periodical: # Hierarchical periodical
                 byts, a = self.interpret_periodical(tbs_type, byts,
                         dat['geom'][0])
                 ans += a
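The hunk above makes the debug dumper pick the TBS flag width from the document type. A minimal sketch of that dispatch, assuming (as this diff implies) that doc types 257, 258 and 259 are the periodical variants and plain books use 3-bit flags:

    # Assumption: doc_type values taken from this diff; 2 is the ordinary
    # MOBI book type.
    def tbs_flag_size(doc_type):
        return 4 if doc_type in (257, 258, 259) else 3

    assert tbs_flag_size(2) == 3      # ordinary book
    assert tbs_flag_size(257) == 4    # hierarchical periodical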
@@ -66,11 +66,14 @@ def encint(value, forward=True):
     If forward is True the bytes returned are suitable for prepending to the
     output buffer, otherwise they must be append to the output buffer.
     '''
+    if value < 0:
+        raise ValueError('Cannot encode negative numbers as vwi')
+    # Encode vwi
     byts = bytearray()
     while True:
         b = value & 0b01111111
         value >>= 7 # shift value to the right by 7 bits
 
         byts.append(b)
         if value == 0:
             break
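For context, encint implements MOBI's variable-width integers: seven payload bits per byte, most significant group first. A self-contained sketch of the forward-read variant, assuming the usual convention (not visible in this hunk) that the high bit marks the final byte:

    def encode_vwi(value):
        # Accumulate 7-bit groups, least significant group first.
        if value < 0:
            raise ValueError('Cannot encode negative numbers as vwi')
        byts = bytearray()
        while True:
            byts.append(value & 0b01111111)
            value >>= 7
            if value == 0:
                break
        byts[0] |= 0b10000000   # terminator bit on what becomes the last byte
        byts.reverse()          # most significant group first
        return bytes(byts)

    assert encode_vwi(0) == b'\x80'
    assert encode_vwi(0x11111) == b'\x04\x22\x91'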
@@ -198,24 +201,31 @@ def encode_trailing_data(raw):
         lsize += 1
     return raw + encoded
 
-def encode_fvwi(val, flags):
+def encode_fvwi(val, flags, flag_size=4):
     '''
-    Encode the value val and the 4 bit flags flags as a fvwi. This encoding is
+    Encode the value val and the flag_size bits from flags as a fvwi. This encoding is
     used in the trailing byte sequences for indexing. Returns encoded
     bytestring.
     '''
-    ans = (val << 4) | (flags & 0b1111)
+    ans = val << flag_size
+    for i in xrange(flag_size):
+        ans |= (flags & (1 << i))
     return encint(ans)
 
 
-def decode_fvwi(byts):
+def decode_fvwi(byts, flag_size=4):
     '''
     Decode encoded fvwi. Returns number, flags, consumed
     '''
     arg, consumed = decint(bytes(byts))
-    return (arg >> 4), (arg & 0b1111), consumed
+    val = arg >> flag_size
+    flags = 0
+    for i in xrange(flag_size):
+        flags |= (arg & (1 << i))
+    return val, flags, consumed
 
 
-def decode_tbs(byts):
+def decode_tbs(byts, flag_size=4):
     '''
     Trailing byte sequences for indexing consists of series of fvwi numbers.
     This function reads the fvwi number and its associated flags. It them uses
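The heart of the change: fvwi packing is now parametrized on flag width. Stripped of the vwi byte encoding, the packing the new bodies implement reduces to a shift and a mask (the bit-by-bit loop in the patch is equivalent to masking with (1 << flag_size) - 1). A standalone sketch:

    def pack_fvwi(val, flags, flag_size=4):
        # Value in the high bits, flags in the low flag_size bits.
        return (val << flag_size) | (flags & ((1 << flag_size) - 1))

    def unpack_fvwi(packed, flag_size=4):
        return packed >> flag_size, packed & ((1 << flag_size) - 1)

    assert unpack_fvwi(pack_fvwi(3, 0b1010)) == (3, 0b1010)
    assert unpack_fvwi(pack_fvwi(3, 0b010, flag_size=3), flag_size=3) == (3, 0b010)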
@@ -226,10 +236,10 @@ def decode_tbs(byts):
     data and the number of bytes consumed.
     '''
     byts = bytes(byts)
-    val, flags, consumed = decode_fvwi(byts)
+    val, flags, consumed = decode_fvwi(byts, flag_size=flag_size)
     extra = {}
     byts = byts[consumed:]
-    if flags & 0b1000:
+    if flags & 0b1000 and flag_size > 3:
         extra[0b1000] = True
     if flags & 0b0010:
         x, consumed2 = decint(byts)
@@ -247,7 +257,7 @@ def decode_tbs(byts):
         consumed += consumed2
     return val, extra, consumed
 
-def encode_tbs(val, extra):
+def encode_tbs(val, extra, flag_size=4):
     '''
     Encode the number val and the extra data in the extra dict as an fvwi. See
     decode_tbs above.
@@ -255,7 +265,7 @@ def encode_tbs(val, extra):
     flags = 0
     for flag in extra:
         flags |= flag
-    ans = encode_fvwi(val, flags)
+    ans = encode_fvwi(val, flags, flag_size=flag_size)
 
     if 0b0010 in extra:
         ans += encint(extra[0b0010])
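After the leading fvwi, decode_tbs walks the decoded flags to pull any trailing values; note the new guard reserving the 0b1000 flag for 4-bit (periodical) sequences. A runnable sketch of that flag walk, with the extra vwi read replaced by a literal:

    extra = {}
    flags, flag_size = 0b1010, 4
    if flags & 0b1000 and flag_size > 3:
        extra[0b1000] = True    # periodical-only marker
    if flags & 0b0010:
        extra[0b0010] = 42      # the real code reads one more vwi here
    assert extra == {0b1000: True, 0b0010: 42}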
@@ -28,13 +28,12 @@ class CNCX(object): # {{{
 
     MAX_STRING_LENGTH = 500
 
-    def __init__(self, toc, opts):
+    def __init__(self, toc, is_periodical):
         self.strings = OrderedDict()
 
-        for item in toc:
-            if item is self.toc: continue
+        for item in toc.iterdescendants():
             self.strings[item.title] = 0
-            if opts.mobi_periodical:
+            if is_periodical:
                 self.strings[item.klass] = 0
 
         self.records = []
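CNCX now walks every descendant of the TOC rather than only its immediate children. A hypothetical stand-in for the TOC node (iterdescendants, title and klass are the only pieces of the real interface used here):

    from collections import OrderedDict

    class Node(object):
        def __init__(self, title, klass='', children=()):
            self.title, self.klass, self.children = title, klass, list(children)

        def iterdescendants(self):
            for child in self.children:
                yield child
                for desc in child.iterdescendants():
                    yield desc

    toc = Node('root', children=[
        Node('Section 1', 'section', [Node('Article 1', 'article')])])
    is_periodical = True
    strings = OrderedDict()
    for item in toc.iterdescendants():
        strings[item.title] = 0
        if is_periodical:
            strings[item.klass] = 0
    assert list(strings) == ['Section 1', 'section', 'Article 1', 'article']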
@@ -91,6 +90,17 @@ class IndexEntry(object): # {{{
         self.first_child_index = None
         self.last_child_index = None
 
+    def __repr__(self):
+        return ('IndexEntry(offset=%r, depth=%r, length=%r, index=%r,'
+                ' parent_index=%r)')%(self.offset, self.depth, self.length,
+                        self.index, self.parent_index)
+
+    @dynamic_property
+    def size(self):
+        def fget(self): return self.length
+        def fset(self, val): self.length = val
+        return property(fget=fget, fset=fset, doc='Alias for length')
+
     @classmethod
     def tagx_block(cls, for_periodical=True):
         buf = bytearray()
@@ -137,7 +147,7 @@ class IndexEntry(object): # {{{
     def entry_type(self):
         ans = 0
         for tag in self.tag_nums:
-            ans |= (1 << self.BITMASKS[tag]) # 1 << x == 2**x
+            ans |= (1 << self.BITMASKS.index(tag)) # 1 << x == 2**x
         return ans
 
     @property
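This one-token fix changes the meaning entirely: BITMASKS is a sequence of tag numbers, so the bit to set is the tag's position in the sequence, not the element at position tag. Illustrative values (assumed for this example; the real list lives in the indexer module):

    BITMASKS = [1, 2, 3, 4, 5, 21, 22, 23]   # assumed values for illustration
    tag = 21
    # Old code: 1 << BITMASKS[21] -> IndexError, or the wrong bit entirely.
    # New code: 1 << BITMASKS.index(21) == 1 << 5
    assert 1 << BITMASKS.index(tag) == 0b100000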
@@ -152,7 +162,7 @@ class IndexEntry(object): # {{{
             val = getattr(self, attr)
             buf.write(encint(val))
 
-        ans = buf.get_value()
+        ans = buf.getvalue()
         return ans
 
 # }}}
@@ -175,13 +185,16 @@ class TBS(object): # {{{
             # The starting bytes.
             # The value is zero which I think indicates the periodical
             # index entry. The values for the various flags seem to be
-            # unused. If the 0b0100 is present, it means that the record
+            # unused. If the 0b100 is present, it means that the record
             # deals with section 1 (or is the final record with section
             # transitions).
-            self.type_010 = encode_tbs(0, {0b0010: 0})
-            self.type_011 = encode_tbs(0, {0b0010: 0, 0b0001: 0})
-            self.type_110 = encode_tbs(0, {0b0100: 2, 0b0010: 0})
-            self.type_111 = encode_tbs(0, {0b0100: 2, 0b0010: 0, 0b0001: 0})
+            self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3)
+            self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0},
+                    flag_size=3)
+            self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0},
+                    flag_size=3)
+            self.type_111 = encode_tbs(0, {0b100: 2, 0b010: 0, 0b001:
+                0}, flag_size=3)
 
         depth_map = defaultdict(list)
         for x in ('starts', 'ends', 'completes'):
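For reference, the first of these constants can be worked out by hand from the functions above (derived here, so worth double-checking): the fvwi packs (0 << 3) | 0b010 = 2, which encint renders as the single byte 0x82 (terminator bit set), and the 0b010 flag then appends encint(0) = 0x80, so type_010 should come out as b'\x82\x80'.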
@@ -221,12 +234,18 @@ class TBS(object): # {{{
                             self.type_010)
                 elif not depth_map[1]:
                     # has only article nodes, i.e. spanned by a section
-                    parent_section_index = self.depth_map[2][0].parent_index
+                    parent_section_index = depth_map[2][0].parent_index
                     typ = (self.type_111 if parent_section_index == 1 else
                             self.type_010)
                 else:
                     # has section transitions
-                    parent_section_index = self.depth_map[2][0].parent_index
-                    typ = (self.type_110 if parent_section_index == 1 else
-                            self.type_011)
+                    if depth_map[2]:
+                        parent_section_index = depth_map[2][0].parent_index
+                        typ = self.type_011
+                    else:
+                        parent_section_index = depth_map[1][0].index
+                        typ = (self.type_110 if parent_section_index == 1 else
+                                self.type_011)
 
                 buf.write(typ)
@@ -243,9 +262,10 @@ class TBS(object): # {{{
 
         if spanner is None:
             articles = depth_map[2]
-            sections = [self.section_map[a.parent_index] for a in articles]
-            sections.sort(key=lambda x:x.offset)
-            section_map = {s:[a for a in articles is a.parent_index ==
+            sections = set([self.section_map[a.parent_index] for a in
+                articles])
+            sections = sorted(sections, key=lambda x:x.offset)
+            section_map = {s:[a for a in articles if a.parent_index ==
                 s.index] for s in sections}
             for i, section in enumerate(sections):
                 # All the articles in this record that belong to section
@@ -257,7 +277,7 @@ class TBS(object): # {{{
                 try:
                     next_sec = sections[i+1]
                 except:
-                    next_sec == None
+                    next_sec = None
 
                 extra = {}
                 if num > 1:
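The except branch above fixes a classic slip: a bare comparison is an expression statement, so the old line silently discarded its result and next_sec kept whatever value it already had. A two-line demonstration:

    next_sec = 'stale'
    next_sec == None        # no-op: evaluates to False and is thrown away
    assert next_sec == 'stale'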
@@ -299,14 +319,14 @@ class Indexer(object): # {{{
         self.log('Generating MOBI index for a %s'%('periodical' if
             self.is_periodical else 'book'))
         self.is_flat_periodical = False
-        if opts.mobi_periodical:
+        if self.is_periodical:
             periodical_node = iter(oeb.toc).next()
             sections = tuple(periodical_node)
             self.is_flat_periodical = len(sections) == 1
 
         self.records = []
 
-        self.cncx = CNCX(oeb.toc, opts)
+        self.cncx = CNCX(oeb.toc, self.is_periodical)
 
         if self.is_periodical:
             self.indices = self.create_periodical_index()
@@ -405,7 +425,7 @@ class Indexer(object): # {{{
         buf.write(pack(b'>I', 0)) # Filled in later
 
         # Number of index records 24-28
-        buf.write(pack('b>I', len(self.records)))
+        buf.write(pack(b'>I', len(self.records)))
 
         # Index Encoding 28-32
         buf.write(pack(b'>I', 65001)) # utf-8
@@ -457,7 +477,7 @@ class Indexer(object): # {{{
         idxt_offset = buf.tell()
 
         buf.write(b'IDXT')
-        buf.write(header_length + len(tagx_block))
+        buf.write(pack(b'>H', header_length + len(tagx_block)))
         buf.write(b'\0')
         buf.seek(20)
         buf.write(pack(b'>I', idxt_offset))
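This hunk and the previous one fix the same class of bug in the struct calls: the b belongs on the bytes literal, not inside the format string, and raw integers cannot be written to the buffer unpacked. A quick check of both:

    from struct import error, pack

    try:
        pack('b>I', 10)     # 'b' parses as a format code, then '>' is invalid
    except error:
        pass
    assert pack(b'>I', 10) == b'\x00\x00\x00\x0a'
    assert pack(b'>H', 276) == b'\x01\x14'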
@@ -567,7 +587,7 @@ class Indexer(object): # {{{
         for s, x in enumerate(normalized_sections):
             sec, normalized_articles = x
             try:
-                sec.length = normalized_sections[s+1].offset - sec.offset
+                sec.length = normalized_sections[s+1][0].offset - sec.offset
             except:
                 sec.length = self.serializer.body_end_offset - sec.offset
             for i, art in enumerate(normalized_articles):
@@ -583,17 +603,18 @@ class Indexer(object): # {{{
                 normalized_articles))
             normalized_sections[i] = (sec, normalized_articles)
 
-        normalized_sections = list(filter(lambda x: x[0].size > 0 and x[1],
+        normalized_sections = list(filter(lambda x: x[0].length > 0 and x[1],
             normalized_sections))
 
         # Set indices
         i = 0
-        for sec, normalized_articles in normalized_sections:
+        for sec, articles in normalized_sections:
             i += 1
             sec.index = i
             sec.parent_index = 0
 
-        for sec, normalized_articles in normalized_sections:
-            for art in normalized_articles:
+        for sec, articles in normalized_sections:
+            for art in articles:
                 i += 1
                 art.index = i
                 art.parent_index = sec.index
@@ -606,7 +627,7 @@ class Indexer(object): # {{{
         for s, x in enumerate(normalized_sections):
             sec, articles = x
             try:
-                next_offset = normalized_sections[s+1].offset
+                next_offset = normalized_sections[s+1][0].offset
             except:
                 next_offset = self.serializer.body_end_offset
             sec.length = next_offset - sec.offset
@@ -622,7 +643,7 @@ class Indexer(object): # {{{
         for s, x in enumerate(normalized_sections):
             sec, articles = x
             try:
-                next_sec = normalized_sections[s+1]
+                next_sec = normalized_sections[s+1][0]
             except:
                 if (sec.length == 0 or sec.next_offset !=
                         self.serializer.body_end_offset):
@@ -659,6 +680,7 @@ class Indexer(object): # {{{
         self.tbs_map = {}
         found_node = False
         sections = [i for i in self.indices if i.depth == 1]
+        deepest = max(i.depth for i in self.indices)
         for i in xrange(self.number_of_text_records):
             offset = i * RECORD_SIZE
             next_offset = offset + RECORD_SIZE
@@ -683,7 +705,7 @@ class Indexer(object): # {{{
                 if index.next_offset <= next_offset:
                     # Node ends in current record
                     data['ends'].append(index)
-                else:
+                elif index.depth == deepest:
                     data['spans'] = index
             if (data['ends'] or data['completes'] or data['starts'] or
                     data['spans'] is not None):
@@ -55,6 +55,7 @@ class MobiWriter(object):
         self.last_text_record_idx = 1
 
     def __call__(self, oeb, path_or_stream):
+        self.log = oeb.log
         if hasattr(path_or_stream, 'write'):
             return self.dump_stream(oeb, path_or_stream)
         with open(path_or_stream, 'w+b') as stream:
@@ -90,6 +91,7 @@ class MobiWriter(object):
         self.primary_index_record_idx = None
         try:
             self.indexer = Indexer(self.serializer, self.last_text_record_idx,
+                    len(self.records[self.last_text_record_idx]),
                     self.opts, self.oeb)
         except:
             self.log.exception('Failed to generate MOBI index:')