mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-31 02:27:01 -04:00 
			
		
		
		
	...
This commit is contained in:
		
							parent
							
								
									919011f8e5
								
							
						
					
					
						commit
						e61b86cd24
					
				| @ -957,15 +957,17 @@ class TBSIndexing(object): # {{{ | |||||||
|             return str({bin4(k):v for k, v in extra.iteritems()}) |             return str({bin4(k):v for k, v in extra.iteritems()}) | ||||||
| 
 | 
 | ||||||
|         tbs_type = 0 |         tbs_type = 0 | ||||||
|  |         is_periodical = self.doc_type in (257, 258, 259) | ||||||
|         if len(byts): |         if len(byts): | ||||||
|             outermost_index, extra, consumed = decode_tbs(byts) |             outermost_index, extra, consumed = decode_tbs(byts, flag_size=4 if | ||||||
|  |                     is_periodical else 3) | ||||||
|             byts = byts[consumed:] |             byts = byts[consumed:] | ||||||
|             for k in extra: |             for k in extra: | ||||||
|                 tbs_type |= k |                 tbs_type |= k | ||||||
|             ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type))) |             ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type))) | ||||||
|             ans.append('Outermost index: %d'%outermost_index) |             ans.append('Outermost index: %d'%outermost_index) | ||||||
|             ans.append('Unknown extra start bytes: %s'%repr_extra(extra)) |             ans.append('Unknown extra start bytes: %s'%repr_extra(extra)) | ||||||
|             if self.doc_type in (257, 259): # Hierarchical periodical |             if is_periodical: # Hierarchical periodical | ||||||
|                 byts, a = self.interpret_periodical(tbs_type, byts, |                 byts, a = self.interpret_periodical(tbs_type, byts, | ||||||
|                         dat['geom'][0]) |                         dat['geom'][0]) | ||||||
|                 ans += a |                 ans += a | ||||||
|  | |||||||
| @ -66,11 +66,14 @@ def encint(value, forward=True): | |||||||
|     If forward is True the bytes returned are suitable for prepending to the |     If forward is True the bytes returned are suitable for prepending to the | ||||||
|     output buffer, otherwise they must be append to the output buffer. |     output buffer, otherwise they must be append to the output buffer. | ||||||
|     ''' |     ''' | ||||||
|  |     if value < 0: | ||||||
|  |         raise ValueError('Cannot encode negative numbers as vwi') | ||||||
|     # Encode vwi |     # Encode vwi | ||||||
|     byts = bytearray() |     byts = bytearray() | ||||||
|     while True: |     while True: | ||||||
|         b = value & 0b01111111 |         b = value & 0b01111111 | ||||||
|         value >>= 7 # shift value to the right by 7 bits |         value >>= 7 # shift value to the right by 7 bits | ||||||
|  | 
 | ||||||
|         byts.append(b) |         byts.append(b) | ||||||
|         if value == 0: |         if value == 0: | ||||||
|             break |             break | ||||||
| @ -198,24 +201,31 @@ def encode_trailing_data(raw): | |||||||
|         lsize += 1 |         lsize += 1 | ||||||
|     return raw + encoded |     return raw + encoded | ||||||
| 
 | 
 | ||||||
| def encode_fvwi(val, flags): | def encode_fvwi(val, flags, flag_size=4): | ||||||
|     ''' |     ''' | ||||||
|     Encode the value val and the 4 bit flags flags as a fvwi. This encoding is |     Encode the value val and the flag_size bits from flags as a fvwi. This encoding is | ||||||
|     used in the trailing byte sequences for indexing. Returns encoded |     used in the trailing byte sequences for indexing. Returns encoded | ||||||
|     bytestring. |     bytestring. | ||||||
|     ''' |     ''' | ||||||
|     ans = (val << 4) | (flags & 0b1111) |     ans = val << flag_size | ||||||
|  |     for i in xrange(flag_size): | ||||||
|  |         ans |= (flags & (1 << i)) | ||||||
|     return encint(ans) |     return encint(ans) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def decode_fvwi(byts): | def decode_fvwi(byts, flag_size=4): | ||||||
|     ''' |     ''' | ||||||
|     Decode encoded fvwi. Returns number, flags, consumed |     Decode encoded fvwi. Returns number, flags, consumed | ||||||
|     ''' |     ''' | ||||||
|     arg, consumed = decint(bytes(byts)) |     arg, consumed = decint(bytes(byts)) | ||||||
|     return (arg >> 4), (arg & 0b1111), consumed |     val = arg >> flag_size | ||||||
|  |     flags = 0 | ||||||
|  |     for i in xrange(flag_size): | ||||||
|  |         flags |= (arg & (1 << i)) | ||||||
|  |     return val, flags, consumed | ||||||
| 
 | 
 | ||||||
| def decode_tbs(byts): | 
 | ||||||
|  | def decode_tbs(byts, flag_size=4): | ||||||
|     ''' |     ''' | ||||||
|     Trailing byte sequences for indexing consists of series of fvwi numbers. |     Trailing byte sequences for indexing consists of series of fvwi numbers. | ||||||
|     This function reads the fvwi number and its associated flags. It them uses |     This function reads the fvwi number and its associated flags. It them uses | ||||||
| @ -226,10 +236,10 @@ def decode_tbs(byts): | |||||||
|     data and the number of bytes consumed. |     data and the number of bytes consumed. | ||||||
|     ''' |     ''' | ||||||
|     byts = bytes(byts) |     byts = bytes(byts) | ||||||
|     val, flags, consumed = decode_fvwi(byts) |     val, flags, consumed = decode_fvwi(byts, flag_size=flag_size) | ||||||
|     extra = {} |     extra = {} | ||||||
|     byts = byts[consumed:] |     byts = byts[consumed:] | ||||||
|     if flags & 0b1000: |     if flags & 0b1000 and flag_size > 3: | ||||||
|         extra[0b1000] = True |         extra[0b1000] = True | ||||||
|     if flags & 0b0010: |     if flags & 0b0010: | ||||||
|         x, consumed2 = decint(byts) |         x, consumed2 = decint(byts) | ||||||
| @ -247,7 +257,7 @@ def decode_tbs(byts): | |||||||
|         consumed += consumed2 |         consumed += consumed2 | ||||||
|     return val, extra, consumed |     return val, extra, consumed | ||||||
| 
 | 
 | ||||||
| def encode_tbs(val, extra): | def encode_tbs(val, extra, flag_size=4): | ||||||
|     ''' |     ''' | ||||||
|     Encode the number val and the extra data in the extra dict as an fvwi. See |     Encode the number val and the extra data in the extra dict as an fvwi. See | ||||||
|     decode_tbs above. |     decode_tbs above. | ||||||
| @ -255,7 +265,7 @@ def encode_tbs(val, extra): | |||||||
|     flags = 0 |     flags = 0 | ||||||
|     for flag in extra: |     for flag in extra: | ||||||
|         flags |= flag |         flags |= flag | ||||||
|     ans = encode_fvwi(val, flags) |     ans = encode_fvwi(val, flags, flag_size=flag_size) | ||||||
| 
 | 
 | ||||||
|     if 0b0010 in extra: |     if 0b0010 in extra: | ||||||
|         ans += encint(extra[0b0010]) |         ans += encint(extra[0b0010]) | ||||||
|  | |||||||
| @ -28,13 +28,12 @@ class CNCX(object): # {{{ | |||||||
| 
 | 
 | ||||||
|     MAX_STRING_LENGTH = 500 |     MAX_STRING_LENGTH = 500 | ||||||
| 
 | 
 | ||||||
|     def __init__(self, toc, opts): |     def __init__(self, toc, is_periodical): | ||||||
|         self.strings = OrderedDict() |         self.strings = OrderedDict() | ||||||
| 
 | 
 | ||||||
|         for item in toc: |         for item in toc.iterdescendants(): | ||||||
|             if item is self.toc: continue |  | ||||||
|             self.strings[item.title] = 0 |             self.strings[item.title] = 0 | ||||||
|             if opts.mobi_periodical: |             if is_periodical: | ||||||
|                 self.strings[item.klass] = 0 |                 self.strings[item.klass] = 0 | ||||||
| 
 | 
 | ||||||
|         self.records = [] |         self.records = [] | ||||||
| @ -91,6 +90,17 @@ class IndexEntry(object): # {{{ | |||||||
|         self.first_child_index = None |         self.first_child_index = None | ||||||
|         self.last_child_index = None |         self.last_child_index = None | ||||||
| 
 | 
 | ||||||
|  |     def __repr__(self): | ||||||
|  |         return ('IndexEntry(offset=%r, depth=%r, length=%r, index=%r,' | ||||||
|  |                 ' parent_index=%r)')%(self.offset, self.depth, self.length, | ||||||
|  |                         self.index, self.parent_index) | ||||||
|  | 
 | ||||||
|  |     @dynamic_property | ||||||
|  |     def size(self): | ||||||
|  |         def fget(self): return self.length | ||||||
|  |         def fset(self, val): self.length = val | ||||||
|  |         return property(fget=fget, fset=fset, doc='Alias for length') | ||||||
|  | 
 | ||||||
|     @classmethod |     @classmethod | ||||||
|     def tagx_block(cls, for_periodical=True): |     def tagx_block(cls, for_periodical=True): | ||||||
|         buf = bytearray() |         buf = bytearray() | ||||||
| @ -137,7 +147,7 @@ class IndexEntry(object): # {{{ | |||||||
|     def entry_type(self): |     def entry_type(self): | ||||||
|         ans = 0 |         ans = 0 | ||||||
|         for tag in self.tag_nums: |         for tag in self.tag_nums: | ||||||
|             ans |= (1 << self.BITMASKS[tag]) # 1 << x == 2**x |             ans |= (1 << self.BITMASKS.index(tag)) # 1 << x == 2**x | ||||||
|         return ans |         return ans | ||||||
| 
 | 
 | ||||||
|     @property |     @property | ||||||
| @ -152,7 +162,7 @@ class IndexEntry(object): # {{{ | |||||||
|             val = getattr(self, attr) |             val = getattr(self, attr) | ||||||
|             buf.write(encint(val)) |             buf.write(encint(val)) | ||||||
| 
 | 
 | ||||||
|         ans = buf.get_value() |         ans = buf.getvalue() | ||||||
|         return ans |         return ans | ||||||
| 
 | 
 | ||||||
| # }}} | # }}} | ||||||
| @ -175,13 +185,16 @@ class TBS(object): # {{{ | |||||||
|                 # The starting bytes. |                 # The starting bytes. | ||||||
|                 # The value is zero which I think indicates the periodical |                 # The value is zero which I think indicates the periodical | ||||||
|                 # index entry. The values for the various flags seem to be |                 # index entry. The values for the various flags seem to be | ||||||
|                 # unused. If the 0b0100 is present, it means that the record |                 # unused. If the 0b100 is present, it means that the record | ||||||
|                 # deals with section 1 (or is the final record with section |                 # deals with section 1 (or is the final record with section | ||||||
|                 # transitions). |                 # transitions). | ||||||
|                 self.type_010 = encode_tbs(0, {0b0010: 0}) |                 self.type_010 = encode_tbs(0, {0b010: 0}, flag_size=3) | ||||||
|                 self.type_011 = encode_tbs(0, {0b0010: 0, 0b0001: 0}) |                 self.type_011 = encode_tbs(0, {0b010: 0, 0b001: 0}, | ||||||
|                 self.type_110 = encode_tbs(0, {0b0100: 2, 0b0010: 0}) |                         flag_size=3) | ||||||
|                 self.type_111 = encode_tbs(0, {0b0100: 2, 0b0010: 0, 0b0001: 0}) |                 self.type_110 = encode_tbs(0, {0b100: 2, 0b010: 0}, | ||||||
|  |                         flag_size=3) | ||||||
|  |                 self.type_111 = encode_tbs(0, {0b100: 2, 0b010: 0, 0b001: | ||||||
|  |                     0}, flag_size=3) | ||||||
| 
 | 
 | ||||||
|                 depth_map = defaultdict(list) |                 depth_map = defaultdict(list) | ||||||
|                 for x in ('starts', 'ends', 'completes'): |                 for x in ('starts', 'ends', 'completes'): | ||||||
| @ -221,12 +234,18 @@ class TBS(object): # {{{ | |||||||
|                         self.type_010) |                         self.type_010) | ||||||
|             elif not depth_map[1]: |             elif not depth_map[1]: | ||||||
|                 # has only article nodes, i.e. spanned by a section |                 # has only article nodes, i.e. spanned by a section | ||||||
|                 parent_section_index = self.depth_map[2][0].parent_index |                 parent_section_index = depth_map[2][0].parent_index | ||||||
|                 typ = (self.type_111 if parent_section_index == 1 else |                 typ = (self.type_111 if parent_section_index == 1 else | ||||||
|                         self.type_010) |                         self.type_010) | ||||||
|             else: |             else: | ||||||
|                 # has section transitions |                 # has section transitions | ||||||
|                 parent_section_index = self.depth_map[2][0].parent_index |                 if depth_map[2]: | ||||||
|  |                     parent_section_index = depth_map[2][0].parent_index | ||||||
|  |                     typ = self.type_011 | ||||||
|  |                 else: | ||||||
|  |                     parent_section_index = depth_map[1][0].index | ||||||
|  |                     typ = (self.type_110 if parent_section_index == 1 else | ||||||
|  |                             self.type_011) | ||||||
| 
 | 
 | ||||||
|         buf.write(typ) |         buf.write(typ) | ||||||
| 
 | 
 | ||||||
| @ -243,9 +262,10 @@ class TBS(object): # {{{ | |||||||
| 
 | 
 | ||||||
|         if spanner is None: |         if spanner is None: | ||||||
|             articles = depth_map[2] |             articles = depth_map[2] | ||||||
|             sections = [self.section_map[a.parent_index] for a in articles] |             sections = set([self.section_map[a.parent_index] for a in | ||||||
|             sections.sort(key=lambda x:x.offset) |                 articles]) | ||||||
|             section_map = {s:[a for a in articles is a.parent_index == |             sections = sorted(sections, key=lambda x:x.offset) | ||||||
|  |             section_map = {s:[a for a in articles if a.parent_index == | ||||||
|                 s.index] for s in sections} |                 s.index] for s in sections} | ||||||
|             for i, section in enumerate(sections): |             for i, section in enumerate(sections): | ||||||
|                 # All the articles in this record that belong to section |                 # All the articles in this record that belong to section | ||||||
| @ -257,7 +277,7 @@ class TBS(object): # {{{ | |||||||
|                 try: |                 try: | ||||||
|                     next_sec = sections[i+1] |                     next_sec = sections[i+1] | ||||||
|                 except: |                 except: | ||||||
|                     next_sec == None |                     next_sec = None | ||||||
| 
 | 
 | ||||||
|                 extra = {} |                 extra = {} | ||||||
|                 if num > 1: |                 if num > 1: | ||||||
| @ -299,14 +319,14 @@ class Indexer(object): # {{{ | |||||||
|         self.log('Generating MOBI index for a %s'%('periodical' if |         self.log('Generating MOBI index for a %s'%('periodical' if | ||||||
|             self.is_periodical else 'book')) |             self.is_periodical else 'book')) | ||||||
|         self.is_flat_periodical = False |         self.is_flat_periodical = False | ||||||
|         if opts.mobi_periodical: |         if self.is_periodical: | ||||||
|             periodical_node = iter(oeb.toc).next() |             periodical_node = iter(oeb.toc).next() | ||||||
|             sections = tuple(periodical_node) |             sections = tuple(periodical_node) | ||||||
|             self.is_flat_periodical = len(sections) == 1 |             self.is_flat_periodical = len(sections) == 1 | ||||||
| 
 | 
 | ||||||
|         self.records = [] |         self.records = [] | ||||||
| 
 | 
 | ||||||
|         self.cncx = CNCX(oeb.toc, opts) |         self.cncx = CNCX(oeb.toc, self.is_periodical) | ||||||
| 
 | 
 | ||||||
|         if self.is_periodical: |         if self.is_periodical: | ||||||
|             self.indices = self.create_periodical_index() |             self.indices = self.create_periodical_index() | ||||||
| @ -405,7 +425,7 @@ class Indexer(object): # {{{ | |||||||
|         buf.write(pack(b'>I', 0)) # Filled in later |         buf.write(pack(b'>I', 0)) # Filled in later | ||||||
| 
 | 
 | ||||||
|         # Number of index records 24-28 |         # Number of index records 24-28 | ||||||
|         buf.write(pack('b>I', len(self.records))) |         buf.write(pack(b'>I', len(self.records))) | ||||||
| 
 | 
 | ||||||
|         # Index Encoding 28-32 |         # Index Encoding 28-32 | ||||||
|         buf.write(pack(b'>I', 65001)) # utf-8 |         buf.write(pack(b'>I', 65001)) # utf-8 | ||||||
| @ -457,7 +477,7 @@ class Indexer(object): # {{{ | |||||||
|         idxt_offset = buf.tell() |         idxt_offset = buf.tell() | ||||||
| 
 | 
 | ||||||
|         buf.write(b'IDXT') |         buf.write(b'IDXT') | ||||||
|         buf.write(header_length + len(tagx_block)) |         buf.write(pack(b'>H', header_length + len(tagx_block))) | ||||||
|         buf.write(b'\0') |         buf.write(b'\0') | ||||||
|         buf.seek(20) |         buf.seek(20) | ||||||
|         buf.write(pack(b'>I', idxt_offset)) |         buf.write(pack(b'>I', idxt_offset)) | ||||||
| @ -567,7 +587,7 @@ class Indexer(object): # {{{ | |||||||
|         for s, x in enumerate(normalized_sections): |         for s, x in enumerate(normalized_sections): | ||||||
|             sec, normalized_articles = x |             sec, normalized_articles = x | ||||||
|             try: |             try: | ||||||
|                 sec.length = normalized_sections[s+1].offset - sec.offset |                 sec.length = normalized_sections[s+1][0].offset - sec.offset | ||||||
|             except: |             except: | ||||||
|                 sec.length = self.serializer.body_end_offset - sec.offset |                 sec.length = self.serializer.body_end_offset - sec.offset | ||||||
|             for i, art in enumerate(normalized_articles): |             for i, art in enumerate(normalized_articles): | ||||||
| @ -583,17 +603,18 @@ class Indexer(object): # {{{ | |||||||
|                 normalized_articles)) |                 normalized_articles)) | ||||||
|             normalized_sections[i] = (sec, normalized_articles) |             normalized_sections[i] = (sec, normalized_articles) | ||||||
| 
 | 
 | ||||||
|         normalized_sections = list(filter(lambda x: x[0].size > 0 and x[1], |         normalized_sections = list(filter(lambda x: x[0].length > 0 and x[1], | ||||||
|             normalized_sections)) |             normalized_sections)) | ||||||
| 
 | 
 | ||||||
|         # Set indices |         # Set indices | ||||||
|         i = 0 |         i = 0 | ||||||
|         for sec, normalized_articles in normalized_sections: |         for sec, articles in normalized_sections: | ||||||
|             i += 1 |             i += 1 | ||||||
|             sec.index = i |             sec.index = i | ||||||
|  |             sec.parent_index = 0 | ||||||
| 
 | 
 | ||||||
|         for sec, normalized_articles in normalized_sections: |         for sec, articles in normalized_sections: | ||||||
|             for art in normalized_articles: |             for art in articles: | ||||||
|                 i += 1 |                 i += 1 | ||||||
|                 art.index = i |                 art.index = i | ||||||
|                 art.parent_index = sec.index |                 art.parent_index = sec.index | ||||||
| @ -606,7 +627,7 @@ class Indexer(object): # {{{ | |||||||
|         for s, x in enumerate(normalized_sections): |         for s, x in enumerate(normalized_sections): | ||||||
|             sec, articles = x |             sec, articles = x | ||||||
|             try: |             try: | ||||||
|                 next_offset = normalized_sections[s+1].offset |                 next_offset = normalized_sections[s+1][0].offset | ||||||
|             except: |             except: | ||||||
|                 next_offset = self.serializer.body_end_offset |                 next_offset = self.serializer.body_end_offset | ||||||
|             sec.length = next_offset - sec.offset |             sec.length = next_offset - sec.offset | ||||||
| @ -622,7 +643,7 @@ class Indexer(object): # {{{ | |||||||
|         for s, x in enumerate(normalized_sections): |         for s, x in enumerate(normalized_sections): | ||||||
|             sec, articles = x |             sec, articles = x | ||||||
|             try: |             try: | ||||||
|                 next_sec = normalized_sections[s+1] |                 next_sec = normalized_sections[s+1][0] | ||||||
|             except: |             except: | ||||||
|                 if (sec.length == 0 or sec.next_offset != |                 if (sec.length == 0 or sec.next_offset != | ||||||
|                         self.serializer.body_end_offset): |                         self.serializer.body_end_offset): | ||||||
| @ -659,6 +680,7 @@ class Indexer(object): # {{{ | |||||||
|         self.tbs_map = {} |         self.tbs_map = {} | ||||||
|         found_node = False |         found_node = False | ||||||
|         sections = [i for i in self.indices if i.depth == 1] |         sections = [i for i in self.indices if i.depth == 1] | ||||||
|  |         deepest = max(i.depth for i in self.indices) | ||||||
|         for i in xrange(self.number_of_text_records): |         for i in xrange(self.number_of_text_records): | ||||||
|             offset = i * RECORD_SIZE |             offset = i * RECORD_SIZE | ||||||
|             next_offset = offset + RECORD_SIZE |             next_offset = offset + RECORD_SIZE | ||||||
| @ -683,7 +705,7 @@ class Indexer(object): # {{{ | |||||||
|                     if index.next_offset <= next_offset: |                     if index.next_offset <= next_offset: | ||||||
|                         # Node ends in current record |                         # Node ends in current record | ||||||
|                         data['ends'].append(index) |                         data['ends'].append(index) | ||||||
|                     else: |                     elif index.depth == deepest: | ||||||
|                         data['spans'] = index |                         data['spans'] = index | ||||||
|             if (data['ends'] or data['completes'] or data['starts'] or |             if (data['ends'] or data['completes'] or data['starts'] or | ||||||
|                     data['spans'] is not None): |                     data['spans'] is not None): | ||||||
|  | |||||||
| @ -55,6 +55,7 @@ class MobiWriter(object): | |||||||
|         self.last_text_record_idx = 1 |         self.last_text_record_idx = 1 | ||||||
| 
 | 
 | ||||||
|     def __call__(self, oeb, path_or_stream): |     def __call__(self, oeb, path_or_stream): | ||||||
|  |         self.log = oeb.log | ||||||
|         if hasattr(path_or_stream, 'write'): |         if hasattr(path_or_stream, 'write'): | ||||||
|             return self.dump_stream(oeb, path_or_stream) |             return self.dump_stream(oeb, path_or_stream) | ||||||
|         with open(path_or_stream, 'w+b') as stream: |         with open(path_or_stream, 'w+b') as stream: | ||||||
| @ -90,6 +91,7 @@ class MobiWriter(object): | |||||||
|         self.primary_index_record_idx = None |         self.primary_index_record_idx = None | ||||||
|         try: |         try: | ||||||
|             self.indexer = Indexer(self.serializer, self.last_text_record_idx, |             self.indexer = Indexer(self.serializer, self.last_text_record_idx, | ||||||
|  |                     len(self.records[self.last_text_record_idx]), | ||||||
|                     self.opts, self.oeb) |                     self.opts, self.oeb) | ||||||
|         except: |         except: | ||||||
|             self.log.exception('Failed to generate MOBI index:') |             self.log.exception('Failed to generate MOBI index:') | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user