mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
MOBI Output: Handle large descriptions when generating indexed files
This commit is contained in:
commit
52f49dcccf
@ -105,13 +105,11 @@ def decint(value, direction):
|
|||||||
bytes[-1] |= 0x80
|
bytes[-1] |= 0x80
|
||||||
return ''.join(chr(b) for b in reversed(bytes))
|
return ''.join(chr(b) for b in reversed(bytes))
|
||||||
|
|
||||||
|
|
||||||
def align_block(raw, multiple=4, pad='\0'):
|
def align_block(raw, multiple=4, pad='\0'):
|
||||||
extra = len(raw) % multiple
|
extra = len(raw) % multiple
|
||||||
if extra == 0: return raw
|
if extra == 0: return raw
|
||||||
return raw + pad*(multiple - extra)
|
return raw + pad*(multiple - extra)
|
||||||
|
|
||||||
|
|
||||||
def rescale_image(data, maxsizeb, dimen=None):
|
def rescale_image(data, maxsizeb, dimen=None):
|
||||||
image = Image.open(StringIO(data))
|
image = Image.open(StringIO(data))
|
||||||
format = image.format
|
format = image.format
|
||||||
@ -155,7 +153,6 @@ def rescale_image(data, maxsizeb, dimen=None):
|
|||||||
# Well, we tried?
|
# Well, we tried?
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
class Serializer(object):
|
class Serializer(object):
|
||||||
NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
|
NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
|
||||||
|
|
||||||
@ -329,8 +326,6 @@ class Serializer(object):
|
|||||||
buffer.seek(hoff)
|
buffer.seek(hoff)
|
||||||
buffer.write('%010d' % ioff)
|
buffer.write('%010d' % ioff)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class MobiWriter(object):
|
class MobiWriter(object):
|
||||||
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
||||||
|
|
||||||
@ -545,7 +540,6 @@ class MobiWriter(object):
|
|||||||
if self.opts.verbose > 3 : self._oeb.logger.info(" node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \
|
if self.opts.verbose > 3 : self._oeb.logger.info(" node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \
|
||||||
(myIndex, child.title if child.title.strip() > "" else "(missing)", myStartingRecord, myStartingRecord, offset, length) )
|
(myIndex, child.title if child.title.strip() > "" else "(missing)", myStartingRecord, myStartingRecord, offset, length) )
|
||||||
|
|
||||||
#last_name = "%04X" % myIndex
|
|
||||||
myIndex += 1
|
myIndex += 1
|
||||||
|
|
||||||
# Successfully parsed the entries
|
# Successfully parsed the entries
|
||||||
@ -717,13 +711,11 @@ class MobiWriter(object):
|
|||||||
if self.opts.verbose > 3 : self._oeb.logger.info(" node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X" % \
|
if self.opts.verbose > 3 : self._oeb.logger.info(" node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X" % \
|
||||||
(myIndex, self._ctoc_map[i]['klass'], child.title if child.title.strip() > "" else "(missing)", thisRecord, thisRecord, offset, length) )
|
(myIndex, self._ctoc_map[i]['klass'], child.title if child.title.strip() > "" else "(missing)", thisRecord, thisRecord, offset, length) )
|
||||||
|
|
||||||
#last_name = "%04X" % myIndex
|
|
||||||
myIndex += 1
|
myIndex += 1
|
||||||
|
|
||||||
# Successfully parsed the entries
|
# Successfully parsed the entries
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def _generate_tbs_book(self, nrecords, lastrecord):
|
def _generate_tbs_book(self, nrecords, lastrecord):
|
||||||
if self.opts.verbose > 3 :self._oeb.logger.info("Assembling TBS for Book: HTML record %03d of %03d" % \
|
if self.opts.verbose > 3 :self._oeb.logger.info("Assembling TBS for Book: HTML record %03d of %03d" % \
|
||||||
(nrecords, lastrecord) )
|
(nrecords, lastrecord) )
|
||||||
@ -787,7 +779,6 @@ class MobiWriter(object):
|
|||||||
|
|
||||||
self._tbSequence = tbSequence
|
self._tbSequence = tbSequence
|
||||||
|
|
||||||
|
|
||||||
def _generate_tbs_flat_periodical(self, nrecords, lastrecord):
|
def _generate_tbs_flat_periodical(self, nrecords, lastrecord):
|
||||||
# Flat periodicals <0x102> have a single section for all articles
|
# Flat periodicals <0x102> have a single section for all articles
|
||||||
# Structured periodicals <0x101 | 0x103> have one or more sections with articles
|
# Structured periodicals <0x101 | 0x103> have one or more sections with articles
|
||||||
@ -928,7 +919,6 @@ class MobiWriter(object):
|
|||||||
tbSequence += decint(arg3, DECINT_FORWARD) # arg3
|
tbSequence += decint(arg3, DECINT_FORWARD) # arg3
|
||||||
|
|
||||||
# Structured periodicals don't count periodical, section in nodeCount
|
# Structured periodicals don't count periodical, section in nodeCount
|
||||||
#tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount - 2) # nodeCount
|
|
||||||
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
|
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
|
||||||
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
|
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
|
||||||
else :
|
else :
|
||||||
@ -1222,7 +1212,6 @@ class MobiWriter(object):
|
|||||||
self._oeb.logger.info("%s" % " TOC structure conforms" if toc_conforms else " TOC structure non-conforming")
|
self._oeb.logger.info("%s" % " TOC structure conforms" if toc_conforms else " TOC structure non-conforming")
|
||||||
return toc_conforms
|
return toc_conforms
|
||||||
|
|
||||||
|
|
||||||
def _generate_text(self):
|
def _generate_text(self):
|
||||||
self._oeb.logger.info('Serializing markup content...')
|
self._oeb.logger.info('Serializing markup content...')
|
||||||
serializer = Serializer(self._oeb, self._images,
|
serializer = Serializer(self._oeb, self._images,
|
||||||
@ -1327,7 +1316,6 @@ class MobiWriter(object):
|
|||||||
nrecords += 1
|
nrecords += 1
|
||||||
self._text_nrecords = nrecords
|
self._text_nrecords = nrecords
|
||||||
|
|
||||||
|
|
||||||
def _generate_images(self):
|
def _generate_images(self):
|
||||||
self._oeb.logger.info('Serializing images...')
|
self._oeb.logger.info('Serializing images...')
|
||||||
images = [(index, href) for href, index in self._images.items()]
|
images = [(index, href) for href, index in self._images.items()]
|
||||||
@ -1717,8 +1705,8 @@ class MobiWriter(object):
|
|||||||
# 0x30 - 0x33 : Number of LIGT entries
|
# 0x30 - 0x33 : Number of LIGT entries
|
||||||
header.write('\0'*4)
|
header.write('\0'*4)
|
||||||
|
|
||||||
# 0x34 - 0x37 : Unknown
|
# 0x34 - 0x37 : Number of ctoc[] blocks
|
||||||
header.write(pack('>I', 1))
|
header.write(pack('>I', len(self._ctoc_records)))
|
||||||
|
|
||||||
# 0x38 - 0xb3 : Unknown (pad?)
|
# 0x38 - 0xb3 : Unknown (pad?)
|
||||||
header.write('\0'*124)
|
header.write('\0'*124)
|
||||||
@ -1740,7 +1728,6 @@ class MobiWriter(object):
|
|||||||
self._primary_index_record = len(self._records)
|
self._primary_index_record = len(self._records)
|
||||||
|
|
||||||
# GR: handle multiple ctoc records
|
# GR: handle multiple ctoc records
|
||||||
# self._records.extend([indx0, indx1, self._ctoc])
|
|
||||||
self._records.extend([indx0, indx1 ])
|
self._records.extend([indx0, indx1 ])
|
||||||
for (i,ctoc_record) in enumerate(self._ctoc_records):
|
for (i,ctoc_record) in enumerate(self._ctoc_records):
|
||||||
self._records.append(ctoc_record)
|
self._records.append(ctoc_record)
|
||||||
@ -1814,11 +1801,11 @@ class MobiWriter(object):
|
|||||||
# Is there enough room for this string in the current ctoc record?
|
# Is there enough room for this string in the current ctoc record?
|
||||||
if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str):
|
if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str):
|
||||||
# flush this ctoc, start a new one
|
# flush this ctoc, start a new one
|
||||||
print "closing ctoc_record at 0x%X" % self._ctoc.tell()
|
# print "closing ctoc_record at 0x%X" % self._ctoc.tell()
|
||||||
print "starting new ctoc with '%-50.50s ...'" % ctoc_str
|
# print "starting new ctoc with '%-50.50s ...'" % ctoc_str
|
||||||
# pad with 00
|
# pad with 00
|
||||||
pad = 0xfbf8 - self._ctoc.tell()
|
pad = 0xfbf8 - self._ctoc.tell()
|
||||||
print "padding %d bytes of 00" % pad
|
# print "padding %d bytes of 00" % pad
|
||||||
self._ctoc.write('\0' * (pad))
|
self._ctoc.write('\0' * (pad))
|
||||||
self._ctoc_records.append(self._ctoc.getvalue())
|
self._ctoc_records.append(self._ctoc.getvalue())
|
||||||
self._ctoc.truncate(0)
|
self._ctoc.truncate(0)
|
||||||
@ -1829,7 +1816,6 @@ class MobiWriter(object):
|
|||||||
self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
|
self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
|
||||||
return offset
|
return offset
|
||||||
|
|
||||||
|
|
||||||
def _add_flat_ctoc_node(self, node, ctoc, title=None):
|
def _add_flat_ctoc_node(self, node, ctoc, title=None):
|
||||||
# Process 'chapter' or 'article' nodes only, force either to 'chapter'
|
# Process 'chapter' or 'article' nodes only, force either to 'chapter'
|
||||||
t = node.title if title is None else title
|
t = node.title if title is None else title
|
||||||
@ -1846,8 +1832,6 @@ class MobiWriter(object):
|
|||||||
ctoc_name_map['klass'] = node.klass
|
ctoc_name_map['klass'] = node.klass
|
||||||
|
|
||||||
# Add title offset to name map
|
# Add title offset to name map
|
||||||
# ctoc_name_map['titleOffset'] = ctoc.tell()
|
|
||||||
# ctoc.write(decint(len(t), DECINT_FORWARD)+t)
|
|
||||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||||
self._chapterCount += 1
|
self._chapterCount += 1
|
||||||
|
|
||||||
@ -1856,7 +1840,6 @@ class MobiWriter(object):
|
|||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def _add_structured_ctoc_node(self, node, ctoc, title=None):
|
def _add_structured_ctoc_node(self, node, ctoc, title=None):
|
||||||
# Process 'periodical', 'section' and 'article'
|
# Process 'periodical', 'section' and 'article'
|
||||||
|
|
||||||
@ -1875,15 +1858,11 @@ class MobiWriter(object):
|
|||||||
|
|
||||||
if node.klass == 'chapter':
|
if node.klass == 'chapter':
|
||||||
# Add title offset to name map
|
# Add title offset to name map
|
||||||
# ctoc_name_map['titleOffset'] = ctoc.tell() + ctoc_offset
|
|
||||||
# ctoc.write(decint(len(t), DECINT_FORWARD)+t)
|
|
||||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||||
self._chapterCount += 1
|
self._chapterCount += 1
|
||||||
|
|
||||||
elif node.klass == 'periodical' :
|
elif node.klass == 'periodical' :
|
||||||
# Add title offset
|
# Add title offset
|
||||||
# ctoc_name_map['titleOffset'] = ctoc.tell() + ctoc_offset
|
|
||||||
# ctoc.write( decint(len(t), DECINT_FORWARD) + t )
|
|
||||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||||
|
|
||||||
# Look for existing class entry 'periodical' in _ctoc_map
|
# Look for existing class entry 'periodical' in _ctoc_map
|
||||||
@ -1896,16 +1875,12 @@ class MobiWriter(object):
|
|||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# class names should always be in CNCX 0 - no offset
|
# class names should always be in CNCX 0 - no offset
|
||||||
# ctoc_name_map['classOffset'] = ctoc.tell()
|
|
||||||
# ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
|
|
||||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||||
|
|
||||||
self._periodicalCount += 1
|
self._periodicalCount += 1
|
||||||
|
|
||||||
elif node.klass == 'section' :
|
elif node.klass == 'section' :
|
||||||
# Add title offset
|
# Add title offset
|
||||||
# ctoc_name_map['titleOffset'] = ctoc.tell() + ctoc_offset
|
|
||||||
# ctoc.write(decint(len(t), DECINT_FORWARD)+t)
|
|
||||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||||
|
|
||||||
# Look for existing class entry 'section' in _ctoc_map
|
# Look for existing class entry 'section' in _ctoc_map
|
||||||
@ -1918,16 +1893,12 @@ class MobiWriter(object):
|
|||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# class names should always be in CNCX 0 - no offset
|
# class names should always be in CNCX 0 - no offset
|
||||||
# ctoc_name_map['classOffset'] = ctoc.tell()
|
|
||||||
# ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
|
|
||||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||||
|
|
||||||
self._sectionCount += 1
|
self._sectionCount += 1
|
||||||
|
|
||||||
elif node.klass == 'article' :
|
elif node.klass == 'article' :
|
||||||
# Add title offset/title
|
# Add title offset/title
|
||||||
# ctoc_name_map['titleOffset'] = ctoc.tell() + ctoc_offset
|
|
||||||
# ctoc.write(decint(len(t), DECINT_FORWARD)+t)
|
|
||||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||||
|
|
||||||
# Look for existing class entry 'article' in _ctoc_map
|
# Look for existing class entry 'article' in _ctoc_map
|
||||||
@ -1939,15 +1910,11 @@ class MobiWriter(object):
|
|||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# class names should always be in CNCX 0 - no offset
|
# class names should always be in CNCX 0 - no offset
|
||||||
# ctoc_name_map['classOffset'] = ctoc.tell()
|
|
||||||
# ctoc.write(decint(len(node.klass), DECINT_FORWARD)+node.klass)
|
|
||||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||||
|
|
||||||
# Add description offset/description
|
# Add description offset/description
|
||||||
if node.description :
|
if node.description :
|
||||||
d = self._clean_text_value(node.description)
|
d = self._clean_text_value(node.description)
|
||||||
# ctoc_name_map['descriptionOffset'] = ctoc.tell() + ctoc_offset
|
|
||||||
# ctoc.write(decint(len(d), DECINT_FORWARD)+d)
|
|
||||||
ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
|
ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
|
||||||
else :
|
else :
|
||||||
ctoc_name_map['descriptionOffset'] = None
|
ctoc_name_map['descriptionOffset'] = None
|
||||||
@ -1955,8 +1922,6 @@ class MobiWriter(object):
|
|||||||
# Add author offset/attribution
|
# Add author offset/attribution
|
||||||
if node.author :
|
if node.author :
|
||||||
a = self._clean_text_value(node.author)
|
a = self._clean_text_value(node.author)
|
||||||
# ctoc_name_map['authorOffset'] = ctoc.tell() + ctoc_offset
|
|
||||||
# ctoc.write(decint(len(a), DECINT_FORWARD)+a)
|
|
||||||
ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
|
ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
|
||||||
else :
|
else :
|
||||||
ctoc_name_map['authorOffset'] = None
|
ctoc_name_map['authorOffset'] = None
|
||||||
@ -1971,7 +1936,6 @@ class MobiWriter(object):
|
|||||||
# append this node's name_map to map
|
# append this node's name_map to map
|
||||||
self._ctoc_map.append(ctoc_name_map)
|
self._ctoc_map.append(ctoc_name_map)
|
||||||
|
|
||||||
|
|
||||||
def _generate_ctoc(self):
|
def _generate_ctoc(self):
|
||||||
# Generate the compiled TOC strings
|
# Generate the compiled TOC strings
|
||||||
# Each node has 1-4 CTOC entries:
|
# Each node has 1-4 CTOC entries:
|
||||||
@ -2261,7 +2225,6 @@ class MobiWriter(object):
|
|||||||
self._oeb.logger.info( " Unrecognized class %s in structured document" % section.klass)
|
self._oeb.logger.info( " Unrecognized class %s in structured document" % section.klass)
|
||||||
return sectionIndices, sectionParents
|
return sectionIndices, sectionParents
|
||||||
|
|
||||||
|
|
||||||
def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents):
|
def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents):
|
||||||
sectionArticles = list(section.iter())[1:]
|
sectionArticles = list(section.iter())[1:]
|
||||||
# Iterate over the section's articles
|
# Iterate over the section's articles
|
||||||
@ -2280,7 +2243,6 @@ class MobiWriter(object):
|
|||||||
myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index )
|
myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index )
|
||||||
mySectionParent.addArticle( myNewArticle )
|
mySectionParent.addArticle( myNewArticle )
|
||||||
|
|
||||||
|
|
||||||
def _add_book_chapters(self, myDoc, indxt, indices):
|
def _add_book_chapters(self, myDoc, indxt, indices):
|
||||||
chapterCount = myDoc.documentStructure.chapterCount()
|
chapterCount = myDoc.documentStructure.chapterCount()
|
||||||
if self.opts.verbose > 3 :
|
if self.opts.verbose > 3 :
|
||||||
|
Loading…
x
Reference in New Issue
Block a user