eReader PDB Output: Generate chapter and link indexes properly.

This commit is contained in:
John Schember 2009-10-11 20:34:33 -04:00
parent 86a7524b11
commit 1424435bff

View File

@@ -42,17 +42,11 @@ class Writer(FormatWriter):
pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace') pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
text, text_sizes = self._text(pml) text, text_sizes = self._text(pml)
#chapter_index = self._chapter_index(pml) chapter_index = self._chapter_index(pml)
#chapter_index = [chapter_index] if chapter_index != '' else [] link_index = self._link_index(pml)
chapter_index = []
#link_index = self._link_index(pml)
#link_index = [link_index] if link_index != '' else []
link_index = []
images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs) images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
metadata = [self._metadata(metadata)] metadata = [self._metadata(metadata)]
chapter_index_count = len(chapter_index[0].split('\x00')) - 1 if len(chapter_index) >= 1 else 0 hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))]
link_index_count = len(link_index[0].split('\x00')) - 1 if len(link_index) >= 1 else 0
hr = [self._header_record(len(text), chapter_index_count, link_index_count, len(images))]
''' '''
Record order as generated by Dropbook. Record order as generated by Dropbook.
@@ -110,12 +104,15 @@ class Writer(FormatWriter):
def _index_item(self, mo): def _index_item(self, mo):
index = '' index = ''
if 'text' in mo.groupdict().keys(): if 'text' in mo.groupdict().keys():
index += struct.pack('>L', mo.start('text')) index += struct.pack('>L', mo.start())
text = mo.group('text')
# Strip all PML tags from text # Strip all PML tags from text
text = re.sub(r'\\.', '', mo.group('text')) text = re.sub(r'\\U[0-9a-z]{4}', '', text)
text = re.sub(r'\\a\d{3}', '', text)
text = re.sub(r'\\.', '', text)
# Add appropriate spacing to denote the various levels of headings # Add appropriate spacing to denote the various levels of headings
if 'val' in mo.groupdict().keys(): if 'val' in mo.groupdict().keys():
text = '%s%s' % ('\x20' * 4 * int(mo.group('val')), text) text = '%s%s' % (' ' * 4 * int(mo.group('val')), text)
index += text index += text
index += '\x00' index += '\x00'
return index return index
@@ -126,16 +123,16 @@ class Writer(FormatWriter):
r'(?s)\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]', r'(?s)\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]',
r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"', r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"',
] ]
index = '' index = []
for chapter_mark in chapter_marks: for chapter_mark in chapter_marks:
for mo in re.finditer(chapter_mark, pml): for mo in re.finditer(chapter_mark, pml):
index += self._index_item(mo) index.append(self._index_item(mo))
return index return index
def _link_index(self, pml): def _link_index(self, pml):
index = '' index = []
for mo in re.finditer(r'(?s)\\Q="(?P<text>.+?)"', pml): for mo in re.finditer(r'(?s)\\Q="(?P<text>.+?)"', pml):
index += self._index_item(mo) index.append(self._index_item(mo))
return index return index
def _images(self, manifest, image_hrefs): def _images(self, manifest, image_hrefs):
@@ -213,27 +210,26 @@ class Writer(FormatWriter):
non_text_offset = text_count + 1 non_text_offset = text_count + 1
if chapter_count > 0: if chapter_count > 0:
chapter_offset = text_count + 1 chapter_offset = non_text_offset
else: else:
chapter_offset = text_count chapter_offset = text_count
if link_count > 0: if link_count > 0:
link_offset = chapter_offset + 1 link_offset = chapter_offset + chapter_count
else: else:
link_offset = chapter_offset link_offset = chapter_offset
if image_count > 0: if image_count > 0:
image_data_offset = link_offset + 1 image_data_offset = link_offset + link_count
meta_data_offset = image_data_offset + image_count meta_data_offset = image_data_offset + image_count
last_data_offset = meta_data_offset + 1 last_data_offset = meta_data_offset + 1
else: else:
meta_data_offset = link_offset + 1 meta_data_offset = link_offset + link_count
last_data_offset = meta_data_offset + 1 last_data_offset = meta_data_offset + 1
image_data_offset = last_data_offset image_data_offset = last_data_offset
if chapter_count <= 0: if chapter_count == 0:
chapter_offset = last_data_offset chapter_offset = last_data_offset
if link_count <= 0: if link_count == 0:
link_offset = last_data_offset link_offset = last_data_offset
record = '' record = ''