From 1424435bff6674ecc52e8d5f13b0d0801b595573 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 11 Oct 2009 20:34:33 -0400 Subject: [PATCH] eReader PDB Output: Generate chapter and link indexes properly. --- src/calibre/ebooks/pdb/ereader/writer.py | 42 +++++++++++------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py index b8f2cddd0b..a1203aa9f2 100644 --- a/src/calibre/ebooks/pdb/ereader/writer.py +++ b/src/calibre/ebooks/pdb/ereader/writer.py @@ -42,17 +42,11 @@ class Writer(FormatWriter): pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace') text, text_sizes = self._text(pml) - #chapter_index = self._chapter_index(pml) - #chapter_index = [chapter_index] if chapter_index != '' else [] - chapter_index = [] - #link_index = self._link_index(pml) - #link_index = [link_index] if link_index != '' else [] - link_index = [] + chapter_index = self._chapter_index(pml) + link_index = self._link_index(pml) images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs) metadata = [self._metadata(metadata)] - chapter_index_count = len(chapter_index[0].split('\x00')) - 1 if len(chapter_index) >= 1 else 0 - link_index_count = len(link_index[0].split('\x00')) - 1 if len(link_index) >= 1 else 0 - hr = [self._header_record(len(text), chapter_index_count, link_index_count, len(images))] + hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))] ''' Record order as generated by Dropbook. @@ -110,12 +104,15 @@ class Writer(FormatWriter): def _index_item(self, mo): index = '' if 'text' in mo.groupdict().keys(): - index += struct.pack('>L', mo.start('text')) + index += struct.pack('>L', mo.start()) + text = mo.group('text') # Strip all PML tags from text - text = re.sub(r'\\.', '', mo.group('text')) + text = re.sub(r'\\U[0-9a-z]{4}', '', text) + text = re.sub(r'\\a\d{3}', '', text) + text = re.sub(r'\\.', '', text) # Add appropriate spacing to denote the various levels of headings if 'val' in mo.groupdict().keys(): - text = '%s%s' % ('\x20' * 4 * int(mo.group('val')), text) + text = '%s%s' % (' ' * 4 * int(mo.group('val')), text) index += text index += '\x00' return index @@ -126,16 +123,16 @@ class Writer(FormatWriter): r'(?s)\\X(?P[0-4])(?P.*?)\\X[0-4]', r'(?s)\\C(?P\d)="(?P.+?)"', ] - index = '' + index = [] for chapter_mark in chapter_marks: for mo in re.finditer(chapter_mark, pml): - index += self._index_item(mo) + index.append(self._index_item(mo)) return index def _link_index(self, pml): - index = '' + index = [] for mo in re.finditer(r'(?s)\\Q="(?P.+?)"', pml): - index += self._index_item(mo) + index.append(self._index_item(mo)) return index def _images(self, manifest, image_hrefs): @@ -213,27 +210,26 @@ class Writer(FormatWriter): non_text_offset = text_count + 1 if chapter_count > 0: - chapter_offset = text_count + 1 + chapter_offset = non_text_offset else: chapter_offset = text_count - if link_count > 0: - link_offset = chapter_offset + 1 + link_offset = chapter_offset + chapter_count else: link_offset = chapter_offset if image_count > 0: - image_data_offset = link_offset + 1 + image_data_offset = link_offset + link_count meta_data_offset = image_data_offset + image_count last_data_offset = meta_data_offset + 1 else: - meta_data_offset = link_offset + 1 + meta_data_offset = link_offset + link_count last_data_offset = meta_data_offset + 1 image_data_offset = last_data_offset - if chapter_count <= 0: + if chapter_count == 0: chapter_offset = last_data_offset - if link_count <= 0: + if link_count == 0: link_offset = last_data_offset record = ''