eReader PDB Output: Generate chapter and link indexes properly.

2025-07-09 03:04:10 -04:00 · 2009-10-11 20:34:33 -04:00 · 2009-10-11 20:34:33 -04:00 · 1424435bff
commit 1424435bff
parent 86a7524b11
1 changed file with 19 additions and 23 deletions
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@@ -42,17 +42,11 @@ class Writer(FormatWriter):
        pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
        text, text_sizes = self._text(pml)
-        #chapter_index = self._chapter_index(pml)
+        chapter_index = self._chapter_index(pml)
-        #chapter_index = [chapter_index] if chapter_index != '' else []
+        link_index = self._link_index(pml)
        chapter_index = []
        #link_index = self._link_index(pml)
        #link_index = [link_index] if link_index != '' else []
        link_index = []
        images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
        metadata = [self._metadata(metadata)]
-        chapter_index_count = len(chapter_index[0].split('\x00')) - 1 if len(chapter_index) >= 1 else 0
+        hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))]
        link_index_count = len(link_index[0].split('\x00')) - 1 if len(link_index) >= 1 else 0
        hr = [self._header_record(len(text), chapter_index_count, link_index_count, len(images))]
        '''
        Record order as generated by Dropbook.
@@ -110,12 +104,15 @@ class Writer(FormatWriter):
    def _index_item(self, mo):
        index = ''
        if 'text' in mo.groupdict().keys():
-            index += struct.pack('>L', mo.start('text'))
+            index += struct.pack('>L', mo.start())
            text = mo.group('text')
            # Strip all PML tags from text
-            text = re.sub(r'\\.', '', mo.group('text'))
+            text = re.sub(r'\\U[0-9a-z]{4}', '', text)
            text = re.sub(r'\\a\d{3}', '', text)
            text = re.sub(r'\\.', '', text)
            # Add appropriate spacing to denote the various levels of headings
            if 'val' in mo.groupdict().keys():
-                text = '%s%s' % ('\x20' * 4 * int(mo.group('val')), text)
+                text = '%s%s' % (' ' * 4 * int(mo.group('val')), text)
            index += text
            index += '\x00'
        return index
@@ -126,16 +123,16 @@ class Writer(FormatWriter):
            r'(?s)\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]',
            r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"',
        ]
-        index = ''
+        index = []
        for chapter_mark in chapter_marks:
            for mo in re.finditer(chapter_mark, pml):
-                index += self._index_item(mo)
+                index.append(self._index_item(mo))
        return index
    def _link_index(self, pml):
-        index = ''
+        index = []
        for mo in re.finditer(r'(?s)\\Q="(?P<text>.+?)"', pml):
-            index += self._index_item(mo)
+            index.append(self._index_item(mo))
        return index
    def _images(self, manifest, image_hrefs):
@@ -213,27 +210,26 @@ class Writer(FormatWriter):
        non_text_offset = text_count + 1
        if chapter_count > 0:
-            chapter_offset = text_count + 1
+            chapter_offset = non_text_offset
        else:
            chapter_offset = text_count
        if link_count > 0:
-            link_offset = chapter_offset + 1
+            link_offset = chapter_offset + chapter_count
        else:
            link_offset = chapter_offset
        if image_count > 0:
-            image_data_offset = link_offset + 1
+            image_data_offset = link_offset + link_count
            meta_data_offset = image_data_offset + image_count
            last_data_offset = meta_data_offset + 1
        else:
-            meta_data_offset = link_offset + 1
+            meta_data_offset = link_offset + link_count
            last_data_offset = meta_data_offset + 1
            image_data_offset = last_data_offset
-        if chapter_count <= 0:
+        if chapter_count == 0:
            chapter_offset = last_data_offset
-        if link_count <= 0:
+        if link_count == 0:
            link_offset = last_data_offset
        record = ''