diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py index 263f6964bf..a379899af5 100644 --- a/src/calibre/ebooks/pdb/ereader/writer.py +++ b/src/calibre/ebooks/pdb/ereader/writer.py @@ -42,8 +42,8 @@ class Writer(FormatWriter): pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace') text, text_sizes = self._text(pml) - chapter_index = self._chapter_index(pml) - link_index = self._link_index(pml) + chapter_index = self._index_item(r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"', pml) + link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml) images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs) metadata = [self._metadata(metadata)] hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))] @@ -101,38 +101,24 @@ class Writer(FormatWriter): return pml_pages, text_sizes - def _index_item(self, mo): - index = '' - if 'text' in mo.groupdict().keys(): - index += struct.pack('>L', mo.start()) - text = mo.group('text') - # Strip all PML tags from text - text = re.sub(r'\\U[0-9a-z]{4}', '', text) - text = re.sub(r'\\a\d{3}', '', text) - text = re.sub(r'\\.', '', text) - # Add appropriate spacing to denote the various levels of headings - if 'val' in mo.groupdict().keys(): - text = '%s%s' % (' ' * 4 * int(mo.group('val')), text) - index += text - index += '\x00' - return index - - def _chapter_index(self, pml): - chapter_marks = [ - r'(?s)\\x(?P<text>.+?)\\x', - r'(?s)\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]', - r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"', - ] + def _index_item(self, regex, pml): index = [] - for chapter_mark in chapter_marks: - for mo in re.finditer(chapter_mark, pml): - index.append(self._index_item(mo)) - return index - - def _link_index(self, pml): - index = [] - for mo in re.finditer(r'(?s)\\Q="(?P<text>.+?)"', pml): - index.append(self._index_item(mo)) + for mo in re.finditer(regex, pml): + item = '' + if 'text' in mo.groupdict().keys(): + item += struct.pack('>L', mo.start()) + text = 
mo.group('text') + # Strip all PML tags from text + text = re.sub(r'\\U[0-9a-z]{4}', '', text) + text = re.sub(r'\\a\d{3}', '', text) + text = re.sub(r'\\.', '', text) + # Add appropriate spacing to denote the various levels of headings + if 'val' in mo.groupdict().keys(): + text = '%s%s' % (' ' * 4 * int(mo.group('val')), text) + item += text + item += '\x00' + if item: + index.append(item) return index def _images(self, manifest, image_hrefs): diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index b23cd40813..ccce95fce6 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -79,6 +79,16 @@ class PMLMLizer(object): self.log.info('Converting XHTML to PML markup...') self.oeb_book = oeb_book self.opts = opts + + # This is used for adding \CX tags chapter markers. This is separate + # from the optional inline toc. + self.toc = {} + for item in oeb_book.toc: + page, mid, id = item.href.partition('#') + if not self.toc.get(page, None): + self.toc[page] = {} + self.toc[page][id] = item.title + return self.pmlmlize_spine() def pmlmlize_spine(self): @@ -107,7 +117,11 @@ class PMLMLizer(object): return output def get_toc(self): - toc = [u''] + ''' + Generation of inline TOC + ''' + + toc = [] if self.opts.inline_toc: self.log.debug('Generating table of contents...') toc.append(u'\\X0%s\\X0\n\n' % _('Table of Contents:')) @@ -177,14 +191,14 @@ class PMLMLizer(object): def dump_text(self, elem, stylizer, page, tag_stack=[]): if not isinstance(elem.tag, basestring) \ or namespace(elem.tag) != XHTML_NS: - return [u''] + return [] - text = [u''] + text = [] style = stylizer.style(elem) if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ or style['visibility'] == 'hidden': - return [u''] + return [] tag = barename(elem.tag) tag_count = 0 @@ -213,6 +227,12 @@ class PMLMLizer(object): else: w += '="50%"' text.append(w) + toc_id = elem.attrib.get('id', None) + if toc_id: + if self.toc.get(page.href, None): 
+ toc_title = self.toc[page.href].get(toc_id, None) + if toc_title: + text.append('\\C1="%s"' % toc_title) # Process style information that needs holds a single tag # Commented out because every page in an OEB book starts with this style @@ -287,4 +307,3 @@ class PMLMLizer(object): if tag != 'block': text.append('\\%s' % tag) return text -