mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
PML Output: Generate \CX Tags as chapter anchors. PDB eReader Output: Use \CX tags to generate chapter index.
This commit is contained in:
parent
288b64529c
commit
35fc570d24
@ -42,8 +42,8 @@ class Writer(FormatWriter):
|
||||
pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
|
||||
|
||||
text, text_sizes = self._text(pml)
|
||||
chapter_index = self._chapter_index(pml)
|
||||
link_index = self._link_index(pml)
|
||||
chapter_index = self._index_item(r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"', pml)
|
||||
link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
|
||||
images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
|
||||
metadata = [self._metadata(metadata)]
|
||||
hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))]
|
||||
@ -101,10 +101,12 @@ class Writer(FormatWriter):
|
||||
|
||||
return pml_pages, text_sizes
|
||||
|
||||
def _index_item(self, mo):
|
||||
index = ''
|
||||
def _index_item(self, regex, pml):
|
||||
index = []
|
||||
for mo in re.finditer(regex, pml):
|
||||
item = ''
|
||||
if 'text' in mo.groupdict().keys():
|
||||
index += struct.pack('>L', mo.start())
|
||||
item += struct.pack('>L', mo.start())
|
||||
text = mo.group('text')
|
||||
# Strip all PML tags from text
|
||||
text = re.sub(r'\\U[0-9a-z]{4}', '', text)
|
||||
@ -113,26 +115,10 @@ class Writer(FormatWriter):
|
||||
# Add appropriate spacing to denote the various levels of headings
|
||||
if 'val' in mo.groupdict().keys():
|
||||
text = '%s%s' % (' ' * 4 * int(mo.group('val')), text)
|
||||
index += text
|
||||
index += '\x00'
|
||||
return index
|
||||
|
||||
def _chapter_index(self, pml):
|
||||
chapter_marks = [
|
||||
r'(?s)\\x(?P<text>.+?)\\x',
|
||||
r'(?s)\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]',
|
||||
r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"',
|
||||
]
|
||||
index = []
|
||||
for chapter_mark in chapter_marks:
|
||||
for mo in re.finditer(chapter_mark, pml):
|
||||
index.append(self._index_item(mo))
|
||||
return index
|
||||
|
||||
def _link_index(self, pml):
|
||||
index = []
|
||||
for mo in re.finditer(r'(?s)\\Q="(?P<text>.+?)"', pml):
|
||||
index.append(self._index_item(mo))
|
||||
item += text
|
||||
item += '\x00'
|
||||
if item:
|
||||
index.append(item)
|
||||
return index
|
||||
|
||||
def _images(self, manifest, image_hrefs):
|
||||
|
@ -79,6 +79,16 @@ class PMLMLizer(object):
|
||||
self.log.info('Converting XHTML to PML markup...')
|
||||
self.oeb_book = oeb_book
|
||||
self.opts = opts
|
||||
|
||||
# This is used for adding \CX chapter marker tags. This is separate
|
||||
# from the optional inline toc.
|
||||
self.toc = {}
|
||||
for item in oeb_book.toc:
|
||||
page, mid, id = item.href.partition('#')
|
||||
if not self.toc.get(page, None):
|
||||
self.toc[page] = {}
|
||||
self.toc[page][id] = item.title
|
||||
|
||||
return self.pmlmlize_spine()
|
||||
|
||||
def pmlmlize_spine(self):
|
||||
@ -107,7 +117,11 @@ class PMLMLizer(object):
|
||||
return output
|
||||
|
||||
def get_toc(self):
|
||||
toc = [u'']
|
||||
'''
|
||||
Generation of inline TOC
|
||||
'''
|
||||
|
||||
toc = []
|
||||
if self.opts.inline_toc:
|
||||
self.log.debug('Generating table of contents...')
|
||||
toc.append(u'\\X0%s\\X0\n\n' % _('Table of Contents:'))
|
||||
@ -177,14 +191,14 @@ class PMLMLizer(object):
|
||||
def dump_text(self, elem, stylizer, page, tag_stack=[]):
|
||||
if not isinstance(elem.tag, basestring) \
|
||||
or namespace(elem.tag) != XHTML_NS:
|
||||
return [u'']
|
||||
return []
|
||||
|
||||
text = [u'']
|
||||
text = []
|
||||
style = stylizer.style(elem)
|
||||
|
||||
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
||||
or style['visibility'] == 'hidden':
|
||||
return [u'']
|
||||
return []
|
||||
|
||||
tag = barename(elem.tag)
|
||||
tag_count = 0
|
||||
@ -213,6 +227,12 @@ class PMLMLizer(object):
|
||||
else:
|
||||
w += '="50%"'
|
||||
text.append(w)
|
||||
toc_id = elem.attrib.get('id', None)
|
||||
if toc_id:
|
||||
if self.toc.get(page.href, None):
|
||||
toc_title = self.toc[page.href].get(toc_id, None)
|
||||
if toc_title:
|
||||
text.append('\\C1="%s"' % toc_title)
|
||||
|
||||
# Process style information that needs only a single tag
|
||||
# Commented out because every page in an OEB book starts with this style
|
||||
@ -287,4 +307,3 @@ class PMLMLizer(object):
|
||||
if tag != 'block':
|
||||
text.append('\\%s' % tag)
|
||||
return text
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user