mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
PML Output: Generate \CX Tags as chapter anchors. PDB eReader Output: Use \CX tags to generate chapter index.
This commit is contained in:
parent
288b64529c
commit
35fc570d24
@ -42,8 +42,8 @@ class Writer(FormatWriter):
|
|||||||
pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
|
pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace')
|
||||||
|
|
||||||
text, text_sizes = self._text(pml)
|
text, text_sizes = self._text(pml)
|
||||||
chapter_index = self._chapter_index(pml)
|
chapter_index = self._index_item(r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"', pml)
|
||||||
link_index = self._link_index(pml)
|
link_index = self._index_item(r'(?s)\\Q="(?P<text>.+?)"', pml)
|
||||||
images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
|
images = self._images(oeb_book.manifest, pmlmlizer.image_hrefs)
|
||||||
metadata = [self._metadata(metadata)]
|
metadata = [self._metadata(metadata)]
|
||||||
hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))]
|
hr = [self._header_record(len(text), len(chapter_index), len(link_index), len(images))]
|
||||||
@ -101,38 +101,24 @@ class Writer(FormatWriter):
|
|||||||
|
|
||||||
return pml_pages, text_sizes
|
return pml_pages, text_sizes
|
||||||
|
|
||||||
def _index_item(self, mo):
|
def _index_item(self, regex, pml):
|
||||||
index = ''
|
|
||||||
if 'text' in mo.groupdict().keys():
|
|
||||||
index += struct.pack('>L', mo.start())
|
|
||||||
text = mo.group('text')
|
|
||||||
# Strip all PML tags from text
|
|
||||||
text = re.sub(r'\\U[0-9a-z]{4}', '', text)
|
|
||||||
text = re.sub(r'\\a\d{3}', '', text)
|
|
||||||
text = re.sub(r'\\.', '', text)
|
|
||||||
# Add appropriate spacing to denote the various levels of headings
|
|
||||||
if 'val' in mo.groupdict().keys():
|
|
||||||
text = '%s%s' % (' ' * 4 * int(mo.group('val')), text)
|
|
||||||
index += text
|
|
||||||
index += '\x00'
|
|
||||||
return index
|
|
||||||
|
|
||||||
def _chapter_index(self, pml):
|
|
||||||
chapter_marks = [
|
|
||||||
r'(?s)\\x(?P<text>.+?)\\x',
|
|
||||||
r'(?s)\\X(?P<val>[0-4])(?P<text>.*?)\\X[0-4]',
|
|
||||||
r'(?s)\\C(?P<val>\d)="(?P<text>.+?)"',
|
|
||||||
]
|
|
||||||
index = []
|
index = []
|
||||||
for chapter_mark in chapter_marks:
|
for mo in re.finditer(regex, pml):
|
||||||
for mo in re.finditer(chapter_mark, pml):
|
item = ''
|
||||||
index.append(self._index_item(mo))
|
if 'text' in mo.groupdict().keys():
|
||||||
return index
|
item += struct.pack('>L', mo.start())
|
||||||
|
text = mo.group('text')
|
||||||
def _link_index(self, pml):
|
# Strip all PML tags from text
|
||||||
index = []
|
text = re.sub(r'\\U[0-9a-z]{4}', '', text)
|
||||||
for mo in re.finditer(r'(?s)\\Q="(?P<text>.+?)"', pml):
|
text = re.sub(r'\\a\d{3}', '', text)
|
||||||
index.append(self._index_item(mo))
|
text = re.sub(r'\\.', '', text)
|
||||||
|
# Add appropriate spacing to denote the various levels of headings
|
||||||
|
if 'val' in mo.groupdict().keys():
|
||||||
|
text = '%s%s' % (' ' * 4 * int(mo.group('val')), text)
|
||||||
|
item += text
|
||||||
|
item += '\x00'
|
||||||
|
if item:
|
||||||
|
index.append(item)
|
||||||
return index
|
return index
|
||||||
|
|
||||||
def _images(self, manifest, image_hrefs):
|
def _images(self, manifest, image_hrefs):
|
||||||
|
@ -79,6 +79,16 @@ class PMLMLizer(object):
|
|||||||
self.log.info('Converting XHTML to PML markup...')
|
self.log.info('Converting XHTML to PML markup...')
|
||||||
self.oeb_book = oeb_book
|
self.oeb_book = oeb_book
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
|
|
||||||
|
# This is used for adding \CX tags chapter markers. This is separate
|
||||||
|
# from the optional inline toc.
|
||||||
|
self.toc = {}
|
||||||
|
for item in oeb_book.toc:
|
||||||
|
page, mid, id = item.href.partition('#')
|
||||||
|
if not self.toc.get(page, None):
|
||||||
|
self.toc[page] = {}
|
||||||
|
self.toc[page][id] = item.title
|
||||||
|
|
||||||
return self.pmlmlize_spine()
|
return self.pmlmlize_spine()
|
||||||
|
|
||||||
def pmlmlize_spine(self):
|
def pmlmlize_spine(self):
|
||||||
@ -107,7 +117,11 @@ class PMLMLizer(object):
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
def get_toc(self):
|
def get_toc(self):
|
||||||
toc = [u'']
|
'''
|
||||||
|
Generation of inline TOC
|
||||||
|
'''
|
||||||
|
|
||||||
|
toc = []
|
||||||
if self.opts.inline_toc:
|
if self.opts.inline_toc:
|
||||||
self.log.debug('Generating table of contents...')
|
self.log.debug('Generating table of contents...')
|
||||||
toc.append(u'\\X0%s\\X0\n\n' % _('Table of Contents:'))
|
toc.append(u'\\X0%s\\X0\n\n' % _('Table of Contents:'))
|
||||||
@ -177,14 +191,14 @@ class PMLMLizer(object):
|
|||||||
def dump_text(self, elem, stylizer, page, tag_stack=[]):
|
def dump_text(self, elem, stylizer, page, tag_stack=[]):
|
||||||
if not isinstance(elem.tag, basestring) \
|
if not isinstance(elem.tag, basestring) \
|
||||||
or namespace(elem.tag) != XHTML_NS:
|
or namespace(elem.tag) != XHTML_NS:
|
||||||
return [u'']
|
return []
|
||||||
|
|
||||||
text = [u'']
|
text = []
|
||||||
style = stylizer.style(elem)
|
style = stylizer.style(elem)
|
||||||
|
|
||||||
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
||||||
or style['visibility'] == 'hidden':
|
or style['visibility'] == 'hidden':
|
||||||
return [u'']
|
return []
|
||||||
|
|
||||||
tag = barename(elem.tag)
|
tag = barename(elem.tag)
|
||||||
tag_count = 0
|
tag_count = 0
|
||||||
@ -213,6 +227,12 @@ class PMLMLizer(object):
|
|||||||
else:
|
else:
|
||||||
w += '="50%"'
|
w += '="50%"'
|
||||||
text.append(w)
|
text.append(w)
|
||||||
|
toc_id = elem.attrib.get('id', None)
|
||||||
|
if toc_id:
|
||||||
|
if self.toc.get(page.href, None):
|
||||||
|
toc_title = self.toc[page.href].get(toc_id, None)
|
||||||
|
if toc_title:
|
||||||
|
text.append('\\C1="%s"' % toc_title)
|
||||||
|
|
||||||
# Process style information that needs holds a single tag
|
# Process style information that needs holds a single tag
|
||||||
# Commented out because every page in an OEB book starts with this style
|
# Commented out because every page in an OEB book starts with this style
|
||||||
@ -287,4 +307,3 @@ class PMLMLizer(object):
|
|||||||
if tag != 'block':
|
if tag != 'block':
|
||||||
text.append('\\%s' % tag)
|
text.append('\\%s' % tag)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user