PML Input: Create TOC from CX tags.

This commit is contained in:
John Schember 2009-11-22 12:00:00 -05:00
parent d396739429
commit 07f9db1b20
2 changed files with 28 additions and 9 deletions

View File

@@ -11,7 +11,8 @@ import shutil
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
from calibre.ebooks.pml.pmlconverter import pml_to_html from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
class PMLInput(InputFormatPlugin): class PMLInput(InputFormatPlugin):
@@ -22,7 +23,7 @@ class PMLInput(InputFormatPlugin):
# pmlz is a zip file containing pml files and png images. # pmlz is a zip file containing pml files and png images.
file_types = set(['pml', 'pmlz']) file_types = set(['pml', 'pmlz'])
def process_pml(self, pml_path, html_path): def process_pml(self, pml_path, html_path, close_all=False):
pclose = False pclose = False
hclose = False hclose = False
@@ -44,7 +45,8 @@ class PMLInput(InputFormatPlugin):
ienc = self.options.input_encoding ienc = self.options.input_encoding
self.log.debug('Converting PML to HTML...') self.log.debug('Converting PML to HTML...')
html = pml_to_html(pml_stream.read().decode(ienc)) hizer = PML_HTMLizer(close_all)
html = hizer.parse_pml(pml_stream.read().decode(ienc), html_path)
html_stream.write('<html><head><title /></head><body>%s</body></html>' % html.encode('utf-8', 'replace')) html_stream.write('<html><head><title /></head><body>%s</body></html>' % html.encode('utf-8', 'replace'))
if pclose: if pclose:
@@ -52,11 +54,14 @@ class PMLInput(InputFormatPlugin):
if hclose: if hclose:
html_stream.close() html_stream.close()
return hizer.get_toc()
def convert(self, stream, options, file_ext, log, def convert(self, stream, options, file_ext, log,
accelerators): accelerators):
self.options = options self.options = options
self.log = log self.log = log
pages, images = [], [] pages, images = [], []
toc = TOC()
if file_ext == 'pmlz': if file_ext == 'pmlz':
log.debug('De-compressing content to temporary directory...') log.debug('De-compressing content to temporary directory...')
@@ -71,7 +76,8 @@ class PMLInput(InputFormatPlugin):
pages.append(html_name) pages.append(html_name)
log.debug('Processing PML item %s...' % pml) log.debug('Processing PML item %s...' % pml)
self.process_pml(pml, html_path) ttoc = self.process_pml(pml, html_path)
toc += ttoc
imgs = glob.glob(os.path.join(tdir, '*.png')) imgs = glob.glob(os.path.join(tdir, '*.png'))
if len(imgs) > 0: if len(imgs) > 0:
@@ -84,7 +90,7 @@ class PMLInput(InputFormatPlugin):
shutil.move(img, pimg_path) shutil.move(img, pimg_path)
else: else:
self.process_pml(stream, 'index.html') toc = self.process_pml(stream, 'index.html')
pages.append('index.html') pages.append('index.html')
images = [] images = []
@@ -103,7 +109,9 @@ class PMLInput(InputFormatPlugin):
log.debug('Generating manifest...') log.debug('Generating manifest...')
opf.create_manifest(manifest_items) opf.create_manifest(manifest_items)
opf.create_spine(pages) opf.create_spine(pages)
opf.set_toc(toc)
with open('metadata.opf', 'wb') as opffile: with open('metadata.opf', 'wb') as opffile:
opf.render(opffile) with open('toc.ncx', 'wb') as tocfile:
opf.render(opffile, tocfile, 'toc.ncx')
return os.path.join(os.getcwd(), 'metadata.opf') return os.path.join(os.getcwd(), 'metadata.opf')

View File

@@ -12,6 +12,7 @@ import re
import StringIO import StringIO
from calibre import my_unichr, prepare_string_for_xml from calibre import my_unichr, prepare_string_for_xml
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.pdb.ereader import image_name from calibre.ebooks.pdb.ereader import image_name
class PML_HTMLizer(object): class PML_HTMLizer(object):
@@ -118,6 +119,8 @@ class PML_HTMLizer(object):
def __init__(self, close_all): def __init__(self, close_all):
self.close_all = close_all self.close_all = close_all
self.state = {} self.state = {}
self.toc = TOC()
self.file_name = ''
def prepare_pml(self, pml): def prepare_pml(self, pml):
# Remove comments # Remove comments
@@ -290,11 +293,14 @@ class PML_HTMLizer(object):
return value.strip() return value.strip()
def parse_pml(self, pml): def parse_pml(self, pml, file_name=''):
pml = self.prepare_pml(pml) pml = self.prepare_pml(pml)
output = [] output = []
self.state = {} self.state = {}
self.toc = TOC()
self.file_name = file_name
for s in self.STATES: for s in self.STATES:
self.state[s] = [False, '']; self.state[s] = [False, ''];
@@ -350,8 +356,10 @@ class PML_HTMLizer(object):
text = '<br /><br style="page-break-after: always;" />' text = '<br /><br style="page-break-after: always;" />'
elif c == 'C': elif c == 'C':
# This should be made to create a TOC entry # This should be made to create a TOC entry
line.read(1) l = line.read(1)
self.code_value(line) id = 'pml_toc-%s' % len(self.toc)
self.toc.add_item(self.file_name, id, self.code_value(line))
text = '<span id="%s"></span>' % id
elif c == 'n': elif c == 'n':
pass pass
elif c == 'F': elif c == 'F':
@@ -384,6 +392,9 @@ class PML_HTMLizer(object):
return u'\n'.join(output) return u'\n'.join(output)
def get_toc(self):
return self.toc
def pml_to_html(pml, close_all=False): def pml_to_html(pml, close_all=False):
''' '''