PML input plugin.

This commit is contained in:
John Schember 2009-05-02 09:45:51 -04:00
parent 9787215d55
commit db159de066
4 changed files with 106 additions and 2 deletions

View File

@ -292,6 +292,7 @@ from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.pdf.output import PDFOutput
from calibre.ebooks.pml.input import PMLInput
from calibre.customize.profiles import input_profiles, output_profiles
from calibre.devices.prs500.driver import PRS500
@ -306,7 +307,7 @@ from calibre.devices.jetbook.driver import JETBOOK
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput]
FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput]
plugins += [PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY, EB600, \
JETBOOK]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

View File

@ -0,0 +1,99 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import glob, os, shutil
from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.pml.pmlconverter import pml_to_html
from calibre.ebooks.metadata.opf2 import OPFCreator
class PMLInput(InputFormatPlugin):
name = 'PML Input'
author = 'John Schember'
description = 'Convert PML to OEB'
# pmlz is a zip file containing pml files and png images.
file_types = set(['pml', 'pmlz'])
def process_pml(self, pml_path, html_path):
pclose = False
hclose = False
if not hasattr(pml_path, 'read'):
pml_stream = open(pml_path, 'rb')
pclose = True
else:
pml_stream = pml_path
if not hasattr(html_path, 'write'):
html_stream = open(html_path, 'wb')
hclose = True
else:
html_stream = html_path
ienc = pml_stream.encoding if pml_stream.encoding else 'utf-8'
if self.options.input_encoding:
ienc = self.options.input_encoding
html = pml_to_html(pml_stream.read().decode(ienc))
html_stream.write('<html><head><title /></head><body>' + html + '</body></html>')
if pclose:
pml_stream.close()
if hclose:
html_stream.close()
def convert(self, stream, options, file_ext, log,
accelerators):
self.options = options
pages, images = [], []
if file_ext == 'pmlz':
with TemporaryDirectory('_unpmlz') as tdir:
zf = ZipFile(stream)
zf.extractall(tdir)
pmls = glob.glob(os.path.join(tdir, '*.pml'))
for pml in pmls:
html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
html_path = os.path.join(os.getcwd(), html_name)
pages.append(html_name)
self.process_pml(pml, html_path)
imgs = glob.glob(os.path.join(tdir, '*.png'))
for img in imgs:
pimg_name = os.path.basename(img)
pimg_path = os.path.join(os.getcwd(), pimg_name)
images.append(pimg_name)
shutil.move(img, pimg_path)
else:
self.process_pml(stream, 'index.html')
pages.append('index.html')
images = []
# We want pages to be orded alphabetically.
pages.sort()
manifest_items = []
for item in pages+images:
manifest_items.append((item, None))
from calibre.ebooks.metadata.meta import get_metadata
mi = get_metadata(stream, 'pml')
opf = OPFCreator(os.getcwd(), mi)
opf.create_manifest(manifest_items)
opf.create_spine(pages)
with open('metadata.opf', 'wb') as opffile:
opf.render(opffile)
return os.path.join(os.getcwd(), 'metadata.opf')

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
'''
Convert pml markup to and from html
'''
@ -47,6 +47,10 @@ PML_HTML_RULES = [
(re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.+?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text'))),
(re.compile(r'\\I'), lambda match: ''),
# Sidebar and Footnotes
(re.compile(r'<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.+?)\s*</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>' % (match.group('target'), match.group('text'))),
(re.compile(r'<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.+?)\s*</footnote>', re.DOTALL), lambda match: '<div id="footnote-%s">%s</div>' % (match.group('target'), match.group('text'))),
# eReader files are one paragraph per line.
# This forces the lines to wrap properly.
(re.compile('^(?P<text>.+)$', re.MULTILINE), lambda match: '<p>%s</p>' % match.group('text')),