diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index d284beca3b..e68b6b80a8 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -292,6 +292,7 @@ from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.pdf.output import PDFOutput
+from calibre.ebooks.pml.input import PMLInput
 from calibre.customize.profiles import input_profiles, output_profiles
 
 from calibre.devices.prs500.driver import PRS500
@@ -306,7 +307,7 @@ from calibre.devices.jetbook.driver import JETBOOK
 
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
            TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
-           FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput]
+           FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput]
 plugins += [PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY, EB600, \
                 JETBOOK]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
diff --git a/src/calibre/ebooks/pml/__init__.py b/src/calibre/ebooks/pml/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
new file mode 100644
index 0000000000..36a9e3b526
--- /dev/null
+++ b/src/calibre/ebooks/pml/input.py
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, John Schember '
+__docformat__ = 'restructuredtext en'
+
+import glob, os, shutil
+
+from calibre.customize.conversion import InputFormatPlugin
+from calibre.ptempfile import TemporaryDirectory
+from calibre.utils.zipfile import ZipFile
+from calibre.ebooks.pml.pmlconverter import pml_to_html
+from calibre.ebooks.metadata.opf2 import OPFCreator
+
+class PMLInput(InputFormatPlugin):
+    '''
+    Input plugin that converts PML and PMLZ books to OEB.
+
+    Every PML document is rendered to an HTML page in the current working
+    directory and an OPF file referencing the pages and images is written
+    next to them.
+    '''
+
+    name = 'PML Input'
+    author = 'John Schember'
+    description = 'Convert PML to OEB'
+    # pmlz is a zip file containing pml files and png images.
+    file_types = set(['pml', 'pmlz'])
+
+    def process_pml(self, pml_path, html_path):
+        '''
+        Convert a single PML document to an HTML page.
+
+        :param pml_path: Path of the PML file, or an open file-like object
+                         with a ``read`` method.
+        :param html_path: Path of the HTML file to create, or an open
+                          file-like object with a ``write`` method.
+
+        ``self.options`` must be set before this is called. Files opened
+        here are closed even when the conversion fails; streams supplied by
+        the caller are left open.
+        '''
+        pclose = hclose = False
+        pml_stream = html_stream = None
+
+        try:
+            if hasattr(pml_path, 'read'):
+                pml_stream = pml_path
+            else:
+                pml_stream = open(pml_path, 'rb')
+                pclose = True
+
+            if hasattr(html_path, 'write'):
+                html_stream = html_path
+            else:
+                html_stream = open(html_path, 'wb')
+                hclose = True
+
+            # An explicitly requested encoding wins over whatever the stream
+            # reports. Not every file-like object has an ``encoding``.
+            ienc = getattr(pml_stream, 'encoding', None) or 'utf-8'
+            if self.options.input_encoding:
+                ienc = self.options.input_encoding
+
+            html = pml_to_html(pml_stream.read().decode(ienc))
+            page = u'<html><head><title /></head><body>' + html + \
+                    u'</body></html>'
+            # The output is opened in binary mode, so encode explicitly
+            # instead of relying on an implicit ASCII conversion.
+            html_stream.write(page.encode('utf-8'))
+        finally:
+            if pclose:
+                pml_stream.close()
+            if hclose:
+                html_stream.close()
+
+    def convert(self, stream, options, file_ext, log,
+                accelerators):
+        '''
+        Convert a PML or PMLZ book to HTML pages plus an OPF file.
+
+        All output is written to the current working directory.
+
+        :param stream: Open file-like object containing the input book.
+        :param options: Conversion options; ``input_encoding`` is used.
+        :param file_ext: ``'pmlz'`` for a zipped book; any other value is
+                         handled as a single PML document.
+        :param log: Not used by this method.
+        :param accelerators: Not used by this method.
+        :return: Absolute path of the generated ``metadata.opf``.
+        '''
+        self.options = options
+        cwd = os.getcwd()
+        pages, images = [], []
+
+        if file_ext == 'pmlz':
+            with TemporaryDirectory('_unpmlz') as tdir:
+                zf = ZipFile(stream)
+                zf.extractall(tdir)
+
+                for pml in glob.glob(os.path.join(tdir, '*.pml')):
+                    html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
+                    pages.append(html_name)
+                    self.process_pml(pml, os.path.join(cwd, html_name))
+
+                for img in glob.glob(os.path.join(tdir, '*.png')):
+                    pimg_name = os.path.basename(img)
+                    images.append(pimg_name)
+                    shutil.move(img, os.path.join(cwd, pimg_name))
+        else:
+            self.process_pml(stream, 'index.html')
+            pages.append('index.html')
+
+        # We want pages to be ordered alphabetically.
+        pages.sort()
+
+        manifest_items = [(item, None) for item in pages+images]
+
+        # The stream was consumed above; rewind it before reading metadata.
+        # NOTE(review): get_metadata may rewind on its own -- confirm.
+        if hasattr(stream, 'seek'):
+            stream.seek(0)
+
+        from calibre.ebooks.metadata.meta import get_metadata
+        mi = get_metadata(stream, 'pml')
+        opf = OPFCreator(cwd, mi)
+        opf.create_manifest(manifest_items)
+        opf.create_spine(pages)
+        with open('metadata.opf', 'wb') as opffile:
+            opf.render(opffile)
+
+        return os.path.join(cwd, 'metadata.opf')
+
diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py
index 391f70a504..14a6280338 100644
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from __future__ import with_statement
+
 '''
 Convert pml markup to and from html
 '''
@@ -47,6 +47,10 @@ PML_HTML_RULES = [
     (re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.+?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text'))),
     (re.compile(r'\\I'), lambda match: ''),
 
+    # Sidebar and Footnotes
+    (re.compile(r'<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.+?)\s*</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>' % (match.group('target'), match.group('text'))),
+    (re.compile(r'<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.+?)\s*</footnote>', re.DOTALL), lambda match: '<div id="footnote-%s">%s</div>' % (match.group('target'), match.group('text'))),
+
     # eReader files are one paragraph per line.
     # This forces the lines to wrap properly.
     (re.compile('^(?P<text>.+)$', re.MULTILINE), lambda match: '<p>%s</p>' % match.group('text')),