PML input plugin.

2025-07-07 18:24:30 -04:00 · 2009-05-02 09:45:51 -04:00 · 2009-05-02 09:45:51 -04:00 · db159de066
commit db159de066
parent 9787215d55
4 changed files with 106 additions and 2 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -292,6 +292,7 @@ from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.pdf.output import PDFOutput
 from calibre.ebooks.pml.input import PMLInput
 from calibre.customize.profiles import input_profiles, output_profiles
 from calibre.devices.prs500.driver import PRS500
@ -306,7 +307,7 @@ from calibre.devices.jetbook.driver import JETBOOK
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
-        FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput]
+        FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput]
 plugins += [PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY, EB600, \
        JETBOOK]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/ebooks/pml/init.py
+++ b/src/calibre/ebooks/pml/init.py
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@ -0,0 +1,99 @@
 # -*- coding: utf-8 -*-
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import glob, os, shutil
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 from calibre.ebooks.pml.pmlconverter import pml_to_html
 from calibre.ebooks.metadata.opf2 import OPFCreator
 class PMLInput(InputFormatPlugin):
    name        = 'PML Input'
    author      = 'John Schember'
    description = 'Convert PML to OEB'
    # pmlz is a zip file containing pml files and png images.
    file_types  = set(['pml', 'pmlz'])
    def process_pml(self, pml_path, html_path):
        pclose = False
        hclose = False
        if not hasattr(pml_path, 'read'):
            pml_stream = open(pml_path, 'rb')
            pclose = True
        else:
            pml_stream = pml_path
        if not hasattr(html_path, 'write'):
            html_stream = open(html_path, 'wb')
            hclose = True
        else:
            html_stream = html_path
        ienc = pml_stream.encoding if pml_stream.encoding else 'utf-8'
        if self.options.input_encoding:
            ienc = self.options.input_encoding
        html = pml_to_html(pml_stream.read().decode(ienc)) 
        html_stream.write('<html><head><title /></head><body>' + html + '</body></html>')
        if pclose:
            pml_stream.close()
        if hclose:
            html_stream.close()
    def convert(self, stream, options, file_ext, log,
                accelerators):
        self.options = options
        pages, images = [], []
        if file_ext == 'pmlz':
            with TemporaryDirectory('_unpmlz') as tdir:
                zf = ZipFile(stream)
                zf.extractall(tdir)
                pmls = glob.glob(os.path.join(tdir, '*.pml'))
                for pml in pmls:
                    html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
                    html_path = os.path.join(os.getcwd(), html_name)
                    pages.append(html_name)                
                    self.process_pml(pml, html_path)
                imgs = glob.glob(os.path.join(tdir, '*.png'))
                for img in imgs:
                    pimg_name = os.path.basename(img)
                    pimg_path = os.path.join(os.getcwd(), pimg_name)
                    images.append(pimg_name)
                    shutil.move(img, pimg_path)
        else:
            self.process_pml(stream, 'index.html')
            pages.append('index.html')
            images = []
        # We want pages to be orded alphabetically.
        pages.sort()
        manifest_items = []
        for item in pages+images:
            manifest_items.append((item, None))
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(stream, 'pml')
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest(manifest_items)
        opf.create_spine(pages)
        with open('metadata.opf', 'wb') as opffile:
            opf.render(opffile)
        return os.path.join(os.getcwd(), 'metadata.opf')
--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from __future__ import with_statement
+
 '''
 Convert pml markup to and from html
 '''
@ -47,6 +47,10 @@ PML_HTML_RULES = [
    (re.compile(r'\\Sd="(?P<target>.+?)"(?P<text>.+?)\\Sd'), lambda match: '<a href="#sidebar-%s">%s</a>' % (match.group('target'), match.group('text'))),
    (re.compile(r'\\I'), lambda match: ''),
    # Sidebar and Footnotes
    (re.compile(r'<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.+?)\s*</sidebar>', re.DOTALL), lambda match: '<div id="sidebar-%s">%s</div>' % (match.group('target'), match.group('text'))),
    (re.compile(r'<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.+?)\s*</footnote>', re.DOTALL), lambda match: '<div id="footnote-%s">%s</div>' % (match.group('target'), match.group('text'))),
    # eReader files are one paragraph per line.
    # This forces the lines to wrap properly.
    (re.compile('^(?P<text>.+)$', re.MULTILINE), lambda match: '<p>%s</p>' % match.group('text')),