PML metadata reader

2025-07-09 03:04:10 -04:00 · 2009-11-13 15:56:20 -07:00 · 2009-11-13 15:56:20 -07:00 · 8293b5008d
commit 8293b5008d
parent f4d821f3f6
3 changed files with 66 additions and 1 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -197,6 +197,17 @@ class PDFMetadataReader(MetadataReaderPlugin):
            return get_quick_metadata(stream)
        return get_metadata(stream)
 class PMLMetadataReader(MetadataReaderPlugin):
    """Metadata reader plugin registered for ``pml`` and ``pmlz`` files."""

    name = 'Read PML metadata'
    author = 'John Schember'
    description = _('Read metadata from %s files') % 'PML'
    file_types = set(('pml', 'pmlz'))

    def get_metadata(self, stream, ftype):
        """
        Return whatever the PML metadata module extracts from ``stream``.

        ``ftype`` is accepted as part of the method signature but is not
        consulted here; only ``stream`` is handed to the PML reader.
        """
        # Imported at call time rather than at module load.
        from calibre.ebooks.metadata.pml import get_metadata as read_pml
        return read_pml(stream)
 class RARMetadataReader(MetadataReaderPlugin):
    name = 'Read RAR metadata'
--- a/src/calibre/ebooks/metadata/pml.py
+++ b/src/calibre/ebooks/metadata/pml.py
@ -0,0 +1,53 @@
 '''Read meta information from PML files'''
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 import os
 import glob
 import re
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 def get_metadata(stream, extract_cover=True):
    """
    Return the metadata found in a PML/PMLZ stream as a L{MetaInfo} object.

    The PML text is scanned for ``\\v ... \\v`` comment blocks, and the
    ``TITLE``, ``AUTHOR``, ``PUBLISHER``, ``COPYRIGHT`` and ``ISBN``
    ``KEY="value"`` pairs inside them are copied onto the result. Values
    are decoded as cp1252 with undecodable bytes replaced.

    :param stream: seekable, readable file-like object. When it has a
        ``name`` ending in ``.pmlz`` (any letter case) it is treated as a
        zip archive and every top-level ``*.pml`` member is read;
        otherwise its content is read directly as PML.
    :param extract_cover: accepted for interface compatibility; this
        function does not use it.
    :return: a ``MetaInformation`` whose title and authors default to
        ``_('Unknown')`` when the corresponding field is absent.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    # Streams without a usable ``name`` (e.g. in-memory buffers) are
    # treated as plain PML instead of raising AttributeError.
    name = getattr(stream, 'name', None) or ''
    is_pmlz = hasattr(name, 'lower') and name.lower().endswith('.pmlz')

    if is_pmlz:
        chunks = []
        with TemporaryDirectory('_unpmlz') as tdir:
            zf = ZipFile(stream)
            try:
                zf.extractall(tdir)
            finally:
                # Release the archive handle even if extraction fails.
                zf.close()

            # glob returns names in arbitrary order; sort so the
            # concatenated text (and therefore the order of authors and
            # which duplicate field wins) is deterministic.
            for path in sorted(glob.glob(os.path.join(tdir, '*.pml'))):
                # Files are only read, so do not request write access.
                with open(path, 'rb') as p_stream:
                    chunks.append(p_stream.read())
        pml = ''.join(chunks)
    else:
        pml = stream.read()

    # PML key -> MetaInformation attribute for single-valued fields.
    # A later comment block overrides the value set by an earlier one.
    scalar_fields = (
        ('TITLE', 'title'),
        ('PUBLISHER', 'publisher'),
        ('COPYRIGHT', 'rights'),
        ('ISBN', 'isbn'),
    )

    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
        for key, attr in scalar_fields:
            m = re.search(r'%s="(.*?)"' % key, comment)
            if m:
                value = m.group(1).strip().decode('cp1252', 'replace')
                setattr(mi, attr, value)

        # Authors accumulate across comment blocks; the placeholder
        # author is dropped as soon as a real one is found.
        m = re.search(r'AUTHOR="(.*?)"', comment)
        if m:
            if mi.authors == [_('Unknown')]:
                mi.authors = []
            mi.authors.append(m.group(1).strip().decode('cp1252', 'replace'))

    return mi
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@ -31,6 +31,7 @@ class PMLInput(InputFormatPlugin):
            pclose = True
        else:
            pml_stream = pml_path
            pml_stream.seek(0)
        if not hasattr(html_path, 'write'):
            html_stream = open(html_path, 'wb')
@ -38,7 +39,7 @@ class PMLInput(InputFormatPlugin):
        else:
            html_stream = html_path
-        ienc = pml_stream.encoding if pml_stream.encoding else 'utf-8'
+        ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252'
        if self.options.input_encoding:
            ienc = self.options.input_encoding