diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 562200c6e2..45a9bd29d1 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -197,6 +197,17 @@ class PDFMetadataReader(MetadataReaderPlugin):
             return get_quick_metadata(stream)
         return get_metadata(stream)
 
+class PMLMetadataReader(MetadataReaderPlugin):
+
+    name = 'Read PML metadata'
+    file_types = set(['pml', 'pmlz'])
+    description = _('Read metadata from %s files') % 'PML'
+    author = 'John Schember'
+
+    def get_metadata(self, stream, ftype):
+        from calibre.ebooks.metadata.pml import get_metadata
+        return get_metadata(stream)
+
 class RARMetadataReader(MetadataReaderPlugin):
 
     name = 'Read RAR metadata'
diff --git a/src/calibre/ebooks/metadata/pml.py b/src/calibre/ebooks/metadata/pml.py
new file mode 100644
index 0000000000..57ca29172a
--- /dev/null
+++ b/src/calibre/ebooks/metadata/pml.py
@@ -0,0 +1,89 @@
+'''Read meta information from PML files'''
+
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, John Schember '
+
+import os
+import glob
+import re
+
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ptempfile import TemporaryDirectory
+from calibre.utils.zipfile import ZipFile
+
+# PML metadata keys holding a single value -> MetaInformation attribute.
+_SIMPLE_FIELDS = (
+    ('TITLE', 'title'),
+    ('PUBLISHER', 'publisher'),
+    ('COPYRIGHT', 'rights'),
+    ('ISBN', 'isbn'),
+)
+
+def _read_pmlz(stream):
+    '''
+    Return the concatenated raw contents of every .pml file found at the
+    top level of the zip archive readable from stream.
+    '''
+    with TemporaryDirectory('_unpmlz') as tdir:
+        zf = ZipFile(stream)
+        zf.extractall(tdir)
+
+        chunks = []
+        # glob order depends on the filesystem; sort for a stable result.
+        for path in sorted(glob.glob(os.path.join(tdir, '*.pml'))):
+            # The files are only read, so do not request write access.
+            with open(path, 'rb') as pml_file:
+                chunks.append(pml_file.read())
+    return ''.join(chunks)
+
+def _find_value(key, comment):
+    '''
+    Return the value of the first KEY="value" pair in comment, stripped and
+    decoded from cp1252 (undecodable bytes are replaced). Return None when
+    the key is missing or its value is blank.
+    '''
+    m = re.search(r'%s="(.*?)"' % key, comment)
+    if m is None:
+        return None
+    return m.group(1).strip().decode('cp1252', 'replace') or None
+
+def get_metadata(stream, extract_cover=True):
+    '''
+    Return the metadata of a PML or PMLZ file as a MetaInformation object.
+
+    stream is a file-like object; it is rewound before being read. It is
+    handled as a zipped PMLZ archive when it has a name ending in .pmlz
+    (compared case-insensitively) and as raw PML text otherwise.
+    extract_cover is accepted for interface compatibility and is not used.
+
+    Values are taken from KEY="value" pairs inside PML comment blocks (the
+    text between two backslash-v markers). For TITLE, PUBLISHER, COPYRIGHT
+    and ISBN a value in a later block replaces an earlier one; every AUTHOR
+    found is appended. Title and authors stay 'Unknown' when none is found.
+    '''
+    mi = MetaInformation(_('Unknown'), [_('Unknown')])
+    stream.seek(0)
+
+    # Not every file-like object carries a usable name (e.g. StringIO).
+    name = getattr(stream, 'name', None) or ''
+    if name.lower().endswith('.pmlz'):
+        pml = _read_pmlz(stream)
+    else:
+        pml = stream.read()
+
+    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
+        for key, attr in _SIMPLE_FIELDS:
+            value = _find_value(key, comment)
+            if value is not None:
+                setattr(mi, attr, value)
+
+        author = _find_value('AUTHOR', comment)
+        if author is not None:
+            # Drop the placeholder before recording the first real author.
+            if mi.authors == [_('Unknown')]:
+                mi.authors = []
+            mi.authors.append(author)
+
+    return mi
diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py
index f2d00742ba..2475e40c34 100644
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@@ -31,6 +31,7 @@ class PMLInput(InputFormatPlugin):
             pclose = True
         else:
             pml_stream = pml_path
+            pml_stream.seek(0)
 
         if not hasattr(html_path, 'write'):
             html_stream = open(html_path, 'wb')
@@ -38,7 +39,7 @@ class PMLInput(InputFormatPlugin):
         else:
             html_stream = html_path
 
-        ienc = pml_stream.encoding if pml_stream.encoding else 'utf-8'
+        ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252'
         if self.options.input_encoding:
             ienc = self.options.input_encoding
 