PML metadata reader

This commit is contained in:
Kovid Goyal 2009-11-13 15:56:20 -07:00
parent f4d821f3f6
commit 8293b5008d
3 changed files with 66 additions and 1 deletions

View File

@ -197,6 +197,17 @@ class PDFMetadataReader(MetadataReaderPlugin):
return get_quick_metadata(stream) return get_quick_metadata(stream)
return get_metadata(stream) return get_metadata(stream)
class PMLMetadataReader(MetadataReaderPlugin):

    # Plugin registration data: files with these extensions are routed here.
    name        = 'Read PML metadata'
    file_types  = set(['pml', 'pmlz'])
    description = _('Read metadata from %s files') % 'PML'
    author      = 'John Schember'

    def get_metadata(self, stream, ftype):
        # The reader module is imported on first use rather than at module
        # import time. ftype is accepted but not used by this method.
        from calibre.ebooks.metadata import pml
        return pml.get_metadata(stream)
class RARMetadataReader(MetadataReaderPlugin): class RARMetadataReader(MetadataReaderPlugin):
name = 'Read RAR metadata' name = 'Read RAR metadata'

View File

@ -0,0 +1,53 @@
'''Read meta information from PML files'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
import os
import glob
import re
from calibre.ebooks.metadata import MetaInformation
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
def _read_pml_field(key, comment):
    """ Return the decoded value of the KEY="value" pair in comment, or None """
    m = re.search(key + r'="(.*?)"', comment)
    if m is None:
        return None
    return m.group(1).strip().decode('cp1252', 'replace')


def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a L{MetaInformation} object

    Metadata is taken from KEY="value" pairs (TITLE, AUTHOR, PUBLISHER,
    COPYRIGHT, ISBN) found between \\v ... \\v markers in the PML text.
    Values are decoded as cp1252, replacing undecodable bytes.

    @param stream: Seekable, readable file-like object holding PML data. If
        it has a C{name} ending in C{.pmlz} (any case) it is treated as a
        zip archive and every top-level C{*.pml} member is scanned.
    @param extract_cover: Accepted for interface compatibility; not used
        here.
    @return: A L{MetaInformation} object; title and authors stay at
        'Unknown' when the file does not specify them.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    # Not every file-like object has a name (in-memory buffers do not);
    # treat those as plain PML instead of failing with AttributeError.
    stream_name = getattr(stream, 'name', None) or ''

    if stream_name.lower().endswith('.pmlz'):
        chunks = []
        with TemporaryDirectory('_unpmlz') as tdir:
            zf = ZipFile(stream)
            zf.extractall(tdir)
            # glob order is arbitrary; sort so that the result (author
            # order in particular) does not depend on the filesystem.
            for path in sorted(glob.glob(os.path.join(tdir, '*.pml'))):
                # The extracted files are only read, so open them read-only.
                with open(path, 'rb') as p_stream:
                    chunks.append(p_stream.read())
        pml = ''.join(chunks)
    else:
        pml = stream.read()

    unknown_authors = [_('Unknown')]
    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
        title = _read_pml_field('TITLE', comment)
        if title is not None:
            mi.title = title

        author = _read_pml_field('AUTHOR', comment)
        if author is not None:
            # Drop the placeholder the first time a real author is found.
            if mi.authors == unknown_authors:
                mi.authors = []
            mi.authors.append(author)

        publisher = _read_pml_field('PUBLISHER', comment)
        if publisher is not None:
            mi.publisher = publisher

        rights = _read_pml_field('COPYRIGHT', comment)
        if rights is not None:
            mi.rights = rights

        isbn = _read_pml_field('ISBN', comment)
        if isbn is not None:
            mi.isbn = isbn

    return mi

View File

@ -31,6 +31,7 @@ class PMLInput(InputFormatPlugin):
pclose = True pclose = True
else: else:
pml_stream = pml_path pml_stream = pml_path
pml_stream.seek(0)
if not hasattr(html_path, 'write'): if not hasattr(html_path, 'write'):
html_stream = open(html_path, 'wb') html_stream = open(html_path, 'wb')
@ -38,7 +39,7 @@ class PMLInput(InputFormatPlugin):
else: else:
html_stream = html_path html_stream = html_path
ienc = pml_stream.encoding if pml_stream.encoding else 'utf-8' ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252'
if self.options.input_encoding: if self.options.input_encoding:
ienc = self.options.input_encoding ienc = self.options.input_encoding