mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
PML metadata reader
This commit is contained in:
parent
f4d821f3f6
commit
8293b5008d
@ -197,6 +197,17 @@ class PDFMetadataReader(MetadataReaderPlugin):
|
||||
return get_quick_metadata(stream)
|
||||
return get_metadata(stream)
|
||||
|
||||
class PMLMetadataReader(MetadataReaderPlugin):
    # Metadata reader plugin registered for the 'pml' and 'pmlz' file types.

    name = 'Read PML metadata'
    author = 'John Schember'
    description = _('Read metadata from %s files') % 'PML'
    file_types = set(('pml', 'pmlz'))

    def get_metadata(self, stream, ftype):
        # The import is done at call time rather than at module import time.
        # ``ftype`` is accepted but not used: the reader only receives the
        # stream.
        from calibre.ebooks.metadata import pml
        return pml.get_metadata(stream)
|
||||
|
||||
class RARMetadataReader(MetadataReaderPlugin):
|
||||
|
||||
name = 'Read RAR metadata'
|
||||
|
53
src/calibre/ebooks/metadata/pml.py
Normal file
53
src/calibre/ebooks/metadata/pml.py
Normal file
@ -0,0 +1,53 @@
|
||||
'''Read meta information from PML files'''
|
||||
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
def _read_pml_field(field, comment):
    """Return the decoded value of ``FIELD="..."`` in comment, or None if absent."""
    m = re.search(field + r'="(.*?)"', comment)
    if m is None:
        return None
    return m.group(1).strip().decode('cp1252', 'replace')


def get_metadata(stream, extract_cover=True):
    """
    Return metadata read from a PML or PMLZ stream as a MetaInformation object.

    Metadata is taken from ``\\v ... \\v`` comment blocks containing
    ``TITLE="..."``, ``AUTHOR="..."``, ``PUBLISHER="..."``,
    ``COPYRIGHT="..."`` and ``ISBN="..."`` entries. Values are decoded as
    cp1252. When a field occurs in several blocks the last occurrence wins,
    except for authors, which are accumulated in order of appearance.

    :param stream: seekable file-like object. If it has a ``name`` ending in
        ``.pmlz`` (case-insensitive) it is treated as a zip archive and every
        top-level ``*.pml`` member is read; otherwise the stream itself is
        read as PML.
    :param extract_cover: accepted for interface compatibility; not used by
        this reader.
    :return: a MetaInformation whose title and authors default to
        ``_('Unknown')`` when the source does not provide them.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    # Not every stream has a usable name (e.g. in-memory buffers); treat
    # those as plain PML instead of failing with AttributeError.
    stream_name = getattr(stream, 'name', None) or ''

    if stream_name.lower().endswith('.pmlz'):
        with TemporaryDirectory('_unpmlz') as tdir:
            zf = ZipFile(stream)
            zf.extractall(tdir)

            chunks = []
            # glob order is arbitrary; sort so the result is deterministic
            # when the archive holds more than one PML file.
            for path in sorted(glob.glob(os.path.join(tdir, '*.pml'))):
                # The extracted files are only read, so plain 'rb' suffices.
                with open(path, 'rb') as p_stream:
                    chunks.append(p_stream.read())
            pml = ''.join(chunks)
    else:
        pml = stream.read()

    authors = []
    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
        title = _read_pml_field('TITLE', comment)
        if title is not None:
            mi.title = title

        author = _read_pml_field('AUTHOR', comment)
        if author is not None:
            authors.append(author)

        publisher = _read_pml_field('PUBLISHER', comment)
        if publisher is not None:
            mi.publisher = publisher

        rights = _read_pml_field('COPYRIGHT', comment)
        if rights is not None:
            mi.rights = rights

        isbn = _read_pml_field('ISBN', comment)
        if isbn is not None:
            mi.isbn = isbn

    # Only replace the placeholder author list when real authors were found.
    if authors:
        mi.authors = authors

    return mi
|
@ -31,6 +31,7 @@ class PMLInput(InputFormatPlugin):
|
||||
pclose = True
|
||||
else:
|
||||
pml_stream = pml_path
|
||||
pml_stream.seek(0)
|
||||
|
||||
if not hasattr(html_path, 'write'):
|
||||
html_stream = open(html_path, 'wb')
|
||||
@ -38,7 +39,7 @@ class PMLInput(InputFormatPlugin):
|
||||
else:
|
||||
html_stream = html_path
|
||||
|
||||
ienc = pml_stream.encoding if pml_stream.encoding else 'utf-8'
|
||||
ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252'
|
||||
if self.options.input_encoding:
|
||||
ienc = self.options.input_encoding
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user