mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
PML metadata reader
This commit is contained in:
parent
f4d821f3f6
commit
8293b5008d
@ -197,6 +197,17 @@ class PDFMetadataReader(MetadataReaderPlugin):
|
|||||||
return get_quick_metadata(stream)
|
return get_quick_metadata(stream)
|
||||||
return get_metadata(stream)
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class PMLMetadataReader(MetadataReaderPlugin):
    '''Metadata reader plugin registered for ``pml`` and ``pmlz`` files.'''

    name = 'Read PML metadata'
    author = 'John Schember'
    description = _('Read metadata from %s files') % 'PML'
    file_types = set(['pml', 'pmlz'])

    def get_metadata(self, stream, ftype):
        '''Return the metadata parsed from ``stream``.

        ``ftype`` is accepted for the plugin interface but is not used here.
        '''
        # Imported at call time rather than at module load, as in the
        # sibling reader plugins of this file.
        from calibre.ebooks.metadata.pml import get_metadata as read_pml
        return read_pml(stream)
|
||||||
|
|
||||||
class RARMetadataReader(MetadataReaderPlugin):
|
class RARMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
name = 'Read RAR metadata'
|
name = 'Read RAR metadata'
|
||||||
|
53
src/calibre/ebooks/metadata/pml.py
Normal file
53
src/calibre/ebooks/metadata/pml.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
'''Read meta information from PML files'''
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
import re
|
||||||
|
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
|
||||||
|
def _pml_field(name, comment):
    """ Return the decoded value of NAME="..." found in comment, or None """
    m = re.search(name + r'="(.*?)"', comment)
    if m is None:
        return None
    # The raw value is a byte string; it is decoded as cp1252.
    return m.group(1).strip().decode('cp1252', 'replace')


def get_metadata(stream, extract_cover=True):
    """
    Return metadata as a MetaInformation object.

    The PML text is searched for comment blocks delimited by a literal
    backslash-v pair; inside each block the NAME="value" pairs TITLE,
    AUTHOR, PUBLISHER, COPYRIGHT and ISBN are read. Every AUTHOR found is
    collected; for the other fields the last occurrence wins.

    stream: file-like object holding a PML file, or a PMLZ (zip) archive
    when its name ends in .pmlz. For archives, all *.pml members found at
    the top level are concatenated before parsing.

    extract_cover: accepted for interface compatibility; not used here.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    # Streams that are not backed by a real file may have no name; those
    # are treated as plain PML instead of raising AttributeError.
    stream_name = getattr(stream, 'name', None) or ''
    if stream_name.lower().endswith('.pmlz'):
        chunks = []
        with TemporaryDirectory('_unpmlz') as tdir:
            zf = ZipFile(stream)
            zf.extractall(tdir)

            # glob returns names in arbitrary order; sort so that the
            # concatenation (and therefore which duplicate field wins) is
            # deterministic.
            for path in sorted(glob.glob(os.path.join(tdir, '*.pml'))):
                # Read-only access is all that is needed.
                with open(path, 'rb') as pml_file:
                    chunks.append(pml_file.read())
        pml = ''.join(chunks)
    else:
        pml = stream.read()

    single_valued = (
        ('TITLE', 'title'),
        ('PUBLISHER', 'publisher'),
        ('COPYRIGHT', 'rights'),
        ('ISBN', 'isbn'),
    )

    # Authors are gathered separately and assigned at the end, so the
    # 'Unknown' placeholder is replaced without comparing against it (a
    # real author with that exact name is no longer discarded).
    authors = []
    for comment in re.findall(r'(?mus)\\v.*?\\v', pml):
        for key, attr in single_valued:
            value = _pml_field(key, comment)
            if value is not None:
                setattr(mi, attr, value)
        author = _pml_field('AUTHOR', comment)
        if author is not None:
            authors.append(author)

    if authors:
        mi.authors = authors

    return mi
|
@ -31,6 +31,7 @@ class PMLInput(InputFormatPlugin):
|
|||||||
pclose = True
|
pclose = True
|
||||||
else:
|
else:
|
||||||
pml_stream = pml_path
|
pml_stream = pml_path
|
||||||
|
pml_stream.seek(0)
|
||||||
|
|
||||||
if not hasattr(html_path, 'write'):
|
if not hasattr(html_path, 'write'):
|
||||||
html_stream = open(html_path, 'wb')
|
html_stream = open(html_path, 'wb')
|
||||||
@ -38,7 +39,7 @@ class PMLInput(InputFormatPlugin):
|
|||||||
else:
|
else:
|
||||||
html_stream = html_path
|
html_stream = html_path
|
||||||
|
|
||||||
ienc = pml_stream.encoding if pml_stream.encoding else 'utf-8'
|
ienc = pml_stream.encoding if pml_stream.encoding else 'cp1252'
|
||||||
if self.options.input_encoding:
|
if self.options.input_encoding:
|
||||||
ienc = self.options.input_encoding
|
ienc = self.options.input_encoding
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user