eReader metadata: extract cover and add a plugin to automatically convert PML to PMLZ on import

This commit is contained in:
Kovid Goyal 2009-12-04 08:39:29 -07:00
parent aff0778be4
commit 65e7a184ec
4 changed files with 88 additions and 23 deletions

View File

@ -1,3 +1,4 @@
import os.path
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@ -49,6 +50,43 @@ every time you add an HTML file to the library.\
'include: cp1252, latin1, iso-8859-1 and utf-8.') 'include: cp1252, latin1, iso-8859-1 and utf-8.')
class PML2PMLZ(FileTypePlugin):
    '''
    File type plugin that runs on import for ``.pml`` files and packs the
    PML file, together with the PNG images stored next to it, into a single
    PMLZ (zip) archive.
    '''

    name = 'PML to PMLZ'
    author = 'John Schember'
    description = textwrap.dedent(_('''\
    Create a PMLZ archive containing the PML file \
    and all images in the directory pmlname_img or images. \
    This plugin is run every time you add a PML file to the library.\
    '''))
    version = numeric_version
    file_types = set(['pml'])
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, pmlfile):
        '''
        Build a PMLZ archive for ``pmlfile`` and return the archive's path.

        The archive holds the PML file itself (stored under its base name)
        and every ``*.png`` found in the first existing directory of
        ``<pmlname>_img`` or ``images``, both looked up next to the PML
        file. Images keep their directory name inside the archive.

        :param pmlfile: path to the PML file being imported.
        :return: path to the generated ``.pmlz`` file.
        '''
        import glob
        import zipfile

        # The archive must outlive this method: a file created inside a
        # ``with TemporaryDirectory()`` block is deleted as soon as the
        # block exits, i.e. before the caller can read the returned path.
        # temporary_file() is the plugin API helper for files that stay
        # available after being closed.
        of = self.temporary_file('_plugin_pml2pmlz.pmlz')
        of.close()

        pml_dir = os.path.dirname(pmlfile)
        # splitext() drops only the extension; indexing the base name with
        # [0] would yield just its first character.
        pml_img = os.path.splitext(pmlfile)[0] + '_img'
        generic_img = os.path.join(pml_dir, 'images')

        # Resolve the image directory relative to the PML file, not the
        # process' current working directory.
        if os.path.isdir(pml_img):
            img_dir = pml_img
        elif os.path.isdir(generic_img):
            img_dir = generic_img
        else:
            img_dir = ''

        pmlz = zipfile.ZipFile(of.name, 'w')
        try:
            # Explicit archive names keep absolute filesystem paths out of
            # the archive.
            pmlz.write(pmlfile, os.path.basename(pmlfile))
            if img_dir:
                img_dir_name = os.path.basename(img_dir)
                for image in glob.glob(os.path.join(img_dir, '*.png')):
                    pmlz.write(image, os.path.join(img_dir_name,
                        os.path.basename(image)))
        finally:
            # Always finalise the zip so the file handle is released even
            # when adding a member fails.
            pmlz.close()

        return of.name

    def customization_help(self, gui=False):
        '''
        Return the help text shown to the user for this plugin's
        customization string. ``gui`` is accepted for interface
        compatibility and is not used.
        '''
        return _('Character encoding for the input PML files. Should always be: cp1252.')
class ComicMetadataReader(MetadataReaderPlugin): class ComicMetadataReader(MetadataReaderPlugin):
name = 'Read comic metadata' name = 'Read comic metadata'
@ -387,7 +425,7 @@ from calibre.devices.nuut2.driver import NUUT2
from calibre.devices.iriver.driver import IRIVER_STORY from calibre.devices.iriver.driver import IRIVER_STORY
from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon
plugins = [HTML2ZIP, GoogleBooks, ISBNDB, Amazon] plugins = [HTML2ZIP, PML2PMLZ, GoogleBooks, ISBNDB, Amazon]
plugins += [ plugins += [
ComicInput, ComicInput,
EPUBInput, EPUBInput,

View File

@ -16,6 +16,18 @@ from calibre.ebooks.pdb.ereader.reader132 import HeaderRecord
from calibre.ebooks.pdb.header import PdbHeaderBuilder from calibre.ebooks.pdb.header import PdbHeaderBuilder
from calibre.ebooks.pdb.header import PdbHeaderReader from calibre.ebooks.pdb.header import PdbHeaderReader
def get_cover(pheader, eheader):
    '''
    Look for the image record named ``cover.png`` and return its payload.

    Scans ``eheader.image_count`` consecutive sections, starting at section
    ``eheader.image_data_offset``, fetching each one with
    ``pheader.section_data()``. The first record whose NUL-stripped name
    field equals ``cover.png`` wins.

    :return: ``('png', data)`` for the first matching record, or
             ``('png', None)`` when no record matches.
    '''
    # Record layout as used below: the name field is bytes 4..35 and the
    # image payload starts at byte 62.
    # NOTE(review): presumably the first 62 bytes are a fixed record
    # header -- confirm against the eReader format notes.
    name_start, name_length, payload_start = 4, 32, 62

    for index in range(eheader.image_count):
        record = pheader.section_data(eheader.image_data_offset + index)
        image_name = record[name_start:name_start + name_length].strip('\x00')
        if image_name == 'cover.png':
            return ('png', record[payload_start:])

    return ('png', None)
def get_metadata(stream, extract_cover=True): def get_metadata(stream, extract_cover=True):
""" """
Return metadata as a L{MetaInfo} object Return metadata as a L{MetaInfo} object
@ -29,7 +41,7 @@ def get_metadata(stream, extract_cover=True):
if len(pheader.section_data(0)) == 132: if len(pheader.section_data(0)) == 132:
hr = HeaderRecord(pheader.section_data(0)) hr = HeaderRecord(pheader.section_data(0))
if hr.version in (2, 10) and hr.has_metadata == 1: if hr.compression in (2, 10) and hr.has_metadata == 1:
try: try:
mdata = pheader.section_data(hr.metadata_offset) mdata = pheader.section_data(hr.metadata_offset)
@ -41,6 +53,9 @@ def get_metadata(stream, extract_cover=True):
except: except:
pass pass
if extract_cover:
mi.cover_data = get_cover(pheader, hr)
if not mi.title: if not mi.title:
mi.title = pheader.title if pheader.title else _('Unknown') mi.title = pheader.title if pheader.title else _('Unknown')
@ -56,7 +71,7 @@ def set_metadata(stream, mi):
sections = [pheader.section_data(x) for x in range(0, pheader.section_count())] sections = [pheader.section_data(x) for x in range(0, pheader.section_count())]
hr = HeaderRecord(sections[0]) hr = HeaderRecord(sections[0])
if hr.version not in (2, 10): if hr.compression not in (2, 10):
return return
# Create a metadata record for the file if one does not already exist # Create a metadata record for the file if one does not already exist

View File

@ -29,12 +29,19 @@ class HeaderRecord(object):
''' '''
def __init__(self, raw): def __init__(self, raw):
self.version, = struct.unpack('>H', raw[0:2]) self.compression, = struct.unpack('>H', raw[0:2])
self.non_text_offset, = struct.unpack('>H', raw[12:14]) self.non_text_offset, = struct.unpack('>H', raw[12:14])
self.chapter_count, = struct.unpack('>H', raw[14:16])
self.image_count, = struct.unpack('>H', raw[20:22])
self.link_count, = struct.unpack('>H', raw[22:24])
self.has_metadata, = struct.unpack('>H', raw[24:26]) self.has_metadata, = struct.unpack('>H', raw[24:26])
self.footnote_rec, = struct.unpack('>H', raw[28:30]) self.footnote_count, = struct.unpack('>H', raw[28:30])
self.sidebar_rec, = struct.unpack('>H', raw[30:32]) self.sidebar_count, = struct.unpack('>H', raw[30:32])
self.chapter_offset, = struct.unpack('>H', raw[32:34])
self.small_font_page_offset, = struct.unpack('>H', raw[36:38])
self.large_font_page_offset, = struct.unpack('>H', raw[38:40])
self.image_data_offset, = struct.unpack('>H', raw[40:42]) self.image_data_offset, = struct.unpack('>H', raw[40:42])
self.link_offset, = struct.unpack('>H', raw[42:44])
self.metadata_offset, = struct.unpack('>H', raw[44:46]) self.metadata_offset, = struct.unpack('>H', raw[44:46])
self.footnote_offset, = struct.unpack('>H', raw[48:50]) self.footnote_offset, = struct.unpack('>H', raw[48:50])
self.sidebar_offset, = struct.unpack('>H', raw[50:52]) self.sidebar_offset, = struct.unpack('>H', raw[50:52])
@ -58,11 +65,11 @@ class Reader132(FormatReader):
self.header_record = HeaderRecord(self.section_data(0)) self.header_record = HeaderRecord(self.section_data(0))
if self.header_record.version not in (2, 10): if self.header_record.compression not in (2, 10):
if self.header_record.version in (260, 272): if self.header_record.compression in (260, 272):
raise DRMError('eReader DRM is not supported.') raise DRMError('eReader DRM is not supported.')
else: else:
raise EreaderError('Unknown book version %i.' % self.header_record.version) raise EreaderError('Unknown book compression %i.' % self.header_record.compression)
from calibre.ebooks.metadata.pdb import get_metadata from calibre.ebooks.metadata.pdb import get_metadata
self.mi = get_metadata(stream, False) self.mi = get_metadata(stream, False)
@ -71,9 +78,9 @@ class Reader132(FormatReader):
return self.sections[number] return self.sections[number]
def decompress_text(self, number): def decompress_text(self, number):
if self.header_record.version == 2: if self.header_record.compression == 2:
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace') return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
if self.header_record.version == 10: if self.header_record.compression == 10:
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace') return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
def get_image(self, number): def get_image(self, number):
@ -115,19 +122,19 @@ class Reader132(FormatReader):
pml += self.get_text_page(i) pml += self.get_text_page(i)
html += pml_to_html(pml) html += pml_to_html(pml)
if self.header_record.footnote_rec > 0: if self.header_record.footnote_count > 0:
html += '<br /><h1>%s</h1>' % _('Footnotes') html += '<br /><h1>%s</h1>' % _('Footnotes')
footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding)) footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_rec)): for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_count)):
self.log.debug('Extracting footnote page %i' % i) self.log.debug('Extracting footnote page %i' % i)
html += '<dl>' html += '<dl>'
html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i)) html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i))
html += '</dl>' html += '</dl>'
if self.header_record.sidebar_rec > 0: if self.header_record.sidebar_count > 0:
html += '<br /><h1>%s</h1>' % _('Sidebar') html += '<br /><h1>%s</h1>' % _('Sidebar')
sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding)) sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_rec)): for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_count)):
self.log.debug('Extracting sidebar page %i' % i) self.log.debug('Extracting sidebar page %i' % i)
html += '<dl>' html += '<dl>'
html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i)) html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i))
@ -157,12 +164,15 @@ class Reader132(FormatReader):
def create_opf(self, output_dir, images): def create_opf(self, output_dir, images):
with CurrentDir(output_dir): with CurrentDir(output_dir):
if 'cover.png' in images:
self.mi.cover = os.path.join('images', 'cover.png')
opf = OPFCreator(output_dir, self.mi) opf = OPFCreator(output_dir, self.mi)
manifest = [('index.html', None)] manifest = [('index.html', None)]
for i in images: for i in images:
manifest.append((os.path.join('images/', i), None)) manifest.append((os.path.join('images', i), None))
opf.create_manifest(manifest) opf.create_manifest(manifest)
opf.create_spine(['index.html']) opf.create_spine(['index.html'])

View File

@ -123,6 +123,8 @@ class PMLInput(InputFormatPlugin):
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
log.debug('Reading metadata from input file...') log.debug('Reading metadata from input file...')
mi = get_metadata(stream, 'pml') mi = get_metadata(stream, 'pml')
if 'images/cover.png' in images:
mi.cover = 'images/cover.png'
opf = OPFCreator(os.getcwd(), mi) opf = OPFCreator(os.getcwd(), mi)
log.debug('Generating manifest...') log.debug('Generating manifest...')
opf.create_manifest(manifest_items) opf.create_manifest(manifest_items)