From d16b8274c6291077d4dadaa99068e1240f8806e2 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 16 May 2009 12:14:32 -0400 Subject: [PATCH] pdb/eReader metadata writer --- src/calibre/customize/builtins.py | 11 ++++++ src/calibre/ebooks/metadata/ereader.py | 47 ++++++++++++++++++++++-- src/calibre/ebooks/metadata/pdb.py | 28 ++++++++++++-- src/calibre/ebooks/pdb/ereader/reader.py | 9 +++-- src/calibre/ebooks/pdb/ereader/writer.py | 6 +-- 5 files changed, 84 insertions(+), 17 deletions(-) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index bdbe1f9762..8bbccce7c4 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -297,6 +297,17 @@ class PDFMetadataWriter(MetadataWriterPlugin): from calibre.ebooks.metadata.pdf import set_metadata set_metadata(stream, mi) +class PDBMetadataWriter(MetadataWriterPlugin): + + name = 'Set PDB metadata' + file_types = set(['pdb']) + description = _('Set metadata from %s files') % 'PDB' + author = 'John Schember' + + def set_metadata(self, stream, mi, type): + from calibre.ebooks.metadata.pdb import set_metadata + set_metadata(stream, mi) + from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.mobi.input import MOBIInput diff --git a/src/calibre/ebooks/metadata/ereader.py b/src/calibre/ebooks/metadata/ereader.py index f37ff9ab6d..b1edee10b0 100644 --- a/src/calibre/ebooks/metadata/ereader.py +++ b/src/calibre/ebooks/metadata/ereader.py @@ -10,8 +10,8 @@ __docformat__ = 'restructuredtext en' import re -from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.pdb.header import PdbHeaderReader +from calibre.ebooks.metadata import MetaInformation, authors_to_string +from calibre.ebooks.pdb.header import PdbHeaderReader, PdbHeaderBuilder from calibre.ebooks.pdb.ereader.reader import HeaderRecord def get_metadata(stream, extract_cover=True): @@ -24,8 +24,8 @@ def get_metadata(stream, extract_cover=True): pheader = PdbHeaderReader(stream) 
hr = HeaderRecord(pheader.section_data(0)) - if hr.version in (2, 10): - try: + if hr.version in (2, 10) and hr.has_metadata == 1: + try: mdata = pheader.section_data(hr.metadata_offset) mdata = mdata.split('\x00') @@ -41,3 +41,42 @@ def get_metadata(stream, extract_cover=True): return mi +def set_metadata(stream, mi): + pheader = PdbHeaderReader(stream) + sections = [pheader.section_data(x) for x in range(0, pheader.section_count())] + hr = HeaderRecord(sections[0]) + + if hr.version not in (2, 10): + return + + # Create a metadata record for the file if one does not already exist + if not hr.has_metadata: + sections += ['', 'MeTaInFo\x00'] + last_data = len(sections) - 1 + + for i in range(0, 132, 2): + val, = struct.unpack('>H', sections[0][i:i+2]) + if val >= hr.last_data_offset: + sections[0][i:i+2] = struct.pack('>H', last_data) + + sections[0][24:26] = struct.pack('>H', 1) # Set has metadata + sections[0][44:46] = struct.pack('>H', last_data - 1) # Set location of metadata + sections[0][52:54] = struct.pack('>H', last_data) # Ensure last data offset is updated + + # Merge the metadata into the file + file_mi = get_metadata(stream, False) + file_mi.smart_update(mi) + sections[hr.metadata_offset] = '%s\x00%s\x00%s\x00%s\x00%s\x00' % \ + (file_mi.title, authors_to_string(file_mi.authors), '', file_mi.publisher, file_mi.isbn) + + # Rebuild the PDB wrapper because the offsets have changed due to the + # new metadata. 
+ pheader_builder = PdbHeaderBuilder(pheader.ident, pheader.title) + stream.seek(0) + stream.truncate(0) + pheader_builder.build_header([len(x) for x in sections], stream) + + # Write the data back to the file + for item in sections: + stream.write(item) + diff --git a/src/calibre/ebooks/metadata/pdb.py b/src/calibre/ebooks/metadata/pdb.py index a6f7c6796b..f3d2782d16 100644 --- a/src/calibre/ebooks/metadata/pdb.py +++ b/src/calibre/ebooks/metadata/pdb.py @@ -12,11 +12,18 @@ import re from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.pdb.header import PdbHeaderReader -from calibre.ebooks.metadata.ereader import get_metadata as eReader +from calibre.ebooks.metadata.ereader import get_metadata as get_eReader -MREADER = { - 'PNPdPPrs' : eReader, - 'PNRdPPrs' : eReader, +MREADER = { + 'PNPdPPrs' : get_eReader, + 'PNRdPPrs' : get_eReader, +} + +from calibre.ebooks.metadata.ereader import set_metadata as set_eReader + +MWRITER = { + 'PNPdPPrs' : set_eReader, + 'PNRdPPrs' : set_eReader, } def get_metadata(stream, extract_cover=True): @@ -34,3 +41,16 @@ def get_metadata(stream, extract_cover=True): return MetadataReader(stream, extract_cover) +def set_metadata(stream, mi): + stream.seek(0) + + pheader = PdbHeaderReader(stream) + + MetadataWriter = MWRITER.get(pheader.ident, None) + + if MetadataWriter: + MetadataWriter(stream, mi) + + stream.seek(0) + stream.write(re.sub('[^-A-Za-z0-9]+', '_', mi.title).ljust(32, '\x00')[:32]) + diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py index 90138180d2..13429c5a98 100644 --- a/src/calibre/ebooks/pdb/ereader/reader.py +++ b/src/calibre/ebooks/pdb/ereader/reader.py @@ -12,7 +12,6 @@ import os, re, struct, zlib from calibre import CurrentDir from calibre.ebooks import DRMError -from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.pdb.formatreader import FormatReader from calibre.ebooks.pdb.ereader import EreaderError from 
calibre.ebooks.pml.pmlconverter import pml_to_html, \ @@ -31,6 +30,7 @@ class HeaderRecord(object): def __init__(self, raw): self.version, = struct.unpack('>H', raw[0:2]) self.non_text_offset, = struct.unpack('>H', raw[12:14]) + self.has_metadata, = struct.unpack('>H', raw[24:26]) self.footnote_rec, = struct.unpack('>H', raw[28:30]) self.sidebar_rec, = struct.unpack('>H', raw[30:32]) self.bookmark_offset, = struct.unpack('>H', raw[32:34]) @@ -62,6 +62,9 @@ class Reader(FormatReader): else: raise EreaderError('Unknown book version %i.' % self.header_record.version) + from calibre.ebooks.metadata.pdb import get_metadata + self.mi = get_metadata(stream, False) + def section_data(self, number): return self.sections[number] @@ -144,10 +147,8 @@ class Reader(FormatReader): return opf_path def create_opf(self, output_dir, images): - mi = MetaInformation(None, None) - with CurrentDir(output_dir): - opf = OPFCreator(output_dir, mi) + opf = OPFCreator(output_dir, self.mi) manifest = [('index.html', None)] diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py index 3f2e0d9225..f49aa4e125 100644 --- a/src/calibre/ebooks/pdb/ereader/writer.py +++ b/src/calibre/ebooks/pdb/ereader/writer.py @@ -18,11 +18,7 @@ from calibre.ebooks.pdb.header import PdbHeaderBuilder from calibre.ebooks.pdb.ereader import image_name from calibre.ebooks.pml.pmlconverter import html_to_pml -# We are using the older identity because we do not user newer features -# (sidebar, footnotes). This will ensure compatibility with older readers. -# If newer features are used (anything supported by dropbook but not by makebook -# change the identity to the newer PNRdPPrs. -IDENTITY = 'PNPdPPrs' +IDENTITY = 'PNRdPPrs' # This is an arbitrary number that is small enough to work. The actual maximum # record size is unknown.