From 5b8974dbb1680e1ea796c884acc2e8d413fb0b10 Mon Sep 17 00:00:00 2001 From: John Schember Date: Sun, 3 May 2009 18:44:09 -0400 Subject: [PATCH] PDB ztxt writer. --- src/calibre/ebooks/pdb/__init__.py | 4 +- src/calibre/ebooks/pdb/palmdoc/writer.py | 2 +- src/calibre/ebooks/pdb/ztxt/writer.py | 78 ++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 src/calibre/ebooks/pdb/ztxt/writer.py diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py index f60cc91c93..70a12ceb96 100644 --- a/src/calibre/ebooks/pdb/__init__.py +++ b/src/calibre/ebooks/pdb/__init__.py @@ -21,9 +21,11 @@ FORMAT_READERS = { } from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer +from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer FORMAT_WRITERS = { - 'doc' : palmdoc_writer, + 'doc' : palmdoc_writer, + 'ztxt' : ztxt_writer, } IDENTITY_TO_NAME = { diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py index a93bc94d26..705b01daee 100644 --- a/src/calibre/ebooks/pdb/palmdoc/writer.py +++ b/src/calibre/ebooks/pdb/palmdoc/writer.py @@ -34,7 +34,7 @@ class Writer(FormatWriter): txt_records[i] = compress_doc(txt_records[i].encode('utf-8')) section_lengths.append(len(txt_records[i])) - out_stream.seek(0) + out_stream.seek(0) hb = PdbHeaderBuilder('TEXtREAd', title) hb.build_header(section_lengths, out_stream) diff --git a/src/calibre/ebooks/pdb/ztxt/writer.py b/src/calibre/ebooks/pdb/ztxt/writer.py new file mode 100644 index 0000000000..81a0df48af --- /dev/null +++ b/src/calibre/ebooks/pdb/ztxt/writer.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- + +''' +Write content to ztxt pdb file.
+'''
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, John Schember '
+__docformat__ = 'restructuredtext en'
+
+import struct, zlib
+
+from calibre.ebooks.pdb.formatwriter import FormatWriter
+from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
+from calibre.ebooks.pdb.header import PdbHeaderBuilder
+
+MAX_RECORD_SIZE = 8192
+
+class Writer(FormatWriter):
+    '''
+    Write the text of an OEB book as a zTXT (``zTXTGPlm``) PDB file.
+    '''
+
+    def __init__(self, opts, log):
+        '''
+        :param opts: Conversion options; ``opts.title`` is read when writing.
+        :param log: Logger handed on to the text writer.
+        '''
+        self.opts = opts
+        self.log = log
+
+    def write_content(self, oeb_book, out_stream):
+        '''
+        Write ``oeb_book`` to ``out_stream`` as a zTXT PDB file.
+
+        The stream receives the PDB header, then the 32 byte zTXT header
+        record, then one zlib compressed record per block of text.
+
+        :param oeb_book: Book to write. Its spine supplies the text and its
+            title metadata is used when ``opts.title`` is not set.
+        :param out_stream: Seekable stream that accepts byte strings. It is
+            rewound to offset 0 before anything is written.
+        '''
+        if self.opts.title:
+            title = self.opts.title
+        elif oeb_book.metadata.title != []:
+            title = oeb_book.metadata.title[0].value
+        else:
+            title = _('Unknown')
+
+        txt_records, txt_length = self._generate_text(oeb_book.spine)
+
+        crc32 = 0
+        section_lengths = []
+        compressed_records = []
+        # A single compressor is shared by every record. The full flush after
+        # each record resets the compression state at the record boundary,
+        # which is what the Random Access flag in the header record promises.
+        compressor = zlib.compressobj(9)
+        for block in txt_records:
+            data = compressor.compress(block) + compressor.flush(zlib.Z_FULL_FLUSH)
+            compressed_records.append(data)
+            section_lengths.append(len(data))
+            # The checksum runs over the compressed records, in order.
+            crc32 = zlib.crc32(data, crc32) & 0xffffffff
+
+        header_record = self._header_record(txt_length, len(compressed_records), crc32)
+        section_lengths.insert(0, len(header_record))
+
+        out_stream.seek(0)
+        hb = PdbHeaderBuilder('zTXTGPlm', title)
+        hb.build_header(section_lengths, out_stream)
+
+        for record in [header_record] + compressed_records:
+            out_stream.write(record)
+
+    def _generate_text(self, spine):
+        '''
+        Render the spine to text and cut it into record sized blocks.
+
+        The text is encoded to UTF-8 before it is split, so both the block
+        size limit and the returned length are measured in bytes.
+
+        :param spine: Spine of the book being written.
+        :return: ``(txt_records, txt_length)``. ``txt_records`` is a list of
+            byte strings of at most ``MAX_RECORD_SIZE`` bytes each, and
+            ``txt_length`` is the total number of encoded bytes.
+        '''
+        txt_writer = TxtWriter(TxtNewlines('system').newline, self.log)
+        txt = txt_writer.dump(spine, TxtMetadata()).encode('utf-8')
+
+        txt_length = len(txt)
+
+        # Stepping by the record size gives no trailing empty block when the
+        # length is an exact multiple of MAX_RECORD_SIZE.
+        txt_records = [txt[start:start + MAX_RECORD_SIZE]
+                       for start in range(0, txt_length, MAX_RECORD_SIZE)]
+        if not txt_records:
+            # Empty text: keep a single empty block so the file always has at
+            # least one text record.
+            txt_records.append(txt)
+
+        return txt_records, txt_length
+
+    def _header_record(self, txt_length, record_count, crc32):
+        '''
+        Build the 32 byte zTXT header record (record 0 of the PDB file).
+
+        :param txt_length: Uncompressed length of the whole text.
+        :param record_count: Number of PDB records holding the text.
+        :param crc32: Checksum computed over the text records.
+        :return: The packed header as a byte string, big-endian, no padding
+            between fields.
+        '''
+        return struct.pack(
+            '>HHLHHHHHBBLLL',
+            0x012c,           # [0:2], version. 0x012c = 1.44 (major 0x01, minor 0x2c)
+            record_count,     # [2:4], Number of PDB records used for the text of the book.
+            txt_length,       # [4:8], Uncompressed length of the entire text of the book.
+            MAX_RECORD_SIZE,  # [8:10], Maximum size of each record containing text
+            0,                # [10:12], Number of bookmarks.
+            0,                # [12:14], Bookmark record. 0 if there are no bookmarks.
+            0,                # [14:16], Number of annotations.
+            0,                # [16:18], Annotation record. 0 if there are no annotations.
+            1,                # [18:19], Flags. Bitmask, 0x01 = Random Access. 0x02 = Non-Uniform text block size.
+            0,                # [19:20], Reserved.
+            crc32,            # [20:24], crc32
+            0, 0,             # [24:32], padding
+        )
+