Pull from driver-dev

2025-07-09 03:04:10 -04:00 · 2009-05-03 23:35:52 -07:00 · 2009-05-03 23:35:52 -07:00 · 74267dc516
commit 74267dc516
parent 3c1ec39656 d77f83f9a6
11 changed files with 263 additions and 19 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -302,6 +302,7 @@ from calibre.web.feeds.input import RecipeInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.mobi.output import MOBIOutput
+from calibre.ebooks.pdb.output import PDBOutput
 from calibre.ebooks.lrf.output import LRFOutput
 from calibre.ebooks.lit.output import LITOutput
 from calibre.ebooks.txt.output import TXTOutput
@ -323,7 +324,7 @@ from calibre.devices.jetbook.driver import JETBOOK
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
        FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
-        PMLOutput, MOBIOutput, LRFOutput, LITOutput]
+        PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput]
 plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
        EB600, JETBOOK]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/ebooks/pdb/init.py
+++ b/src/calibre/ebooks/pdb/init.py
@ -5,15 +5,27 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-from calibre.ebooks.pdb.ereader.reader import Reader as eReader
-from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
-from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
+class PDBError(Exception):
+    '''
+    Exception type used by the pdb conversion code, e.g. raised by the PDB
+    output plugin when no writer exists for the requested format.
+    '''
+    pass
+    

-FORMATS = {
-    'PNPdPPrs' : eReader,
-    'PNRdPPrs' : eReader,
-    'zTXTGPlm' : zTXT,
-    'TEXtREAd' : PalmDoc,
+from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
+from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
+from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
+
+FORMAT_READERS = {
+    'PNPdPPrs' : ereader_reader,
+    'PNRdPPrs' : ereader_reader,
+    'zTXTGPlm' : ztxt_reader,
+    'TEXtREAd' : palmdoc_reader,
+}
+
+from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
+from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
+
+FORMAT_WRITERS = {
+    'doc'  : palmdoc_writer,
+    'ztxt' : ztxt_writer,
 }

 IDENTITY_TO_NAME = {
@ -48,15 +60,15 @@ IDENTITY_TO_NAME = {
    'BDOCWrdS' : 'WordSmith',
 }

-class PDBError(Exception):
-    pass
-    
-
 def get_reader(identity):
    '''
    Returns None if no reader is found for the identity.
    '''
-    if identity in FORMATS.keys():
-        return FORMATS[identity]
-    else:
-        return None
+    return FORMAT_READERS.get(identity, None)
+        
+def get_writer(extension):
+    '''
+    Look up the writer class registered for extension in FORMAT_WRITERS.
+
+    Returns None when no writer is registered for that extension.
+    '''
+    try:
+        return FORMAT_WRITERS[extension]
+    except KeyError:
+        return None
+    
--- a/src/calibre/ebooks/pdb/formatreader.py
+++ b/src/calibre/ebooks/pdb/formatreader.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from __future__ import with_statement
+
 '''
 Interface defining the necessary public functions for a pdb format reader.
 '''
--- a/src/calibre/ebooks/pdb/formatwriter.py
+++ b/src/calibre/ebooks/pdb/formatwriter.py
@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+
+'''
+Interface defining the necessary public functions for a pdb format writer.
+'''
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+
+class FormatWriter(object):
+    '''
+    Interface that every pdb format writer is expected to implement.
+
+    Both methods raise NotImplementedError here; concrete writers override
+    them.
+    '''
+
+    def __init__(self, opts, log):
+        '''
+        Receive the conversion options (opts) and the logger (log).
+        '''
+        raise NotImplementedError()
+        
+    def write_content(self, oeb_book, output_stream, ):
+        '''
+        Write the content of oeb_book to output_stream.
+        '''
+        raise NotImplementedError()
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/pdb/input.py
@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import with_statement

 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
--- a/src/calibre/ebooks/pdb/output.py
+++ b/src/calibre/ebooks/pdb/output.py
@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre.ebooks.pdb import PDBError, get_writer
+
+class PDBOutput(OutputFormatPlugin):
+    '''
+    Output plugin that writes an OEB book to a pdb file.
+
+    The concrete pdb flavour is selected by the sub extension of the output
+    file name (the extension that precedes ``.pdb``, for example
+    ``book.ztxt.pdb``) and is resolved through get_writer.
+    '''
+
+    name = 'PDB Output'
+    author = 'John Schember'
+    file_type = 'pdb'
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        '''
+        Write oeb_book to output_path using the writer matching the sub
+        extension of the output file name.
+
+        output_path is either a file system path or an already open file
+        like object (anything with a ``write`` attribute; its ``name`` is
+        used to determine the format). A stream passed in by the caller is
+        left open; a file opened here is always closed, even on error.
+
+        Raises PDBError if no writer is available for the format.
+        '''
+        is_stream = hasattr(output_path, 'write')
+        file_name = output_path.name if is_stream else output_path
+
+        # Determine the format to write based upon the sub extension
+        fmt = os.path.splitext(os.path.splitext(file_name)[0])[1][1:]
+
+        # Resolve the writer before touching the file system so that an
+        # unsupported format does not leave an empty output file behind.
+        Writer = get_writer(fmt)
+        if Writer is None:
+            raise PDBError('No writer available for format %s.' % fmt)
+        writer = Writer(opts, log)
+
+        if is_stream:
+            out_stream = output_path
+        else:
+            out_dir = os.path.dirname(output_path)
+            if out_dir != '' and not os.path.exists(out_dir):
+                os.makedirs(out_dir)
+            out_stream = open(output_path, 'wb')
+
+        try:
+            # Start from an empty stream regardless of previous content.
+            out_stream.seek(0)
+            out_stream.truncate()
+
+            writer.write_content(oeb_book, out_stream)
+        finally:
+            # Only close what was opened here.
+            if not is_stream:
+                out_stream.close()
+            
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@ -53,9 +53,12 @@ class Reader(FormatReader):
    def extract_content(self, output_dir):
        txt = ''
        
+        self.log.info('Decompressing text...')
        for i in range(1, self.header_record.num_records + 1):
+            self.log.debug('\tDecompressing text section %i' % i)
            txt += self.decompress_text(i)

+        self.log.info('Converting text to OEB...')
        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+
+'''
+Write content to a PalmDoc pdb file.
+'''
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import struct
+
+from calibre.ebooks.pdb.formatwriter import FormatWriter
+from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
+from calibre.ebooks.mobi.palmdoc import compress_doc
+from calibre.ebooks.pdb.header import PdbHeaderBuilder
+
+MAX_RECORD_SIZE = 4096
+
+class Writer(FormatWriter):
+    '''
+    Writes the text of an OEB book as a PalmDoc ('TEXtREAd') pdb file.
+    '''
+
+    def __init__(self, opts, log):
+        self.opts = opts
+        self.log = log
+
+    def write_content(self, oeb_book, out_stream):
+        '''
+        Write oeb_book to out_stream.
+
+        The stream is rewound to position 0, then the pdb header (built by
+        PdbHeaderBuilder), the PalmDoc header record and the compressed text
+        records are written in that order.
+        '''
+        # An explicit title option wins over the book metadata.
+        if self.opts.title:
+            title = self.opts.title
+        elif oeb_book.metadata.title != []:
+            title = oeb_book.metadata.title[0].value
+        else:
+            title = _('Unknown')
+
+        txt_records, txt_length = self._generate_text(oeb_book.spine)
+        header_record = self._header_record(txt_length, len(txt_records))
+
+        section_lengths = [len(header_record)]
+        compressed_records = []
+        self.log.info('Compressing data...')
+        for i, txt_record in enumerate(txt_records):
+            self.log.debug('\tCompressing record %i' % i)
+            data = compress_doc(txt_record.encode('utf-8'))
+            compressed_records.append(data)
+            section_lengths.append(len(data))
+
+        out_stream.seek(0)
+        hb = PdbHeaderBuilder('TEXtREAd', title)
+        hb.build_header(section_lengths, out_stream)
+
+        for record in [header_record] + compressed_records:
+            out_stream.write(record)
+
+    def _generate_text(self, spine):
+        '''
+        Render spine to text with TxtWriter and split it into records.
+
+        Returns a tuple (txt_records, txt_length): the list of text chunks
+        of at most MAX_RECORD_SIZE characters each and the length of the
+        whole text.
+        '''
+        txt_writer = TxtWriter(TxtNewlines('system').newline, self.log)
+        txt = txt_writer.dump(spine, TxtMetadata())
+
+        # NOTE(review): the length is measured before the records are UTF-8
+        # encoded, so for non ASCII text it may differ from the encoded byte
+        # count -- confirm against what readers expect.
+        txt_length = len(txt)
+
+        # Step through the text in MAX_RECORD_SIZE strides. This avoids the
+        # empty trailing record that a "length / size + 1" count produces
+        # when the length is an exact multiple of the record size. Empty
+        # text still yields a single empty record.
+        txt_records = [txt[start:start + MAX_RECORD_SIZE]
+                       for start in range(0, len(txt), MAX_RECORD_SIZE)] or [txt]
+
+        return txt_records, txt_length
+
+    def _header_record(self, txt_length, record_count):
+        '''
+        Build the 16 byte PalmDoc header record (record 0 of the file).
+        '''
+        return struct.pack(
+            '>HHLHHL',
+            2,                  # [0:2],   PalmDoc compression. (1 = No compression).
+            0,                  # [2:4],   Always 0.
+            txt_length,         # [4:8],   Uncompressed length of the entire text of the book.
+            record_count,       # [8:10],  Number of PDB records used for the text of the book.
+            MAX_RECORD_SIZE,    # [10:12], Maximum size of each record containing text, always 4096.
+            0,                  # [12:16], Current reading position, as an offset into the uncompressed text.
+        )
+        
--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@ -14,6 +14,8 @@ from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.pdb.ztxt import zTXTError
 from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer

+SUPPORTED_VERSION = (1, 40)
+
 class HeaderRecord(object):
    '''
    The first record in the file is always the header record. It holds
@ -27,6 +29,7 @@ class HeaderRecord(object):
        self.num_records, = struct.unpack('>H', raw[2:4])
        self.size, = struct.unpack('>L', raw[4:8])
        self.record_size, = struct.unpack('>H', raw[8:10])
+        self.flags, = struct.unpack('>B', raw[18:19])
        
    
 class Reader(FormatReader):
@ -41,6 +44,16 @@ class Reader(FormatReader):
            self.sections.append(header.section_data(i))

        self.header_record = HeaderRecord(self.section_data(0))
+        
+        vmajor = (self.header_record.version & 0x0000FF00) >> 8
+        vminor = self.header_record.version & 0x000000FF
+        if vmajor < 1 or (vmajor == 1 and vminor < 40):
+            raise zTXTError('Unsupported ztxt version (%i.%i). Only versions newer than %i.%i are supported.' % (vmajor, vminor, SUPPORTED_VERSION[0], SUPPORTED_VERSION[1]))
+
+        if (self.header_record.flags & 0x01) == 0:
+            raise zTXTError('Only compression method 1 (random access) is supported')
+
+        self.log.debug('Foud ztxt version: %i.%i' % (vmajor, vminor))

        # Initalize the decompressor
        self.uncompressor = zlib.decompressobj()
@ -57,9 +70,12 @@ class Reader(FormatReader):
    def extract_content(self, output_dir):
        txt = ''
        
+        self.log.info('Decompressing text...')
        for i in range(1, self.header_record.num_records + 1):
+            self.log.debug('\tDecompressing text section %i' % i)
            txt += self.decompress_text(i)

+        self.log.info('Converting text to OEB...')
        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))
--- a/src/calibre/ebooks/pdb/ztxt/writer.py
+++ b/src/calibre/ebooks/pdb/ztxt/writer.py
@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+'''
+Write content to a zTXT pdb file.
+'''
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import struct, zlib
+
+from calibre.ebooks.pdb.formatwriter import FormatWriter
+from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
+from calibre.ebooks.pdb.header import PdbHeaderBuilder
+
+MAX_RECORD_SIZE = 8192
+
+class Writer(FormatWriter):
+    '''
+    Writes the text of an OEB book as a zTXT ('zTXTGPlm') pdb file.
+    '''
+
+    def __init__(self, opts, log):
+        self.opts = opts
+        self.log = log
+
+    def write_content(self, oeb_book, out_stream):
+        '''
+        Write oeb_book to out_stream.
+
+        The stream is rewound to position 0, then the pdb header (built by
+        PdbHeaderBuilder), the zTXT header record and the compressed text
+        records are written in that order.
+        '''
+        # An explicit title option wins over the book metadata.
+        if self.opts.title:
+            title = self.opts.title
+        elif oeb_book.metadata.title != []:
+            title = oeb_book.metadata.title[0].value
+        else:
+            title = _('Unknown')
+
+        txt_records, txt_length = self._generate_text(oeb_book.spine)
+
+        crc32 = 0
+        section_lengths = []
+        compressed_records = []
+        # A single compressor is shared by all records; the full flush after
+        # each record ends it on a flush boundary (the header below flags
+        # the file as random access).
+        compressor = zlib.compressobj(9)
+        self.log.info('Compressing data...')
+        for i, txt_record in enumerate(txt_records):
+            self.log.debug('\tCompressing record %i' % i)
+            data = compressor.compress(txt_record.encode('utf-8'))
+            data = data + compressor.flush(zlib.Z_FULL_FLUSH)
+            compressed_records.append(data)
+            section_lengths.append(len(data))
+            # Running checksum over the compressed records, kept unsigned.
+            crc32 = zlib.crc32(data, crc32) & 0xffffffff
+
+        # The header embeds the crc, so it can only be built after compression.
+        header_record = self._header_record(txt_length, len(compressed_records), crc32)
+        section_lengths.insert(0, len(header_record))
+
+        out_stream.seek(0)
+        hb = PdbHeaderBuilder('zTXTGPlm', title)
+        hb.build_header(section_lengths, out_stream)
+
+        for record in [header_record] + compressed_records:
+            out_stream.write(record)
+
+    def _generate_text(self, spine):
+        '''
+        Render spine to text with TxtWriter and split it into records.
+
+        Returns a tuple (txt_records, txt_length): the list of text chunks
+        of at most MAX_RECORD_SIZE characters each and the length of the
+        whole text.
+        '''
+        txt_writer = TxtWriter(TxtNewlines('system').newline, self.log)
+        txt = txt_writer.dump(spine, TxtMetadata())
+
+        # NOTE(review): the length is measured before the records are UTF-8
+        # encoded, so for non ASCII text it may differ from the encoded byte
+        # count -- confirm against what readers expect.
+        txt_length = len(txt)
+
+        # Step through the text in MAX_RECORD_SIZE strides. This avoids the
+        # empty trailing record that a "length / size + 1" count produces
+        # when the length is an exact multiple of the record size. Empty
+        # text still yields a single empty record.
+        txt_records = [txt[start:start + MAX_RECORD_SIZE]
+                       for start in range(0, len(txt), MAX_RECORD_SIZE)] or [txt]
+
+        return txt_records, txt_length
+
+    def _header_record(self, txt_length, record_count, crc32):
+        '''
+        Build the 32 byte zTXT header record (record 0 of the file).
+        '''
+        return struct.pack(
+            '>HHLHHHHHBBLLL',
+            0x012c,             # [0:2],   version. 0x012c = 1.44
+            record_count,       # [2:4],   Number of PDB records used for the text of the book.
+            txt_length,         # [4:8],   Uncompressed length of the entire text of the book.
+            MAX_RECORD_SIZE,    # [8:10],  Maximum size of each record containing text
+            0,                  # [10:12], Number of bookmarks.
+            0,                  # [12:14], Bookmark record. 0 if there are no bookmarks.
+            0,                  # [14:16], Number of annotations.
+            0,                  # [16:18], Annotation record. 0 if there are no annotations.
+            1,                  # [18:19], Flags. Bitmask, 0x01 = Random Access. 0x02 = Non-Uniform text block size.
+            0,                  # [19:20], Reserved.
+            crc32,              # [20:24], crc32
+            0, 0,               # [24:32], padding
+        )
+        
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'