Pull from driver-dev

This commit is contained in:
Kovid Goyal 2009-05-03 23:35:52 -07:00
commit 74267dc516
11 changed files with 263 additions and 19 deletions

View File

@ -302,6 +302,7 @@ from calibre.web.feeds.input import RecipeInput
from calibre.ebooks.oeb.output import OEBOutput from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.epub.output import EPUBOutput from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.mobi.output import MOBIOutput from calibre.ebooks.mobi.output import MOBIOutput
from calibre.ebooks.pdb.output import PDBOutput
from calibre.ebooks.lrf.output import LRFOutput from calibre.ebooks.lrf.output import LRFOutput
from calibre.ebooks.lit.output import LITOutput from calibre.ebooks.lit.output import LITOutput
from calibre.ebooks.txt.output import TXTOutput from calibre.ebooks.txt.output import TXTOutput
@ -323,7 +324,7 @@ from calibre.devices.jetbook.driver import JETBOOK
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput, plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput, TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput, FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
PMLOutput, MOBIOutput, LRFOutput, LITOutput] PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput]
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY, plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
EB600, JETBOOK] EB600, JETBOOK]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -5,15 +5,27 @@ __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.ebooks.pdb.ereader.reader import Reader as eReader class PDBError(Exception):
from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT pass
from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
FORMATS = { from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
'PNPdPPrs' : eReader, from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
'PNRdPPrs' : eReader, from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
'zTXTGPlm' : zTXT,
'TEXtREAd' : PalmDoc, FORMAT_READERS = {
'PNPdPPrs' : ereader_reader,
'PNRdPPrs' : ereader_reader,
'zTXTGPlm' : ztxt_reader,
'TEXtREAd' : palmdoc_reader,
}
from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
FORMAT_WRITERS = {
'doc' : palmdoc_writer,
'ztxt' : ztxt_writer,
} }
IDENTITY_TO_NAME = { IDENTITY_TO_NAME = {
@ -48,15 +60,15 @@ IDENTITY_TO_NAME = {
'BDOCWrdS' : 'WordSmith', 'BDOCWrdS' : 'WordSmith',
} }
class PDBError(Exception):
pass
def get_reader(identity): def get_reader(identity):
''' '''
Returns None if no reader is found for the identity. Returns None if no reader is found for the identity.
''' '''
if identity in FORMATS.keys(): return FORMAT_READERS.get(identity, None)
return FORMATS[identity]
else: def get_writer(extension):
return None '''
Returns None if no writer is found for extension.
'''
return FORMAT_WRITERS.get(extension, None)

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
''' '''
Interface defining the necessary public functions for a pdb format reader. Interface defining the necessary public functions for a pdb format reader.
''' '''

View File

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
'''
Interface defining the necessary public functions for a pdb format writer.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
class FormatWriter(object):
    '''
    Interface a pdb format writer has to provide.

    Nothing is implemented here: both methods raise NotImplementedError and
    are meant to be replaced by the concrete writer.
    '''

    def __init__(self, opts, log):
        '''
        :param opts: Options object handed to the writer.
        :param log: Log object handed to the writer.
        '''
        raise NotImplementedError

    def write_content(self, oeb_book, output_stream):
        '''
        Write ``oeb_book`` to ``output_stream``.
        '''
        raise NotImplementedError

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OutputFormatPlugin
from calibre.ebooks.pdb import PDBError, get_writer
class PDBOutput(OutputFormatPlugin):
    '''
    Output plugin that writes an OEB book into a pdb container.

    The pdb flavour is taken from the "sub extension" of the output name,
    i.e. the extension that precedes the final one (``book.ztxt.pdb`` selects
    ``ztxt``). The writer for that flavour is looked up with get_writer().
    '''

    name = 'PDB Output'
    author = 'John Schember'
    file_type = 'pdb'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book`` to ``output_path``.

        :param oeb_book: The book to serialise; passed on to the writer.
        :param output_path: Either a filesystem path or an object with a
            ``write`` method (which must then also provide ``name``,
            ``seek`` and ``truncate``).
        :param input_plugin: Unused here.
        :param opts: Options passed to the writer.
        :param log: Log object passed to the writer.
        :raises PDBError: If no writer is registered for the sub extension.
        '''
        is_stream = hasattr(output_path, 'write')
        target_name = output_path.name if is_stream else output_path

        # Determine the format to write based upon the sub extension.
        sub_ext = os.path.splitext(os.path.splitext(target_name)[0])[1][1:]

        # Resolve the writer before touching the filesystem so that an
        # unsupported format neither creates nor truncates the output file.
        Writer = get_writer(sub_ext)
        if Writer is None:
            raise PDBError('No writer available for format %s.' % sub_ext)
        writer = Writer(opts, log)

        if is_stream:
            out_stream = output_path
        else:
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')

        try:
            out_stream.seek(0)
            out_stream.truncate()
            writer.write_content(oeb_book, out_stream)
        finally:
            # Only close what was opened here; a caller supplied stream stays
            # open, exactly as before.
            if not is_stream:
                out_stream.close()

View File

@ -53,9 +53,12 @@ class Reader(FormatReader):
def extract_content(self, output_dir): def extract_content(self, output_dir):
txt = '' txt = ''
self.log.info('Decompressing text...')
for i in range(1, self.header_record.num_records + 1): for i in range(1, self.header_record.num_records + 1):
self.log.debug('\tDecompressing text section %i' % i)
txt += self.decompress_text(i) txt += self.decompress_text(i)
self.log.info('Converting text to OEB...')
html = txt_to_markdown(txt) html = txt_to_markdown(txt)
with open(os.path.join(output_dir, 'index.html'), 'wb') as index: with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
index.write(html.encode('utf-8')) index.write(html.encode('utf-8'))

View File

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
'''
Write content to palmdoc pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import struct
from calibre.ebooks.pdb.formatwriter import FormatWriter
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
from calibre.ebooks.mobi.palmdoc import compress_doc
from calibre.ebooks.pdb.header import PdbHeaderBuilder
# The book text is cut into slices of this many characters, one slice per text
# record, and the same value is stored in the PalmDoc header record as the
# maximum record size.
MAX_RECORD_SIZE = 4096
class Writer(FormatWriter):
    '''
    Writes the text of an OEB book to a PalmDoc ('TEXtREAd') pdb file.
    '''

    def __init__(self, opts, log):
        '''
        :param opts: Options object; ``opts.title`` overrides the book title
            when it is set.
        :param log: Log object used for info and debug messages.
        '''
        self.opts = opts
        self.log = log

    def write_content(self, oeb_book, out_stream):
        '''
        Write the pdb header, the PalmDoc header record and the compressed
        text records of ``oeb_book`` to ``out_stream``, starting at offset 0.
        '''
        # Title precedence: explicit option, then book metadata, then a
        # translated placeholder.
        if self.opts.title:
            title = self.opts.title
        elif oeb_book.metadata.title != []:
            title = oeb_book.metadata.title[0].value
        else:
            title = _('Unknown')

        txt_records, txt_length = self._generate_text(oeb_book.spine)
        header_record = self._header_record(txt_length, len(txt_records))

        section_lengths = [len(header_record)]
        compressed_records = []
        self.log.info('Compressing data...')
        for i, text in enumerate(txt_records):
            self.log.debug('\tCompressing record %i' % i)
            data = compress_doc(text.encode('utf-8'))
            compressed_records.append(data)
            section_lengths.append(len(data))

        out_stream.seek(0)
        hb = PdbHeaderBuilder('TEXtREAd', title)
        hb.build_header(section_lengths, out_stream)

        out_stream.write(header_record)
        for record in compressed_records:
            out_stream.write(record)

    def _generate_text(self, spine):
        '''
        Dump the spine to text and cut it into record sized pieces.

        :return: ``(records, length)`` where ``records`` is the list of text
            slices and ``length`` is ``len()`` of the whole text.
        '''
        txt_writer = TxtWriter(TxtNewlines('system').newline, self.log)
        txt = txt_writer.dump(spine, TxtMetadata())

        # NOTE(review): the length and the slicing are measured on the text
        # before it is utf-8 encoded, so with non-ASCII text an encoded record
        # can be larger than MAX_RECORD_SIZE bytes -- confirm readers cope.
        txt_length = len(txt)

        # Step through the text instead of computing a record count with '/':
        # that relied on integer division and produced an extra empty record
        # whenever the length was an exact multiple of MAX_RECORD_SIZE.
        txt_records = [txt[start:start + MAX_RECORD_SIZE]
                       for start in range(0, txt_length, MAX_RECORD_SIZE)]
        if not txt_records:
            # An empty book still gets one (empty) text record, as before.
            txt_records.append(txt)

        return txt_records, txt_length

    def _header_record(self, txt_length, record_count):
        '''
        Build the 16 byte PalmDoc header record (big endian).
        '''
        # Packed in one call so the result is a byte string regardless of what
        # type an empty string literal has.
        return struct.pack(
            '>HHLHHL',
            2,                # [0:2], PalmDoc compression. (1 = No compression).
            0,                # [2:4], Always 0.
            txt_length,       # [4:8], Uncompressed length of the entire text of the book.
            record_count,     # [8:10], Number of PDB records used for the text of the book.
            MAX_RECORD_SIZE,  # [10:12], Maximum size of each record containing text, always 4096.
            0,                # [12:16], Current reading position, as an offset into the uncompressed text.
        )

View File

@ -14,6 +14,8 @@ from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ztxt import zTXTError from calibre.ebooks.pdb.ztxt import zTXTError
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
SUPPORTED_VERSION = (1, 40)
class HeaderRecord(object): class HeaderRecord(object):
''' '''
The first record in the file is always the header record. It holds The first record in the file is always the header record. It holds
@ -27,6 +29,7 @@ class HeaderRecord(object):
self.num_records, = struct.unpack('>H', raw[2:4]) self.num_records, = struct.unpack('>H', raw[2:4])
self.size, = struct.unpack('>L', raw[4:8]) self.size, = struct.unpack('>L', raw[4:8])
self.record_size, = struct.unpack('>H', raw[8:10]) self.record_size, = struct.unpack('>H', raw[8:10])
self.flags, = struct.unpack('>B', raw[18:19])
class Reader(FormatReader): class Reader(FormatReader):
@ -41,6 +44,16 @@ class Reader(FormatReader):
self.sections.append(header.section_data(i)) self.sections.append(header.section_data(i))
self.header_record = HeaderRecord(self.section_data(0)) self.header_record = HeaderRecord(self.section_data(0))
vmajor = (self.header_record.version & 0x0000FF00) >> 8
vminor = self.header_record.version & 0x000000FF
if vmajor < 1 or (vmajor == 1 and vminor < 40):
raise zTXTError('Unsupported ztxt version (%i.%i). Only versions newer than %i.%i are supported.' % (vmajor, vminor, SUPPORTED_VERSION[0], SUPPORTED_VERSION[1]))
if (self.header_record.flags & 0x01) == 0:
raise zTXTError('Only compression method 1 (random access) is supported')
self.log.debug('Foud ztxt version: %i.%i' % (vmajor, vminor))
# Initalize the decompressor # Initalize the decompressor
self.uncompressor = zlib.decompressobj() self.uncompressor = zlib.decompressobj()
@ -57,9 +70,12 @@ class Reader(FormatReader):
def extract_content(self, output_dir): def extract_content(self, output_dir):
txt = '' txt = ''
self.log.info('Decompressing text...')
for i in range(1, self.header_record.num_records + 1): for i in range(1, self.header_record.num_records + 1):
self.log.debug('\tDecompressing text section %i' % i)
txt += self.decompress_text(i) txt += self.decompress_text(i)
self.log.info('Converting text to OEB...')
html = txt_to_markdown(txt) html = txt_to_markdown(txt)
with open(os.path.join(output_dir, 'index.html'), 'wb') as index: with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
index.write(html.encode('utf-8')) index.write(html.encode('utf-8'))

View File

@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
'''
Write content to ztxt pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import struct, zlib
from calibre.ebooks.pdb.formatwriter import FormatWriter
from calibre.ebooks.txt.writer import TxtWriter, TxtNewlines, TxtMetadata
from calibre.ebooks.pdb.header import PdbHeaderBuilder
# The book text is cut into slices of this many characters, one slice per zTXT
# data record, and the same value is stored in the header record as the
# maximum record size.
MAX_RECORD_SIZE = 8192
class Writer(FormatWriter):
    '''
    Writes the text of an OEB book to a zTXT ('zTXTGPlm') pdb file.
    '''

    def __init__(self, opts, log):
        '''
        :param opts: Options object; ``opts.title`` overrides the book title
            when it is set.
        :param log: Log object used for info and debug messages.
        '''
        self.opts = opts
        self.log = log

    def write_content(self, oeb_book, out_stream):
        '''
        Write the pdb header, the zTXT header record and the zlib compressed
        text records of ``oeb_book`` to ``out_stream``, starting at offset 0.
        '''
        # Title precedence: explicit option, then book metadata, then a
        # translated placeholder.
        if self.opts.title:
            title = self.opts.title
        elif oeb_book.metadata.title != []:
            title = oeb_book.metadata.title[0].value
        else:
            title = _('Unknown')

        txt_records, txt_length = self._generate_text(oeb_book.spine)

        crc32 = 0
        section_lengths = []
        compressed_records = []
        # One compressor is shared by all records and fully flushed after each
        # of them; the header record sets the random access flag (0x01).
        compressor = zlib.compressobj(9)
        self.log.info('Compressing data...')
        for i, text in enumerate(txt_records):
            self.log.debug('\tCompressing record %i' % i)
            data = compressor.compress(text.encode('utf-8'))
            data += compressor.flush(zlib.Z_FULL_FLUSH)
            compressed_records.append(data)
            section_lengths.append(len(data))
            # Running checksum over the compressed records, kept unsigned.
            crc32 = zlib.crc32(data, crc32) & 0xffffffff

        # The header needs the checksum, so it is built last but written first.
        header_record = self._header_record(txt_length, len(compressed_records), crc32)
        section_lengths.insert(0, len(header_record))

        out_stream.seek(0)
        hb = PdbHeaderBuilder('zTXTGPlm', title)
        hb.build_header(section_lengths, out_stream)

        out_stream.write(header_record)
        for record in compressed_records:
            out_stream.write(record)

    def _generate_text(self, spine):
        '''
        Dump the spine to text and cut it into record sized pieces.

        :return: ``(records, length)`` where ``records`` is the list of text
            slices and ``length`` is ``len()`` of the whole text.
        '''
        txt_writer = TxtWriter(TxtNewlines('system').newline, self.log)
        txt = txt_writer.dump(spine, TxtMetadata())

        # NOTE(review): the length and the slicing are measured on the text
        # before it is utf-8 encoded, so with non-ASCII text the encoded data
        # of a record can exceed MAX_RECORD_SIZE bytes -- confirm readers cope.
        txt_length = len(txt)

        # Step through the text instead of computing a record count with '/':
        # that relied on integer division and produced an extra empty record
        # whenever the length was an exact multiple of MAX_RECORD_SIZE.
        txt_records = [txt[start:start + MAX_RECORD_SIZE]
                       for start in range(0, txt_length, MAX_RECORD_SIZE)]
        if not txt_records:
            # An empty book still gets one (empty) text record, as before.
            txt_records.append(txt)

        return txt_records, txt_length

    def _header_record(self, txt_length, record_count, crc32):
        '''
        Build the 32 byte zTXT header record (big endian).
        '''
        # Packed in one call so the result is a byte string regardless of what
        # type an empty string literal has.
        return struct.pack(
            '>HHLHHHHHBBLLL',
            0x012c,           # [0:2], version. 0x012c = 1.44
            record_count,     # [2:4], Number of PDB records used for the text of the book.
            txt_length,       # [4:8], Uncompressed length of the entire text of the book.
            MAX_RECORD_SIZE,  # [8:10], Maximum size of each record containing text
            0,                # [10:12], Number of bookmarks.
            0,                # [12:14], Bookmark record. 0 if there are no bookmarks.
            0,                # [14:16], Number of annotations.
            0,                # [16:18], Annotation record. 0 if there are no annotations.
            1,                # [18:19], Flags. Bitmask, 0x01 = Random Access. 0x02 = Non-Uniform text block size.
            0,                # [19:20], Reserved.
            crc32,            # [20:24], crc32
            0, 0,             # [24:32], padding
        )

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'