Pull from driver dev

This commit is contained in:
Kovid Goyal 2009-05-03 10:59:12 -07:00
commit 4afc1a7106
12 changed files with 228 additions and 18 deletions

View File

@ -29,7 +29,7 @@ def get_metadata(stream, extract_cover=True):
MetadataReader = MREADER.get(pheader.ident, None)
if MetadataReader is None:
return MetaInformation(_('Unknown'), [_('Unknown')])
return MetaInformation(pheader.title, [_('Unknown')])
return MetadataReader(stream, extract_cover)

View File

@ -6,15 +6,46 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.pdb.ereader.reader import Reader as eReader
from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
# Reader class to use for each PDB identity string (the value found in
# header.ident). Identities missing from this table have no reader.
FORMATS = {
    'PNPdPPrs': eReader,
    'PNRdPPrs': eReader,
    'zTXTGPlm': zTXT,
    'TEXtREAd': PalmDoc,
}
# Human-readable name for each known PDB identity string. This table is
# wider than the set of identities that have a reader; it is used to
# describe a container's content in log and error messages.
IDENTITY_TO_NAME = {
    # Identities that also have a reader
    'PNPdPPrs': 'eReader',
    'PNRdPPrs': 'eReader',
    'zTXTGPlm': 'zTXT',
    'TEXtREAd': 'PalmDOC',

    # Identities that are recognised by name only
    '.pdfADBE': 'Adobe Reader',
    'BVokBDIC': 'BDicty',
    'DB99DBOS': 'DB (Database program)',
    'vIMGView': 'FireViewer (ImageViewer)',
    'PmDBPmDB': 'HanDBase',
    'InfoINDB': 'InfoView',
    'ToGoToGo': 'iSilo',
    'SDocSilX': 'iSilo 3',
    'JbDbJBas': 'JFile',
    'JfDbJFil': 'JFile Pro',
    'DATALSdb': 'LIST',
    'Mdb1Mdb1': 'MobileDB',
    'BOOKMOBI': 'MobiPocket',
    'DataPlkr': 'Plucker',
    'DataSprd': 'QuickSheet',
    'SM01SMem': 'SuperMemo',
    'TEXtTlDc': 'TealDoc',
    'InfoTlIf': 'TealInfo',
    'DataTlMl': 'TealMeal',
    'DataTlPt': 'TealPaint',
    'dataTDBP': 'ThinkDB',
    'TdatTide': 'Tides',
    'ToRaTRPW': 'TomeRaider',
    'BDOCWrdS': 'WordSmith',
}
class PDBError(Exception):

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'

View File

@ -16,7 +16,7 @@ from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ereader import EreaderError
from calibre.ebooks.pml.pmlconverter import pml_to_html, \
footnote_sidebar_to_html
footnote_sidebar_to_html
from calibre.ebooks.mobi.palmdoc import decompress_doc
from calibre.ebooks.metadata.opf2 import OPFCreator

View File

@ -24,7 +24,7 @@ class PDBInput(InputFormatPlugin):
Reader = get_reader(header.ident)
if Reader is None:
raise PDBError('Unknown format in pdb file. Identity is %s' % header.identity)
raise PDBError('No reader avaliable for format within container.\n Identity is %s. Book type is %s' % (header.ident, IDENTITY_TO_NAME.get(header.ident, _('Unknown'))))
log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))

View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
'''
Read content from palmdoc pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, struct, zlib
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.mobi.palmdoc import decompress_doc
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
class HeaderRecord(object):
    '''
    Parsed form of the first record (record 0) of a PalmDoc container.

    Two big-endian unsigned 16-bit values are read from the raw record:

    ``compression``
        taken from bytes 0-1.
    ``num_records``
        taken from bytes 8-9.
    '''

    def __init__(self, raw):
        # unpack_from reads at an explicit offset, which avoids building
        # throw-away slices of the record.
        (self.compression,) = struct.unpack_from('>H', raw, 0)
        (self.num_records,) = struct.unpack_from('>H', raw, 8)
class Reader(FormatReader):
    '''
    Extracts the text of a PalmDoc book held in a PDB container and writes
    it out as ``index.html`` plus ``metadata.opf``.

    :param header: PDB header object; ``num_sections`` and
        ``section_data(i)`` are used to load every section up front.
    :param stream: the open book stream, later handed to ``get_metadata``.
    :param log: logger supplied by the caller.
    :param encoding: codec used to decode the text; ``cp1252`` is used
        when this is ``None``.
    '''

    def __init__(self, header, stream, log, encoding=None):
        self.stream = stream
        self.log = log
        self.encoding = encoding

        self.sections = [header.section_data(i)
                         for i in range(header.num_sections)]
        # Section 0 is the format's own header record.
        self.header_record = HeaderRecord(self.section_data(0))

    def section_data(self, number):
        '''Return the raw bytes of section ``number``.'''
        return self.sections[number]

    def _codec(self):
        '''Name of the codec used to turn record bytes into text.'''
        return 'cp1252' if self.encoding is None else self.encoding

    def _raw_text(self, number):
        '''
        Return the undecoded bytes of text record ``number``, or ``None``
        when the header's compression code is not one this reader handles.
        '''
        compression = self.header_record.compression
        if compression == 1:
            # Code 1: the record is used exactly as stored.
            return self.section_data(number)
        if compression == 2:
            return decompress_doc(self.section_data(number))
        return None

    def decompress_text(self, number):
        '''
        Return text record ``number`` decoded to text.

        An empty string is returned when the compression code is neither
        1 nor 2.
        '''
        raw = self._raw_text(number)
        if raw is None:
            return ''
        return raw.decode(self._codec())

    def extract_content(self, output_dir):
        '''
        Convert the whole book and write ``index.html`` and
        ``metadata.opf`` into ``output_dir``.

        :return: path of the written ``metadata.opf``.
        '''
        # Decode once over the joined bytes rather than record by record:
        # a multi-byte character may straddle two records, and decoding the
        # halves separately would fail or corrupt it. Joining also avoids
        # repeated string concatenation.
        pieces = []
        for i in range(1, self.header_record.num_records + 1):
            raw = self._raw_text(i)
            if raw is not None:
                pieces.append(raw)
        txt = b''.join(pieces).decode(self._codec())

        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(self.stream, 'pdb')

        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')

View File

@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
class zTXTError(Exception):
    '''Error type for problems specific to the zTXT format.'''

View File

@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
'''
Read content from ztxt pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os, struct, zlib
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ztxt import zTXTError
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
class HeaderRecord(object):
    '''
    Parsed form of the first record (record 0) of a zTXT container.

    The first ten bytes of the raw record are read as big-endian values:

    ``version``
        unsigned 16-bit, bytes 0-1.
    ``num_records``
        unsigned 16-bit, bytes 2-3.
    ``size``
        unsigned 32-bit, bytes 4-7.
    ``record_size``
        unsigned 16-bit, bytes 8-9.
    '''

    def __init__(self, raw):
        # '>' selects big-endian with no padding, so the four fields sit
        # back to back and can be read in a single call.
        fields = struct.unpack_from('>HHLH', raw, 0)
        self.version, self.num_records, self.size, self.record_size = fields
class Reader(FormatReader):
    '''
    Extracts the text of a zTXT book held in a PDB container and writes it
    out as ``index.html`` plus ``metadata.opf``.

    Text records are inflated through one shared zlib decompressor, so they
    must be processed in ascending order starting from record 1.

    :param header: PDB header object; ``num_sections`` and
        ``section_data(i)`` are used to load every section up front.
    :param stream: the open book stream, later handed to ``get_metadata``.
    :param log: logger supplied by the caller.
    :param encoding: codec used to decode the text; ``cp1252`` is used
        when this is ``None``.
    '''

    def __init__(self, header, stream, log, encoding=None):
        self.stream = stream
        self.log = log
        self.encoding = encoding

        self.sections = [header.section_data(i)
                         for i in range(header.num_sections)]
        # Section 0 is the format's own header record.
        self.header_record = HeaderRecord(self.section_data(0))

        # Initialize the decompressor. Record 1 is fed through it here and
        # the output discarded; reading record 1 later starts a fresh
        # stream. NOTE(review): presumably this surfaces a corrupt first
        # record at construction time -- confirm before removing.
        self.uncompressor = zlib.decompressobj()
        self.uncompressor.decompress(self.section_data(1))

    def section_data(self, number):
        '''Return the raw bytes of section ``number``.'''
        return self.sections[number]

    def _codec(self):
        '''Name of the codec used to turn inflated bytes into text.'''
        return 'cp1252' if self.encoding is None else self.encoding

    def _inflate(self, number):
        '''
        Return the inflated, undecoded bytes of text record ``number``.

        Asking for record 1 restarts the zlib stream; any other record
        continues the stream from wherever the previous call left it.
        '''
        if number == 1:
            self.uncompressor = zlib.decompressobj()
        return self.uncompressor.decompress(self.section_data(number))

    def decompress_text(self, number):
        '''Return text record ``number`` inflated and decoded to text.'''
        return self._inflate(number).decode(self._codec())

    def extract_content(self, output_dir):
        '''
        Convert the whole book and write ``index.html`` and
        ``metadata.opf`` into ``output_dir``.

        :return: path of the written ``metadata.opf``.
        '''
        # Decode once over the joined bytes rather than chunk by chunk:
        # inflated chunks end at arbitrary byte offsets, so a multi-byte
        # character may straddle two of them and fail to decode on its own.
        # Joining also avoids repeated string concatenation.
        pieces = []
        for i in range(1, self.header_record.num_records + 1):
            pieces.append(self._inflate(i))
        txt = b''.join(pieces).decode(self._codec())

        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(self.stream, 'pdb')

        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')

View File

@ -62,12 +62,12 @@ class PDFOutput(OutputFormatPlugin):
self.write(ImagePDFWriter, images)
def convert_text(self, oeb_book):
with TemporaryDirectory('_pdf_out') as oebdir:
with TemporaryDirectory('_pdf_out') as oeb_dir:
from calibre.customize.ui import plugin_for_output_format
oeb_output = plugin_for_output_format('oeb')
oeb_output.convert(oeb, oeb_dir, self.input_plugin, self.opts, self.log)
oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)
opfpath = glob.glob(os.path.join(oebdir, '*.opf'))[0]
opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
opf = OPF(opfpath, os.path.dirname(opfpath))
self.write(PDFWriter, [s.path for s in opf.spine])

View File

@ -8,8 +8,7 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
class TXTInput(InputFormatPlugin):
@ -25,19 +24,15 @@ class TXTInput(InputFormatPlugin):
ienc = options.input_encoding
txt = stream.read().decode(ienc)
md = markdown.Markdown(
extensions=['footnotes', 'tables', 'toc'],
safe_mode=False,)
html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
html = txt_to_markdown(txt)
with open('index.html', 'wb') as index:
index.write(html.encode('utf-8'))
from calibre.ebooks.metadata.meta import get_metadata
mi = get_metadata(stream, 'txt')
opf = OPFCreator(os.getcwd(), mi)
opf.create_manifest([('index.html', None)])
opf.create_spine(['index.html'])
with open('metadata.opf', 'wb') as opffile:
opf.render(opffile)
manifest = [('index.html', None)]
spine = ['index.html']
opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
return os.path.join(os.getcwd(), 'metadata.opf')

View File

@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
'''
Read content from txt file.
'''
import os
from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf2 import OPFCreator
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
def txt_to_markdown(txt):
    '''
    Run ``txt`` through the Markdown converter and return the result
    wrapped in a minimal HTML document with an empty title.

    Despite the name, the return value is HTML, not Markdown.
    '''
    converter = markdown.Markdown(
        extensions=['footnotes', 'tables', 'toc'],
        safe_mode=False,
    )
    body = converter.convert(txt)
    return '<html><head><title /></head><body>' + body + '</body></html>'
def opf_writer(path, opf_name, manifest, spine, mi):
    '''
    Build an OPF from ``mi``, ``manifest`` and ``spine`` and write it to
    the file ``opf_name`` inside the directory ``path``.

    ``manifest`` and ``spine`` are passed unchanged to
    ``OPFCreator.create_manifest`` and ``OPFCreator.create_spine``.
    '''
    creator = OPFCreator(path, mi)
    creator.create_manifest(manifest)
    creator.create_spine(spine)

    # The file is opened only once the manifest and spine are in place.
    target = os.path.join(path, opf_name)
    with open(target, 'wb') as out:
        creator.render(out)