Fix bug in pdfoutput. Add ztxt input.

This commit is contained in:
John Schember 2009-05-03 10:54:07 -04:00
parent 27407c779a
commit daf6e43523
6 changed files with 131 additions and 17 deletions

View File

@ -6,18 +6,18 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.pdb.ereader.reader import Reader as eReader
from calibre.ebooks.pdb.plucker.reader import Reader as Plucker
from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
# Maps the 8-character identity string of a PDB file to the Reader class
# used to read that format.
# NOTE(review): the identity is presumably the type and creator fields of
# the PDB header joined together -- confirm against the header parser.
FORMATS = {
'PNPdPPrs' : eReader,
'PNRdPPrs' : eReader,
'DataPlkr' : Plucker,
'zTXTGPlm' : zTXT,
}
# Maps the same identity strings to a human-readable name for the format.
# Keep the keys in step with FORMATS when adding a new reader.
IDENTITY_TO_NAME = {
'PNPdPPrs' : 'eReader',
'PNRdPPrs' : 'eReader',
'DataPlkr' : 'Plucker',
'zTXTGPlm' : 'zTXT',
}
class PDBError(Exception):

View File

@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
class zTXTError(Exception):
    '''Exception type reserved for errors encountered while handling zTXT files.'''

View File

@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
'''
Read content from ztxt pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import StringIO, os, struct, zlib
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ztxt import zTXTError
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
class HeaderRecord(object):
    '''
    Parsed form of record 0 of a zTXT PDB file.

    The first record in the file is always the header record. It holds
    information related to the location of text, images, and so on
    in the file. This is used in conjunction with the sections
    defined in the file header.

    All fields are big-endian. Attributes set by the constructor:

    version      -- unsigned 16-bit value at offset 0
    num_records  -- unsigned 16-bit value at offset 2
    size         -- unsigned 32-bit value at offset 4
    record_size  -- unsigned 16-bit value at offset 8
    crc32        -- unsigned 32-bit value at offset 20
    '''

    def __init__(self, raw):
        '''
        :param raw: Byte string holding the header record; it must be at
                    least 24 bytes long, otherwise ``struct.error`` is raised.
        '''
        # The four leading fields are packed back to back with no padding.
        (self.version, self.num_records,
         self.size, self.record_size) = struct.unpack_from('>HHLH', raw, 0)
        # In the zTXT record-0 layout offsets 18 and 19 hold the one-byte
        # flags and reserved fields, so the CRC starts at offset 20, not 18.
        self.crc32, = struct.unpack_from('>L', raw, 20)
class Reader(FormatReader):
    '''
    Read the text out of a zTXT PDB file and write it as HTML plus an OPF.

    Section 0 is parsed as the header record; sections 1 through
    ``header_record.num_records`` are treated as zlib-compressed text.
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        :param header:   Object exposing ``num_sections`` and
                         ``section_data(i)``; every section is copied from it.
        :param stream:   Not used by this reader.
        :param log:      Stored as ``self.log``.
        :param encoding: Codec used to decode the text; cp1252 is used when
                         this is None.
        '''
        self.log = log
        self.encoding = encoding

        self.sections = [header.section_data(i) for i in range(header.num_sections)]

        self.header_record = HeaderRecord(self.section_data(0))

        # Initialize the decompressor and feed it the first text record, so
        # the shared zlib stream state sits just past record 1.
        self.uncompressor = zlib.decompressobj()
        self.uncompressor.decompress(self.section_data(1))

        # TODO: the header record's version is not validated; unknown
        # versions are not rejected with zTXTError.

    def section_data(self, number):
        '''Return the raw bytes of section ``number``.'''
        return self.sections[number]

    def _text_encoding(self):
        '''Return the codec name used to decode text (cp1252 by default).'''
        return 'cp1252' if self.encoding is None else self.encoding

    def _decompress_record(self, number):
        '''
        Return the decompressed, still undecoded, bytes of one text record.

        A single decompressor object is shared between calls and is only
        replaced by a fresh one when record 1 is requested, so records are
        expected to be requested in ascending order starting from 1.
        '''
        if number == 1:
            self.uncompressor = zlib.decompressobj()
        return self.uncompressor.decompress(self.section_data(number))

    def decompress_text(self, number):
        '''Return text record ``number`` decompressed and decoded.'''
        return self._decompress_record(number).decode(self._text_encoding())

    def extract_content(self, output_dir):
        '''
        Write ``index.html`` and ``metadata.opf`` into ``output_dir``.

        :return: Path of the ``metadata.opf`` file that was written.
        '''
        # Join the raw bytes first and decode once: decoding record by record
        # breaks when a multi-byte character straddles a record boundary, and
        # repeated string concatenation is quadratic.
        raw = b''.join(
            self._decompress_record(i)
            for i in range(1, self.header_record.num_records + 1))
        txt = raw.decode(self._text_encoding())

        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        # No metadata is read from the file, so placeholder values are used.
        mi = MetaInformation(_('Unknown'), _('Unknown'))
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')

View File

@ -62,12 +62,12 @@ class PDFOutput(OutputFormatPlugin):
self.write(ImagePDFWriter, images)
def convert_text(self, oeb_book):
with TemporaryDirectory('_pdf_out') as oebdir:
with TemporaryDirectory('_pdf_out') as oeb_dir:
from calibre.customize.ui import plugin_for_output_format
oeb_output = plugin_for_output_format('oeb')
oeb_output.convert(oeb, oeb_dir, self.input_plugin, self.opts, self.log)
oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)
opfpath = glob.glob(os.path.join(oebdir, '*.opf'))[0]
opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
opf = OPF(opfpath, os.path.dirname(opfpath))
self.write(PDFWriter, [s.path for s in opf.spine])

View File

@ -8,8 +8,7 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
class TXTInput(InputFormatPlugin):
@ -25,19 +24,15 @@ class TXTInput(InputFormatPlugin):
ienc = options.input_encoding
txt = stream.read().decode(ienc)
md = markdown.Markdown(
extensions=['footnotes', 'tables', 'toc'],
safe_mode=False,)
html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
html = txt_to_markdown(txt)
with open('index.html', 'wb') as index:
index.write(html.encode('utf-8'))
from calibre.ebooks.metadata.meta import get_metadata
mi = get_metadata(stream, 'txt')
opf = OPFCreator(os.getcwd(), mi)
opf.create_manifest([('index.html', None)])
opf.create_spine(['index.html'])
with open('metadata.opf', 'wb') as opffile:
opf.render(opffile)
manifest = [('index.html', None)]
spine = ['index.html']
opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
return os.path.join(os.getcwd(), 'metadata.opf')

View File

@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
'''
Read content from txt file.
'''
import os
from calibre.ebooks.markdown import markdown
from calibre.ebooks.metadata.opf2 import OPFCreator
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
def txt_to_markdown(txt):
    '''
    Render ``txt`` through Markdown and wrap it in a minimal HTML document.

    The footnotes, tables and toc extensions are enabled and safe mode is
    off. Returns the complete HTML page as a string.
    '''
    converter = markdown.Markdown(
        extensions=['footnotes', 'tables', 'toc'],
        safe_mode=False,
    )
    body = converter.convert(txt)
    return '<html><head><title /></head><body>' + body + '</body></html>'
def opf_writer(path, opf_name, manifest, spine, mi):
    '''
    Create an OPF file named ``opf_name`` inside the directory ``path``.

    ``manifest`` and ``spine`` are passed straight to the OPFCreator's
    ``create_manifest`` and ``create_spine``; ``mi`` is the metadata object
    the OPFCreator is constructed with.
    '''
    creator = OPFCreator(path, mi)
    creator.create_manifest(manifest)
    creator.create_spine(spine)

    target = os.path.join(path, opf_name)
    with open(target, 'wb') as opffile:
        creator.render(opffile)