Add a zTXT PDB reader and share the TXT-to-HTML/OPF helpers.

* pdb/__init__.py: register the zTXT reader in place of the Plucker reader.
* pdb/ztxt/: new package providing zTXTError and the zTXT Reader.
* pdf/output.py: use oeb_book/oeb_dir consistently in convert_text.
* txt/input.py, txt/processor.py: move the Markdown conversion and the OPF
  writing into txt_to_markdown() and opf_writer() so both inputs share them.

diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py
index c4e0349f7b..614d610078 100644
--- a/src/calibre/ebooks/pdb/__init__.py
+++ b/src/calibre/ebooks/pdb/__init__.py
@@ -6,18 +6,18 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
 from calibre.ebooks.pdb.ereader.reader import Reader as eReader
-from calibre.ebooks.pdb.plucker.reader import Reader as Plucker
+from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
 
 FORMATS = {
     'PNPdPPrs' : eReader,
     'PNRdPPrs' : eReader,
-    'DataPlkr' : Plucker,
+    'zTXTGPlm' : zTXT,
 }
 
 IDENTITY_TO_NAME = {
     'PNPdPPrs' : 'eReader',
     'PNRdPPrs' : 'eReader',
-    'DataPlkr' : 'Plucker',
+    'zTXTGPlm' : 'zTXT',
 }
 
 class PDBError(Exception):
diff --git a/src/calibre/ebooks/pdb/ztxt/__init__.py b/src/calibre/ebooks/pdb/ztxt/__init__.py
new file mode 100644
index 0000000000..2c2028b74f
--- /dev/null
+++ b/src/calibre/ebooks/pdb/ztxt/__init__.py
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+class zTXTError(Exception):
+    pass
+
diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py
new file mode 100644
index 0000000000..cfd5ba6e79
--- /dev/null
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from ztxt pdb file.
+'''
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import StringIO, os, struct, zlib
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ebooks.pdb.ztxt import zTXTError
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
+
+class HeaderRecord(object):
+    '''
+    The first record in the file is always the header record. It holds
+    information related to the location of text, images, and so on
+    in the file. This is used in conjunction with the sections
+    defined in the file header.
+    '''
+
+    def __init__(self, raw):
+        self.version, = struct.unpack('>H', raw[0:2])
+        self.num_records, = struct.unpack('>H', raw[2:4])
+        self.size, = struct.unpack('>L', raw[4:8])
+        self.record_size, = struct.unpack('>H', raw[8:10])
+        # Bytes 10-17 hold the bookmark/annotation fields, byte 18 the flags
+        # and byte 19 is reserved, so the CRC-32 starts at offset 20.
+        self.crc32, = struct.unpack('>L', raw[20:24])
+
+
+class Reader(FormatReader):
+    '''
+    Extract the text of a zTXT PDB file and write it out as HTML plus an OPF.
+    '''
+
+    def __init__(self, header, stream, log, encoding=None):
+        self.log = log
+        self.encoding = encoding
+
+        self.sections = []
+        for i in range(header.num_sections):
+            self.sections.append(header.section_data(i))
+
+        self.header_record = HeaderRecord(self.section_data(0))
+
+        # Initialize the decompressor
+        self.uncompressor = zlib.decompressobj()
+        self.uncompressor.decompress(self.section_data(1))
+
+#        if self.header_record.version not in (1, 2) or self.header_record.uid != 1:
+#            raise zTXTError('Unknown book version %i.' % self.header_record.version)
+
+
+    def section_data(self, number):
+        return self.sections[number]
+
+    def decompress_text(self, number):
+        '''
+        Return the decoded text of record ``number``. The decompressor is
+        recreated for record 1; ``cp1252`` is used when no encoding is set.
+        '''
+        if number == 1:
+            self.uncompressor = zlib.decompressobj()
+        return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
+
+    def extract_content(self, output_dir):
+        '''
+        Write ``index.html`` and ``metadata.opf`` into ``output_dir`` and
+        return the path of the OPF.
+        '''
+        txt = ''
+
+        for i in range(1, self.header_record.num_records + 1):
+            txt += self.decompress_text(i)
+
+        html = txt_to_markdown(txt)
+        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
+            index.write(html.encode('utf-8'))
+
+        mi = MetaInformation(_('Unknown'), _('Unknown'))
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
+
+        return os.path.join(output_dir, 'metadata.opf')
+
diff --git a/src/calibre/ebooks/pdf/output.py b/src/calibre/ebooks/pdf/output.py
index 4eb23877d9..ae44d270f7 100644
--- a/src/calibre/ebooks/pdf/output.py
+++ b/src/calibre/ebooks/pdf/output.py
@@ -62,12 +62,12 @@ class PDFOutput(OutputFormatPlugin):
         self.write(ImagePDFWriter, images)
 
     def convert_text(self, oeb_book):
-        with TemporaryDirectory('_pdf_out') as oebdir:
+        with TemporaryDirectory('_pdf_out') as oeb_dir:
             from calibre.customize.ui import plugin_for_output_format
             oeb_output = plugin_for_output_format('oeb')
-            oeb_output.convert(oeb, oeb_dir, self.input_plugin, self.opts, self.log)
+            oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)
 
-            opfpath = glob.glob(os.path.join(oebdir, '*.opf'))[0]
+            opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
             opf = OPF(opfpath, os.path.dirname(opfpath))
 
             self.write(PDFWriter, [s.path for s in opf.spine])
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 34fafc91fc..b94d3be467 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -8,8 +8,7 @@ __docformat__ = 'restructuredtext en'
 import os
 
 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.markdown import markdown
-from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
 
 class TXTInput(InputFormatPlugin):
 
@@ -25,19 +24,15 @@ class TXTInput(InputFormatPlugin):
             ienc = options.input_encoding
         txt = stream.read().decode(ienc)
 
-        md = markdown.Markdown(
-            extensions=['footnotes', 'tables', 'toc'],
-            safe_mode=False,)
-        html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
+        html = txt_to_markdown(txt)
         with open('index.html', 'wb') as index:
             index.write(html.encode('utf-8'))
 
         from calibre.ebooks.metadata.meta import get_metadata
         mi = get_metadata(stream, 'txt')
-        opf = OPFCreator(os.getcwd(), mi)
-        opf.create_manifest([('index.html', None)])
-        opf.create_spine(['index.html'])
-        with open('metadata.opf', 'wb') as opffile:
-            opf.render(opffile)
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
 
         return os.path.join(os.getcwd(), 'metadata.opf')
+
diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py
new file mode 100644
index 0000000000..c8f2690622
--- /dev/null
+++ b/src/calibre/ebooks/txt/processor.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from txt file.
+'''
+
+import os
+
+from calibre.ebooks.markdown import markdown
+from calibre.ebooks.metadata.opf2 import OPFCreator
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+def txt_to_markdown(txt):
+    '''
+    Render ``txt`` with Markdown and wrap the result in a minimal HTML page.
+    '''
+    md = markdown.Markdown(
+        extensions=['footnotes', 'tables', 'toc'],
+        safe_mode=False,)
+    html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
+
+    return html
+
+def opf_writer(path, opf_name, manifest, spine, mi):
+    '''
+    Create ``opf_name`` inside ``path`` from the given manifest, spine and
+    metadata (``mi``).
+    '''
+    opf = OPFCreator(path, mi)
+    opf.create_manifest(manifest)
+    opf.create_spine(spine)
+    with open(os.path.join(path, opf_name), 'wb') as opffile:
+        opf.render(opffile)
+