Fix bug in pdfoutput. Add ztxt input.

2025-11-13 10:06:59 -05:00 · 2009-05-03 10:54:07 -04:00 · 2009-05-03 10:54:07 -04:00 · daf6e43523
commit daf6e43523
parent 27407c779a
6 changed files with 131 additions and 17 deletions
--- a/src/calibre/ebooks/pdb/init.py
+++ b/src/calibre/ebooks/pdb/init.py
@ -6,18 +6,18 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 from calibre.ebooks.pdb.ereader.reader import Reader as eReader
-from calibre.ebooks.pdb.plucker.reader import Reader as Plucker
+from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT

 FORMATS = {
    'PNPdPPrs' : eReader,
    'PNRdPPrs' : eReader,
-    'DataPlkr' : Plucker,
+    'zTXTGPlm' : zTXT,
 }

 IDENTITY_TO_NAME = {
    'PNPdPPrs' : 'eReader',
    'PNRdPPrs' : 'eReader',
-    'DataPlkr' : 'Plucker',
+    'zTXTGPlm' : 'zTXT',
 }

 class PDBError(Exception):
--- a/src/calibre/ebooks/pdb/ztxt/init.py
+++ b/src/calibre/ebooks/pdb/ztxt/init.py
@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+class zTXTError(Exception):
+    pass
+
--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from ztxt pdb file.
+'''
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import StringIO, os, struct, zlib
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ebooks.pdb.ztxt import zTXTError
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
+
+class HeaderRecord(object):
+    '''
+    The first record in the file is always the header record. It holds
+    information related to the location of text, images, and so on
+    in the file. This is used in conjunction with the sections
+    defined in the file header.
+    '''
+
+    def __init__(self, raw):
+        self.version, = struct.unpack('>H', raw[0:2])
+        self.num_records, = struct.unpack('>H', raw[2:4])
+        self.size, = struct.unpack('>L', raw[4:8])
+        self.record_size, = struct.unpack('>H', raw[8:10])
+        self.crc32, = struct.unpack('>L', raw[18:22])
+        
+    
+class Reader(FormatReader):
+    
+    def __init__(self, header, stream, log, encoding=None):
+        self.log = log
+        self.encoding = encoding
+    
+        self.sections = []
+        for i in range(header.num_sections):
+            self.sections.append(header.section_data(i))
+
+        self.header_record = HeaderRecord(self.section_data(0))
+
+        # Initialize the decompressor
+        self.uncompressor = zlib.decompressobj()
+        self.uncompressor.decompress(self.section_data(1))
+        
+#        if self.header_record.version not in (1, 2) or self.header_record.uid != 1:
+#            raise zTXTError('Unknown book version %i.' % self.header_record.version)
+
+
+    def section_data(self, number):
+        return self.sections[number]
+
+    def decompress_text(self, number):
+        if number == 1:
+            self.uncompressor = zlib.decompressobj()
+        return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
+
+    def extract_content(self, output_dir):
+        txt = ''
+        
+        for i in range(1, self.header_record.num_records + 1):
+            txt += self.decompress_text(i)
+
+        html = txt_to_markdown(txt)
+        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
+            index.write(html.encode('utf-8'))
+                        
+        mi = MetaInformation(_('Unknown'), _('Unknown'))
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
+        
+        return os.path.join(output_dir, 'metadata.opf')
+
--- a/src/calibre/ebooks/pdf/output.py
+++ b/src/calibre/ebooks/pdf/output.py
@ -62,12 +62,12 @@ class PDFOutput(OutputFormatPlugin):
        self.write(ImagePDFWriter, images)
            
    def convert_text(self, oeb_book):
-        with TemporaryDirectory('_pdf_out') as oebdir:
+        with TemporaryDirectory('_pdf_out') as oeb_dir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')
-            oeb_output.convert(oeb, oeb_dir, self.input_plugin, self.opts, self.log)
+            oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)
        
-            opfpath = glob.glob(os.path.join(oebdir, '*.opf'))[0]
+            opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
            opf = OPF(opfpath, os.path.dirname(opfpath))
            
            self.write(PDFWriter, [s.path for s in opf.spine])
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@ -8,8 +8,7 @@ __docformat__ = 'restructuredtext en'
 import os

 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.markdown import markdown
-from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer

 class TXTInput(InputFormatPlugin):
    
@ -25,19 +24,15 @@ class TXTInput(InputFormatPlugin):
            ienc = options.input_encoding
        txt = stream.read().decode(ienc)
        
-        md = markdown.Markdown(
-            extensions=['footnotes', 'tables', 'toc'],
-            safe_mode=False,)
-        html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
+        html = txt_to_markdown(txt)
        with open('index.html', 'wb') as index:
            index.write(html.encode('utf-8'))
            
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(stream, 'txt')
-        opf = OPFCreator(os.getcwd(), mi)
-        opf.create_manifest([('index.html', None)])
-        opf.create_spine(['index.html'])
-        with open('metadata.opf', 'wb') as opffile:
-            opf.render(opffile)
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
        
        return os.path.join(os.getcwd(), 'metadata.opf')
+
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+'''
+Convert txt content to HTML via Markdown and write the accompanying OPF file.
+'''
+
+import os
+
+from calibre.ebooks.markdown import markdown
+from calibre.ebooks.metadata.opf2 import OPFCreator
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+def txt_to_markdown(txt):
+    md = markdown.Markdown(
+        extensions=['footnotes', 'tables', 'toc'],
+        safe_mode=False,)
+    html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
+    
+    return html
+
+def opf_writer(path, opf_name, manifest, spine, mi):
+    opf = OPFCreator(path, mi)
+    opf.create_manifest(manifest)
+    opf.create_spine(spine)
+    with open(os.path.join(path, opf_name), 'wb') as opffile:
+        opf.render(opffile)
+