Pull from driver dev

2025-07-09 03:04:10 -04:00 · 2009-05-03 10:59:12 -07:00 · 2009-05-03 10:59:12 -07:00 · 4afc1a7106
commit 4afc1a7106
parent cfa74fb1fd e447b69bd2
12 changed files with 228 additions and 18 deletions
--- a/src/calibre/ebooks/metadata/pdb.py
+++ b/src/calibre/ebooks/metadata/pdb.py
@ -29,7 +29,7 @@ def get_metadata(stream, extract_cover=True):
    MetadataReader = MREADER.get(pheader.ident, None)

    if MetadataReader is None:
-        return MetaInformation(_('Unknown'), [_('Unknown')])
+        return MetaInformation(pheader.title, [_('Unknown')])

    
    return MetadataReader(stream, extract_cover)
--- a/src/calibre/ebooks/pdb/init.py
+++ b/src/calibre/ebooks/pdb/init.py
@ -6,15 +6,46 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 from calibre.ebooks.pdb.ereader.reader import Reader as eReader
+from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
+from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc

 FORMATS = {
    'PNPdPPrs' : eReader,
    'PNRdPPrs' : eReader,
+    'zTXTGPlm' : zTXT,
+    'TEXtREAd' : PalmDoc,
 }

 IDENTITY_TO_NAME = {
    'PNPdPPrs' : 'eReader',
    'PNRdPPrs' : 'eReader',
+    'zTXTGPlm' : 'zTXT',
+    'TEXtREAd' : 'PalmDOC',
+    
+    '.pdfADBE' : 'Adobe Reader',
+    'BVokBDIC' : 'BDicty',
+    'DB99DBOS' : 'DB (Database program)',
+    'vIMGView' : 'FireViewer (ImageViewer)',
+    'PmDBPmDB' : 'HanDBase',
+    'InfoINDB' : 'InfoView',
+    'ToGoToGo' : 'iSilo',
+    'SDocSilX' : 'iSilo 3',
+    'JbDbJBas' : 'JFile',
+    'JfDbJFil' : 'JFile Pro',
+    'DATALSdb' : 'LIST',
+    'Mdb1Mdb1' : 'MobileDB',
+    'BOOKMOBI' : 'MobiPocket',
+    'DataPlkr' : 'Plucker',
+    'DataSprd' : 'QuickSheet',
+    'SM01SMem' : 'SuperMemo',
+    'TEXtTlDc' : 'TealDoc',
+    'InfoTlIf' : 'TealInfo',
+    'DataTlMl' : 'TealMeal',
+    'DataTlPt' : 'TealPaint',
+    'dataTDBP' : 'ThinkDB',
+    'TdatTide' : 'Tides',
+    'ToRaTRPW' : 'TomeRaider',
+    'BDOCWrdS' : 'WordSmith',
 }

 class PDBError(Exception):
--- a/src/calibre/ebooks/pdb/ereader/init.py
+++ b/src/calibre/ebooks/pdb/ereader/init.py
@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import with_statement

 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/pdb/input.py
@ -24,7 +24,7 @@ class PDBInput(InputFormatPlugin):
        Reader = get_reader(header.ident)

        if Reader is None:
-            raise PDBError('Unknown format in pdb file. Identity is %s' % header.identity)
+            raise PDBError('No reader avaliable for format within container.\n Identity is %s. Book type is %s' % (header.ident, IDENTITY_TO_NAME.get(header.ident, _('Unknown'))))

        log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))

--- a/src/calibre/ebooks/pdb/palmdoc/init.py
+++ b/src/calibre/ebooks/pdb/palmdoc/init.py
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from palmdoc pdb file.
+'''
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os, struct, zlib
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ebooks.mobi.palmdoc import decompress_doc
+from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
+
+class HeaderRecord(object):
+    '''
+    The first record in the file is always the header record. It holds
+    information related to the location of text, images, and so on
+    in the file. This is used in conjunction with the sections
+    defined in the file header.
+    '''
+
+    def __init__(self, raw):
+        self.compression, = struct.unpack('>H', raw[0:2])
+        self.num_records, = struct.unpack('>H', raw[8:10])
+        
+    
+class Reader(FormatReader):
+    
+    def __init__(self, header, stream, log, encoding=None):
+        self.stream = stream
+        self.log = log
+        self.encoding = encoding
+    
+        self.sections = []
+        for i in range(header.num_sections):
+            self.sections.append(header.section_data(i))
+
+        self.header_record = HeaderRecord(self.section_data(0))
+
+    def section_data(self, number):
+        return self.sections[number]
+
+    def decompress_text(self, number):
+        if self.header_record.compression == 1:
+            return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
+        if self.header_record.compression == 2:
+            return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
+        return ''
+
+    def extract_content(self, output_dir):
+        txt = ''
+        
+        for i in range(1, self.header_record.num_records + 1):
+            txt += self.decompress_text(i)
+
+        html = txt_to_markdown(txt)
+        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
+            index.write(html.encode('utf-8'))
+                        
+        from calibre.ebooks.metadata.meta import get_metadata
+        mi = get_metadata(self.stream, 'pdb')
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
+        
+        return os.path.join(output_dir, 'metadata.opf')
+
--- a/src/calibre/ebooks/pdb/ztxt/init.py
+++ b/src/calibre/ebooks/pdb/ztxt/init.py
@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+class zTXTError(Exception):
+    pass
+
--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from ztxt pdb file.
+'''
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os, struct, zlib
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ebooks.pdb.ztxt import zTXTError
+from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
+
+class HeaderRecord(object):
+    '''
+    The first record in the file is always the header record. It holds
+    information related to the location of text, images, and so on
+    in the file. This is used in conjunction with the sections
+    defined in the file header.
+    '''
+
+    def __init__(self, raw):
+        self.version, = struct.unpack('>H', raw[0:2])
+        self.num_records, = struct.unpack('>H', raw[2:4])
+        self.size, = struct.unpack('>L', raw[4:8])
+        self.record_size, = struct.unpack('>H', raw[8:10])
+        
+    
+class Reader(FormatReader):
+    
+    def __init__(self, header, stream, log, encoding=None):
+        self.stream = stream
+        self.log = log
+        self.encoding = encoding
+    
+        self.sections = []
+        for i in range(header.num_sections):
+            self.sections.append(header.section_data(i))
+
+        self.header_record = HeaderRecord(self.section_data(0))
+
+        # Initialize the decompressor
+        self.uncompressor = zlib.decompressobj()
+        self.uncompressor.decompress(self.section_data(1))
+
+    def section_data(self, number):
+        return self.sections[number]
+
+    def decompress_text(self, number):
+        if number == 1:
+            self.uncompressor = zlib.decompressobj()
+        return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
+
+    def extract_content(self, output_dir):
+        txt = ''
+        
+        for i in range(1, self.header_record.num_records + 1):
+            txt += self.decompress_text(i)
+
+        html = txt_to_markdown(txt)
+        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
+            index.write(html.encode('utf-8'))
+                        
+        from calibre.ebooks.metadata.meta import get_metadata
+        mi = get_metadata(self.stream, 'pdb')
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
+        
+        return os.path.join(output_dir, 'metadata.opf')
+
--- a/src/calibre/ebooks/pdf/output.py
+++ b/src/calibre/ebooks/pdf/output.py
@ -62,12 +62,12 @@ class PDFOutput(OutputFormatPlugin):
        self.write(ImagePDFWriter, images)
            
    def convert_text(self, oeb_book):
-        with TemporaryDirectory('_pdf_out') as oebdir:
+        with TemporaryDirectory('_pdf_out') as oeb_dir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')
-            oeb_output.convert(oeb, oeb_dir, self.input_plugin, self.opts, self.log)
+            oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)
        
-            opfpath = glob.glob(os.path.join(oebdir, '*.opf'))[0]
+            opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
            opf = OPF(opfpath, os.path.dirname(opfpath))
            
            self.write(PDFWriter, [s.path for s in opf.spine])
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@ -8,8 +8,7 @@ __docformat__ = 'restructuredtext en'
 import os

 from calibre.customize.conversion import InputFormatPlugin
-from calibre.ebooks.markdown import markdown
-from calibre.ebooks.metadata.opf2 import OPFCreator
+from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer

 class TXTInput(InputFormatPlugin):
    
@ -25,19 +24,15 @@ class TXTInput(InputFormatPlugin):
            ienc = options.input_encoding
        txt = stream.read().decode(ienc)
        
-        md = markdown.Markdown(
-            extensions=['footnotes', 'tables', 'toc'],
-            safe_mode=False,)
-        html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
+        html = txt_to_markdown(txt)
        with open('index.html', 'wb') as index:
            index.write(html.encode('utf-8'))
            
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(stream, 'txt')
-        opf = OPFCreator(os.getcwd(), mi)
-        opf.create_manifest([('index.html', None)])
-        opf.create_spine(['index.html'])
-        with open('metadata.opf', 'wb') as opffile:
-            opf.render(opffile)
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)
        
        return os.path.join(os.getcwd(), 'metadata.opf')
+
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from txt file.
+'''
+
+import os
+
+from calibre.ebooks.markdown import markdown
+from calibre.ebooks.metadata.opf2 import OPFCreator
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+def txt_to_markdown(txt):
+    md = markdown.Markdown(
+        extensions=['footnotes', 'tables', 'toc'],
+        safe_mode=False,)
+    html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>'
+    
+    return html
+
+def opf_writer(path, opf_name, manifest, spine, mi):
+    opf = OPFCreator(path, mi)
+    opf.create_manifest(manifest)
+    opf.create_spine(spine)
+    with open(os.path.join(path, opf_name), 'wb') as opffile:
+        opf.render(opffile)
+