mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Add PalmDoc PDB input support.
This commit is contained in:
parent
5c40057adf
commit
e447b69bd2
@ -7,13 +7,13 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.ebooks.pdb.ereader.reader import Reader as eReader
|
||||
from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
|
||||
#from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
|
||||
from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
|
||||
|
||||
# Lookup table from an 8-character identity string to the Reader class
# that handles it. NOTE(review): the keys look like the PDB type+creator
# identity -- confirm against the code that consumes this table.
FORMATS = {
    'PNPdPPrs': eReader,
    'PNRdPPrs': eReader,
    'zTXTGPlm': zTXT,
    'TEXtREAd': PalmDoc,
}
|
||||
|
||||
IDENTITY_TO_NAME = {
|
||||
|
0
src/calibre/ebooks/pdb/palmdoc/__init__.py
Normal file
0
src/calibre/ebooks/pdb/palmdoc/__init__.py
Normal file
70
src/calibre/ebooks/pdb/palmdoc/reader.py
Normal file
70
src/calibre/ebooks/pdb/palmdoc/reader.py
Normal file
@ -0,0 +1,70 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
Read content from palmdoc pdb file.
|
||||
'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, struct, zlib
|
||||
|
||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
||||
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
||||
|
||||
class HeaderRecord(object):
    '''
    Parsed view of the header record (record 0) of a PalmDoc pdb file.

    Two fields are extracted from the raw record, each read as a
    big-endian unsigned 16-bit integer:

    ``compression``
        taken from bytes 0-1 of the record.
    ``num_records``
        taken from bytes 8-9 of the record.
    '''

    def __init__(self, raw):
        # One precompiled big-endian unsigned short reader, applied at the
        # two fixed offsets instead of slicing the buffer first.
        read_u16 = struct.Struct('>H').unpack_from
        self.compression = read_u16(raw, 0)[0]
        self.num_records = read_u16(raw, 8)[0]
|
||||
|
||||
|
||||
class Reader(FormatReader):
    '''
    Reader for PalmDoc pdb files.

    All sections are loaded from the pdb header at construction time.
    Section 0 is parsed as the PalmDoc header record; sections
    1..num_records are treated as the text records.
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        :param header: pdb header object providing ``num_sections`` and
            ``section_data(index)``.
        :param stream: the input file object; later handed to
            ``get_metadata``.
        :param log: logger object (stored, not used by this class).
        :param encoding: codec used to decode the text. ``None`` selects
            cp1252.
        '''
        self.stream = stream
        self.log = log
        self.encoding = encoding

        self.sections = [header.section_data(i)
                         for i in range(header.num_sections)]

        self.header_record = HeaderRecord(self.section_data(0))

    def section_data(self, number):
        '''Return the raw data of section ``number``.'''
        return self.sections[number]

    def _text_encoding(self):
        '''Return the codec name to decode text with (cp1252 if unset).'''
        return 'cp1252' if self.encoding is None else self.encoding

    def _raw_text(self, number):
        '''
        Return the still-encoded text of record ``number``.

        Compression 1 means the record is stored as-is, 2 means it is
        PalmDoc compressed. Any other value yields an empty byte string.
        '''
        compression = self.header_record.compression
        if compression == 1:
            return self.section_data(number)
        if compression == 2:
            # presumably decompress_doc returns a byte string -- the
            # original code called .decode() directly on its result.
            return decompress_doc(self.section_data(number))
        return b''

    def decompress_text(self, number):
        '''
        Return the decoded text of record ``number``.

        An unrecognised compression type gives an empty string.
        '''
        return self._raw_text(number).decode(self._text_encoding())

    def extract_content(self, output_dir):
        '''
        Write ``index.html`` and ``metadata.opf`` into ``output_dir``.

        :return: path of the written ``metadata.opf``.
        '''
        last_record = self.header_record.num_records

        # Join the raw bytes of every text record first and decode once.
        # Decoding record by record breaks when a multi-byte character
        # straddles a record boundary, and a single join avoids the
        # quadratic cost of repeated string concatenation.
        raw = b''.join(self._raw_text(i) for i in range(1, last_record + 1))
        txt = raw.decode(self._text_encoding())

        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        # Kept function-local as in the original (presumably to avoid an
        # import cycle -- confirm before moving to module level).
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(self.stream, 'pdb')
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')
|
||||
|
@ -8,11 +8,10 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import StringIO, os, struct, zlib
|
||||
import os, struct, zlib
|
||||
|
||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||
from calibre.ebooks.pdb.ztxt import zTXTError
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
||||
|
||||
class HeaderRecord(object):
|
||||
|
Loading…
x
Reference in New Issue
Block a user