mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
palmdoc pdb input.
This commit is contained in:
parent
5c40057adf
commit
e447b69bd2
@ -7,13 +7,13 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
from calibre.ebooks.pdb.ereader.reader import Reader as eReader
|
from calibre.ebooks.pdb.ereader.reader import Reader as eReader
|
||||||
from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
|
from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
|
||||||
#from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
|
from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
|
||||||
|
|
||||||
FORMATS = {
|
FORMATS = {
|
||||||
'PNPdPPrs' : eReader,
|
'PNPdPPrs' : eReader,
|
||||||
'PNRdPPrs' : eReader,
|
'PNRdPPrs' : eReader,
|
||||||
'zTXTGPlm' : zTXT,
|
'zTXTGPlm' : zTXT,
|
||||||
# 'TEXtREAd' : PalmDoc,
|
'TEXtREAd' : PalmDoc,
|
||||||
}
|
}
|
||||||
|
|
||||||
IDENTITY_TO_NAME = {
|
IDENTITY_TO_NAME = {
|
||||||
|
0
src/calibre/ebooks/pdb/palmdoc/__init__.py
Normal file
0
src/calibre/ebooks/pdb/palmdoc/__init__.py
Normal file
70
src/calibre/ebooks/pdb/palmdoc/reader.py
Normal file
70
src/calibre/ebooks/pdb/palmdoc/reader.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
'''
|
||||||
|
Read content from palmdoc pdb file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, struct, zlib
|
||||||
|
|
||||||
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
|
from calibre.ebooks.mobi.palmdoc import decompress_doc
|
||||||
|
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
||||||
|
|
||||||
|
class HeaderRecord(object):
    '''
    Parsed view of the first record (record 0) of the file.

    Two big-endian unsigned 16-bit fields are read from the raw record
    bytes: ``compression`` from bytes 0-1 and ``num_records`` from
    bytes 8-9. The six bytes in between are skipped.
    '''

    def __init__(self, raw):
        # '>H6xH': one unsigned short, six pad bytes, one unsigned short.
        # Only the first ten bytes of the record are needed.
        fields = struct.unpack('>H6xH', raw[:10])
        self.compression = fields[0]
        self.num_records = fields[1]
|
||||||
|
|
||||||
|
|
||||||
|
class Reader(FormatReader):
    '''
    Extract the text of a PalmDoc pdb file and write it out as
    ``index.html`` plus a ``metadata.opf`` describing it.
    '''

    def __init__(self, header, stream, log, encoding=None):
        '''
        :param header: pdb file header; must provide ``num_sections`` and
                       ``section_data(index)``.
        :param stream: the open pdb file, later passed to ``get_metadata``.
        :param log: logger object, stored for use by the reader.
        :param encoding: text encoding of the book; ``None`` selects cp1252.
        '''
        self.stream = stream
        self.log = log
        self.encoding = encoding

        # Pull every section into memory up front.
        self.sections = [header.section_data(i) for i in range(header.num_sections)]

        # Section 0 is the header record describing the text records.
        self.header_record = HeaderRecord(self.section_data(0))

    def section_data(self, number):
        '''Return the raw bytes of section ``number``.'''
        return self.sections[number]

    def decompress_text(self, number):
        '''
        Return the decoded text of section ``number``.

        Compression type 1 is decoded as-is, type 2 is first passed
        through ``decompress_doc``. Any other compression type yields an
        empty string.
        '''
        compression = self.header_record.compression
        if compression == 1:
            raw = self.section_data(number)
        elif compression == 2:
            raw = decompress_doc(self.section_data(number))
        else:
            return ''

        encoding = 'cp1252' if self.encoding is None else self.encoding
        # 'replace' keeps one undecodable byte (cp1252 leaves several byte
        # values undefined) from aborting the whole conversion.
        return raw.decode(encoding, 'replace')

    def extract_content(self, output_dir):
        '''
        Write the book to ``output_dir`` and return the path of the
        generated ``metadata.opf``.

        Text records are sections 1..num_records; section 0 is the header
        record.
        '''
        # Join once instead of growing a string record by record.
        txt = ''.join([self.decompress_text(i)
                       for i in range(1, self.header_record.num_records + 1)])

        html = txt_to_markdown(txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        # Imported here, as in the original, rather than at module level.
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(self.stream, 'pdb')
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')
|
||||||
|
|
@ -8,11 +8,10 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import StringIO, os, struct, zlib
|
import os, struct, zlib
|
||||||
|
|
||||||
from calibre.ebooks.pdb.formatreader import FormatReader
|
from calibre.ebooks.pdb.formatreader import FormatReader
|
||||||
from calibre.ebooks.pdb.ztxt import zTXTError
|
from calibre.ebooks.pdb.ztxt import zTXTError
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
|
||||||
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
|
||||||
|
|
||||||
class HeaderRecord(object):
|
class HeaderRecord(object):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user