diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py
index a7fb2760fd..1bf8ebeb89 100644
--- a/src/calibre/ebooks/pdb/__init__.py
+++ b/src/calibre/ebooks/pdb/__init__.py
@@ -7,13 +7,13 @@ __docformat__ = 'restructuredtext en'
 
 from calibre.ebooks.pdb.ereader.reader import Reader as eReader
 from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT
-#from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
+from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc
 
 FORMATS = {
     'PNPdPPrs' : eReader,
     'PNRdPPrs' : eReader,
     'zTXTGPlm' : zTXT,
-#    'TEXtREAd' : PalmDoc,
+    'TEXtREAd' : PalmDoc,
 }
 
 IDENTITY_TO_NAME = {
diff --git a/src/calibre/ebooks/pdb/palmdoc/__init__.py b/src/calibre/ebooks/pdb/palmdoc/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py
new file mode 100644
index 0000000000..a5a58b4d81
--- /dev/null
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from palmdoc pdb file.
+'''
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, John Schember '
+__docformat__ = 'restructuredtext en'
+
+import os, struct, zlib
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ebooks.mobi.palmdoc import decompress_doc
+from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
+
+class HeaderRecord(object):
+    '''
+    The first record in the file is always the header record. It holds
+    information related to the location of text, images, and so on
+    in the file. This is used in conjunction with the sections
+    defined in the file header.
+    '''
+
+    def __init__(self, raw):
+        '''
+        Read the fields used by Reader from ``raw``, the bytes of record 0.
+        Both are unpacked as big-endian unsigned 16-bit integers.
+        '''
+        # Bytes 0-1: compression type; Reader.decompress_text handles 1 and 2.
+        self.compression, = struct.unpack('>H', raw[0:2])
+        # Bytes 8-9: record count; Reader.extract_content reads records
+        # 1..num_records as the book text.
+        self.num_records, = struct.unpack('>H', raw[8:10])
+
+
+class Reader(FormatReader):
+    '''
+    Convert the text of a PalmDoc pdb file into index.html plus an OPF file.
+    '''
+
+    def __init__(self, header, stream, log, encoding=None):
+        '''
+        :param header: pdb file header; ``num_sections`` and
+                       ``section_data(index)`` are read from it.
+        :param stream: the pdb file, handed to get_metadata later on.
+        :param log: stored on the instance; not used by this class itself.
+        :param encoding: encoding of the text, cp1252 is used when None.
+        '''
+        self.stream = stream
+        self.log = log
+        self.encoding = encoding
+
+        self.sections = [header.section_data(i)
+                         for i in range(header.num_sections)]
+
+        self.header_record = HeaderRecord(self.section_data(0))
+
+    def section_data(self, number):
+        '''
+        Return the raw data of section ``number`` (0 is the header record).
+        '''
+        return self.sections[number]
+
+    def decompress_text(self, number):
+        '''
+        Return the decoded text of record ``number``.
+
+        Compression type 1 is decoded as is, type 2 is run through
+        decompress_doc first. Any other type yields an empty string.
+        '''
+        compression = self.header_record.compression
+        encoding = 'cp1252' if self.encoding is None else self.encoding
+
+        if compression == 1:
+            return self.section_data(number).decode(encoding)
+        if compression == 2:
+            return decompress_doc(self.section_data(number)).decode(encoding)
+        return ''
+
+    def extract_content(self, output_dir):
+        '''
+        Write index.html and metadata.opf into ``output_dir`` and return
+        the path of the OPF file.
+        '''
+        # Join once; += in a loop copies the accumulated text every time.
+        txt = ''.join(self.decompress_text(i)
+                      for i in range(1, self.header_record.num_records + 1))
+
+        html = txt_to_markdown(txt)
+        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
+            index.write(html.encode('utf-8'))
+
+        from calibre.ebooks.metadata.meta import get_metadata
+        mi = get_metadata(self.stream, 'pdb')
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
+
+        return os.path.join(output_dir, 'metadata.opf')
+
diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py
index 19c04b66b4..3d96018def 100644
--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@@ -8,11 +8,10 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, John Schember '
 __docformat__ = 'restructuredtext en'
 
-import StringIO, os, struct, zlib
+import os, struct, zlib
 
 from calibre.ebooks.pdb.formatreader import FormatReader
 from calibre.ebooks.pdb.ztxt import zTXTError
-from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer
 
 class HeaderRecord(object):