diff --git a/src/calibre/ebooks/metadata/pdb.py b/src/calibre/ebooks/metadata/pdb.py index e473925b87..a6f7c6796b 100644 --- a/src/calibre/ebooks/metadata/pdb.py +++ b/src/calibre/ebooks/metadata/pdb.py @@ -29,7 +29,7 @@ def get_metadata(stream, extract_cover=True): MetadataReader = MREADER.get(pheader.ident, None) if MetadataReader is None: - return MetaInformation(_('Unknown'), [_('Unknown')]) + return MetaInformation(pheader.title, [_('Unknown')]) return MetadataReader(stream, extract_cover) diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py index 8c4f45337f..1bf8ebeb89 100644 --- a/src/calibre/ebooks/pdb/__init__.py +++ b/src/calibre/ebooks/pdb/__init__.py @@ -6,15 +6,46 @@ __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' from calibre.ebooks.pdb.ereader.reader import Reader as eReader +from calibre.ebooks.pdb.ztxt.reader import Reader as zTXT +from calibre.ebooks.pdb.palmdoc.reader import Reader as PalmDoc FORMATS = { 'PNPdPPrs' : eReader, 'PNRdPPrs' : eReader, + 'zTXTGPlm' : zTXT, + 'TEXtREAd' : PalmDoc, } IDENTITY_TO_NAME = { 'PNPdPPrs' : 'eReader', 'PNRdPPrs' : 'eReader', + 'zTXTGPlm' : 'zTXT', + 'TEXtREAd' : 'PalmDOC', + + '.pdfADBE' : 'Adobe Reader', + 'BVokBDIC' : 'BDicty', + 'DB99DBOS' : 'DB (Database program)', + 'vIMGView' : 'FireViewer (ImageViewer)', + 'PmDBPmDB' : 'HanDBase', + 'InfoINDB' : 'InfoView', + 'ToGoToGo' : 'iSilo', + 'SDocSilX' : 'iSilo 3', + 'JbDbJBas' : 'JFile', + 'JfDbJFil' : 'JFile Pro', + 'DATALSdb' : 'LIST', + 'Mdb1Mdb1' : 'MobileDB', + 'BOOKMOBI' : 'MobiPocket', + 'DataPlkr' : 'Plucker', + 'DataSprd' : 'QuickSheet', + 'SM01SMem' : 'SuperMemo', + 'TEXtTlDc' : 'TealDoc', + 'InfoTlIf' : 'TealInfo', + 'DataTlMl' : 'TealMeal', + 'DataTlPt' : 'TealPaint', + 'dataTDBP' : 'ThinkDB', + 'TdatTide' : 'Tides', + 'ToRaTRPW' : 'TomeRaider', + 'BDOCWrdS' : 'WordSmith', } class PDBError(Exception): diff --git a/src/calibre/ebooks/pdb/ereader/__init__.py 
b/src/calibre/ebooks/pdb/ereader/__init__.py index b39467c6e3..185a44d1a9 100644 --- a/src/calibre/ebooks/pdb/ereader/__init__.py +++ b/src/calibre/ebooks/pdb/ereader/__init__.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py index ecf5c706c4..a1ab0a7a65 100644 --- a/src/calibre/ebooks/pdb/ereader/reader.py +++ b/src/calibre/ebooks/pdb/ereader/reader.py @@ -16,7 +16,7 @@ from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.pdb.formatreader import FormatReader from calibre.ebooks.pdb.ereader import EreaderError from calibre.ebooks.pml.pmlconverter import pml_to_html, \ - footnote_sidebar_to_html + footnote_sidebar_to_html from calibre.ebooks.mobi.palmdoc import decompress_doc from calibre.ebooks.metadata.opf2 import OPFCreator diff --git a/src/calibre/ebooks/pdb/input.py b/src/calibre/ebooks/pdb/input.py index 24bc8a1025..1a7e32e3eb 100644 --- a/src/calibre/ebooks/pdb/input.py +++ b/src/calibre/ebooks/pdb/input.py @@ -24,7 +24,7 @@ class PDBInput(InputFormatPlugin): Reader = get_reader(header.ident) if Reader is None: - raise PDBError('Unknown format in pdb file. Identity is %s' % header.identity) + raise PDBError('No reader avaliable for format within container.\n Identity is %s. 
Book type is %s' % (header.ident, IDENTITY_TO_NAME.get(header.ident, _('Unknown')))) log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident)) diff --git a/src/calibre/ebooks/pdb/palmdoc/__init__.py b/src/calibre/ebooks/pdb/palmdoc/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py new file mode 100644 index 0000000000..a5a58b4d81 --- /dev/null +++ b/src/calibre/ebooks/pdb/palmdoc/reader.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- + +''' +Read content from palmdoc pdb file. +''' + +__license__ = 'GPL v3' +__copyright__ = '2009, John Schember ' +__docformat__ = 'restructuredtext en' + +import os, struct, zlib + +from calibre.ebooks.pdb.formatreader import FormatReader +from calibre.ebooks.mobi.palmdoc import decompress_doc +from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer + +class HeaderRecord(object): + ''' + The first record in the file is always the header record. It holds + information related to the location of text, images, and so on + in the file. This is used in conjunction with the sections + defined in the file header. 
+ ''' + + def __init__(self, raw): + self.compression, = struct.unpack('>H', raw[0:2]) + self.num_records, = struct.unpack('>H', raw[8:10]) + + +class Reader(FormatReader): + + def __init__(self, header, stream, log, encoding=None): + self.stream = stream + self.log = log + self.encoding = encoding + + self.sections = [] + for i in range(header.num_sections): + self.sections.append(header.section_data(i)) + + self.header_record = HeaderRecord(self.section_data(0)) + + def section_data(self, number): + return self.sections[number] + + def decompress_text(self, number): + if self.header_record.compression == 1: + return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding) + if self.header_record.compression == 2: + return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) + return '' + + def extract_content(self, output_dir): + txt = '' + + for i in range(1, self.header_record.num_records + 1): + txt += self.decompress_text(i) + + html = txt_to_markdown(txt) + with open(os.path.join(output_dir, 'index.html'), 'wb') as index: + index.write(html.encode('utf-8')) + + from calibre.ebooks.metadata.meta import get_metadata + mi = get_metadata(self.stream, 'pdb') + manifest = [('index.html', None)] + spine = ['index.html'] + opf_writer(output_dir, 'metadata.opf', manifest, spine, mi) + + return os.path.join(output_dir, 'metadata.opf') + diff --git a/src/calibre/ebooks/pdb/ztxt/__init__.py b/src/calibre/ebooks/pdb/ztxt/__init__.py new file mode 100644 index 0000000000..2c2028b74f --- /dev/null +++ b/src/calibre/ebooks/pdb/ztxt/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = '2009, John Schember ' +__docformat__ = 'restructuredtext en' + +import os + +class zTXTError(Exception): + pass + diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py new file mode 100644 index 0000000000..3d96018def --- /dev/null +++ 
b/src/calibre/ebooks/pdb/ztxt/reader.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +''' +Read content from ztxt pdb file. +''' + +__license__ = 'GPL v3' +__copyright__ = '2009, John Schember ' +__docformat__ = 'restructuredtext en' + +import os, struct, zlib + +from calibre.ebooks.pdb.formatreader import FormatReader +from calibre.ebooks.pdb.ztxt import zTXTError +from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer + +class HeaderRecord(object): + ''' + The first record in the file is always the header record. It holds + information related to the location of text, images, and so on + in the file. This is used in conjunction with the sections + defined in the file header. + ''' + + def __init__(self, raw): + self.version, = struct.unpack('>H', raw[0:2]) + self.num_records, = struct.unpack('>H', raw[2:4]) + self.size, = struct.unpack('>L', raw[4:8]) + self.record_size, = struct.unpack('>H', raw[8:10]) + + +class Reader(FormatReader): + + def __init__(self, header, stream, log, encoding=None): + self.stream = stream + self.log = log + self.encoding = encoding + + self.sections = [] + for i in range(header.num_sections): + self.sections.append(header.section_data(i)) + + self.header_record = HeaderRecord(self.section_data(0)) + + # Initialize the decompressor + self.uncompressor = zlib.decompressobj() + self.uncompressor.decompress(self.section_data(1)) + + def section_data(self, number): + return self.sections[number] + + def decompress_text(self, number): + if number == 1: + self.uncompressor = zlib.decompressobj() + return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) + + def extract_content(self, output_dir): + txt = '' + + for i in range(1, self.header_record.num_records + 1): + txt += self.decompress_text(i) + + html = txt_to_markdown(txt) + with open(os.path.join(output_dir, 'index.html'), 'wb') as index: + index.write(html.encode('utf-8')) + + from 
calibre.ebooks.metadata.meta import get_metadata + mi = get_metadata(self.stream, 'pdb') + manifest = [('index.html', None)] + spine = ['index.html'] + opf_writer(output_dir, 'metadata.opf', manifest, spine, mi) + + return os.path.join(output_dir, 'metadata.opf') + diff --git a/src/calibre/ebooks/pdf/output.py b/src/calibre/ebooks/pdf/output.py index 4eb23877d9..ae44d270f7 100644 --- a/src/calibre/ebooks/pdf/output.py +++ b/src/calibre/ebooks/pdf/output.py @@ -62,12 +62,12 @@ class PDFOutput(OutputFormatPlugin): self.write(ImagePDFWriter, images) def convert_text(self, oeb_book): - with TemporaryDirectory('_pdf_out') as oebdir: + with TemporaryDirectory('_pdf_out') as oeb_dir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format('oeb') - oeb_output.convert(oeb, oeb_dir, self.input_plugin, self.opts, self.log) + oeb_output.convert(oeb_book, oeb_dir, self.input_plugin, self.opts, self.log) - opfpath = glob.glob(os.path.join(oebdir, '*.opf'))[0] + opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0] opf = OPF(opfpath, os.path.dirname(opfpath)) self.write(PDFWriter, [s.path for s in opf.spine]) diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index 34fafc91fc..b94d3be467 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -8,8 +8,7 @@ __docformat__ = 'restructuredtext en' import os from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.markdown import markdown -from calibre.ebooks.metadata.opf2 import OPFCreator +from calibre.ebooks.txt.processor import txt_to_markdown, opf_writer class TXTInput(InputFormatPlugin): @@ -25,19 +24,15 @@ class TXTInput(InputFormatPlugin): ienc = options.input_encoding txt = stream.read().decode(ienc) - md = markdown.Markdown( - extensions=['footnotes', 'tables', 'toc'], - safe_mode=False,) - html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>' + html = txt_to_markdown(txt) with open('index.html', 'wb') 
as index: index.write(html.encode('utf-8')) from calibre.ebooks.metadata.meta import get_metadata mi = get_metadata(stream, 'txt') - opf = OPFCreator(os.getcwd(), mi) - opf.create_manifest([('index.html', None)]) - opf.create_spine(['index.html']) - with open('metadata.opf', 'wb') as opffile: - opf.render(opffile) + manifest = [('index.html', None)] + spine = ['index.html'] + opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi) return os.path.join(os.getcwd(), 'metadata.opf') + diff --git a/src/calibre/ebooks/txt/processor.py b/src/calibre/ebooks/txt/processor.py new file mode 100644 index 0000000000..c8f2690622 --- /dev/null +++ b/src/calibre/ebooks/txt/processor.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- + +''' +Read content from txt file. +''' + +import os + +from calibre.ebooks.markdown import markdown +from calibre.ebooks.metadata.opf2 import OPFCreator + +__license__ = 'GPL v3' +__copyright__ = '2009, John Schember <john@nachtimwald.com>' +__docformat__ = 'restructuredtext en' + +def txt_to_markdown(txt): + md = markdown.Markdown( + extensions=['footnotes', 'tables', 'toc'], + safe_mode=False,) + html = '<html><head><title /></head><body>'+md.convert(txt)+'</body></html>' + + return html + +def opf_writer(path, opf_name, manifest, spine, mi): + opf = OPFCreator(path, mi) + opf.create_manifest(manifest) + opf.create_spine(spine) + with open(os.path.join(path, opf_name), 'wb') as opffile: + opf.render(opffile) +