From 9c74cd945fb3548d97b673116aa2ecbbfa0943f6 Mon Sep 17 00:00:00 2001
From: John Schember
Date: Sun, 21 Feb 2010 10:48:24 -0500
Subject: [PATCH] Implement bug #4971: Support reading of PDF PDB files.

---
 src/calibre/ebooks/oeb/iterator.py     |  2 +-
 src/calibre/ebooks/pdb/__init__.py     |  4 ++-
 src/calibre/ebooks/pdb/pdf/__init__.py |  0
 src/calibre/ebooks/pdb/pdf/reader.py   | 54 ++++++++++++++++++++++++++
 4 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 src/calibre/ebooks/pdb/pdf/__init__.py
 create mode 100644 src/calibre/ebooks/pdb/pdf/reader.py

diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py
index 8959d62fac..d09c49ebeb 100644
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@@ -177,7 +177,7 @@ class EbookIterator(object):
                 plumber.opts, plumber.input_fmt, self.log,
                 {}, self.base)
 
-        if processed or plumber.input_fmt.lower() in ('pdf', 'rb') and \
+        if processed or plumber.input_fmt.lower() in ('pdb', 'pdf', 'rb') and \
                 not hasattr(self.pathtoopf, 'manifest'):
             self.pathtoopf = create_oebbook(self.log, self.pathtoopf, plumber.opts,
                     plumber.input_plugin)
diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py
index 54f3826470..092c8a21bd 100644
--- a/src/calibre/ebooks/pdb/__init__.py
+++ b/src/calibre/ebooks/pdb/__init__.py
@@ -11,12 +11,14 @@ class PDBError(Exception):
 from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
 from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
 from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
+from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
 
 FORMAT_READERS = {
     'PNPdPPrs': ereader_reader,
     'PNRdPPrs': ereader_reader,
     'zTXTGPlm': ztxt_reader,
     'TEXtREAd': palmdoc_reader,
+    '.pdfADBE': pdf_reader,
 }
 
 from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
@@ -34,8 +36,8 @@ IDENTITY_TO_NAME = {
     'PNRdPPrs': 'eReader',
     'zTXTGPlm': 'zTXT',
     'TEXtREAd': 'PalmDOC',
-
     '.pdfADBE': 'Adobe Reader',
+
     'BVokBDIC': 'BDicty',
     'DB99DBOS': 'DB (Database program)',
     'vIMGView': 'FireViewer (ImageViewer)',
diff --git a/src/calibre/ebooks/pdb/pdf/__init__.py b/src/calibre/ebooks/pdb/pdf/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/calibre/ebooks/pdb/pdf/reader.py b/src/calibre/ebooks/pdb/pdf/reader.py
new file mode 100644
index 0000000000..913d06f634
--- /dev/null
+++ b/src/calibre/ebooks/pdb/pdf/reader.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+'''
+Read content from a PDF document stored in a pdb file.
+'''
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, John Schember '
+__docformat__ = 'restructuredtext en'
+
+import cStringIO
+
+from calibre.ebooks.pdb.formatreader import FormatReader
+from calibre.ptempfile import TemporaryFile
+
+class Reader(FormatReader):
+    '''
+    Reader for pdb files whose sections hold the bytes of a PDF document.
+    '''
+
+    def __init__(self, header, stream, log, options):
+        '''
+        Keep the pdb header, input stream, logger and conversion options,
+        and set the option values used for the PDF conversion.
+        '''
+        self.header = header
+        self.stream = stream
+        self.log = log
+        self.options = options
+        # Set unconditionally: any value already on options is overwritten.
+        self.options.new_pdf_engine = False
+        self.options.no_images = False
+        self.options.unwrap_factor = 0.5
+
+    def extract_content(self, output_dir):
+        '''
+        Write every section of the pdb file, in order, to a temporary file
+        and return whatever the PDF input plugin's convert() returns for it.
+
+        output_dir is part of the signature but is not used here.
+        '''
+        self.log.info('Extracting PDF...')
+
+        with TemporaryFile() as pdf_n:
+            # 'w+b' replaces the non-standard 'rw+b' mode; the with block
+            # closes the handle before the TemporaryFile context exits.
+            with open(pdf_n, 'w+b') as pdf:
+                for x in xrange(self.header.section_count()):
+                    pdf.write(self.header.section_data(x))
+
+                from calibre.customize.ui import plugin_for_input_format
+                pdf.seek(0)
+                return plugin_for_input_format('pdf').convert(pdf,
+                        self.options, 'pdf', self.log, [])