Use PoDoFo in pdfmanipulate info

This commit is contained in:
Kovid Goyal 2009-05-31 09:34:27 -07:00
parent c3db153421
commit a52242cecb
2 changed files with 63 additions and 31 deletions

View File

@ -9,16 +9,14 @@ __docformat__ = 'restructuredtext en'
Merge PDF files into a single PDF document. Merge PDF files into a single PDF document.
''' '''
import os, re, sys, time import os, sys
from optparse import OptionGroup, Option
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.utils.logging import Log from calibre.utils.logging import Log
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
from calibre.customize.conversion import OptionRecommendation from calibre.ebooks.pdf.verify import is_valid_pdfs, is_encrypted
from calibre.ebooks.pdf.verify import is_valid_pdfs, is_encrypted, is_encrypted from calibre import prints
from calibre.utils.podofo import podofo, podofo_err
from pyPdf import PdfFileWriter, PdfFileReader
USAGE = '\n%prog %%name ' + _('''\ USAGE = '\n%prog %%name ' + _('''\
file.pdf ... file.pdf ...
@ -35,27 +33,23 @@ def option_parser(name):
return OptionParser(usage=usage) return OptionParser(usage=usage)
def print_info(pdf_path): def print_info(pdf_path):
with open(os.path.abspath(pdf_path), 'rb') as pdf_file: if not podofo:
pdf = PdfFileReader(pdf_file) raise RuntimeError('Failed to load PoDoFo with error:'+podofo_err)
print _('Title: %s' % pdf.documentInfo.title) p = podofo.PDFDoc()
print _('Author: %s' % pdf.documentInfo.author) p.open(pdf_path)
print _('Subject: %s' % pdf.documentInfo.subject)
print _('Creator: %s' % pdf.documentInfo.creator) fmt = lambda x, y: '%-20s: %s'%(x, y)
print _('Producer: %s' % pdf.documentInfo.producer)
#print _('Creation Date: %s' % time.strftime('%a %b %d %H:%M:%S %Y', time.gmtime(os.path.getctime(pdf_path)))) print
#print _('Modification Date: %s' % time.strftime('%a %b %d %H:%M:%S %Y', time.gmtime(os.path.getmtime(pdf_path))))
print _('Pages: %s' % pdf.numPages) prints(fmt(_('Title'), p.title))
#print _('Encrypted: %s' % pdf.isEncrypted) prints(fmt(_('Author'), p.author))
try: prints(fmt(_('Subject'), p.subject))
print _('File Size: %s bytes' % os.path.getsize(pdf_path)) prints(fmt(_('Creator'), p.creator))
except: pass prints(fmt(_('Producer'), p.producer))
try: prints(fmt(_('Pages'), p.pages))
pdf_file.seek(0) prints(fmt(_('File Size'), os.stat(pdf_path).st_size))
vline = pdf_file.readline() prints(fmt(_('PDF Version'), p.version if p.version else _('Unknown')))
mo = re.search('(?iu)^%...-(?P<version>\d+\.\d+)', vline)
if mo != None:
print _('PDF Version: %s' % mo.group('version'))
except: pass
def main(args=sys.argv, name=''): def main(args=sys.argv, name=''):
log = Log() log = Log()

View File

@ -108,6 +108,40 @@ podofo_PDFDoc_pages_getter(podofo_PDFDoc *self, void *closure) {
return ans; return ans;
} }
/*
 * Getter backing the read-only "version" attribute of PDFDoc.
 *
 * Asks the underlying PoDoFo document for its PDF version and maps the
 * enum value to a Python string ("1.0" through "1.7"). Any value outside
 * that set yields None. If PoDoFo throws a PdfError, the error is handed
 * to podofo_set_exception() and NULL is returned.
 */
static PyObject *
podofo_PDFDoc_version_getter(podofo_PDFDoc *self, void *closure) {
    int pdf_version;
    try {
        pdf_version = self->doc->GetPdfVersion();
    } catch(const PdfError & err) {
        podofo_set_exception(err);
        return NULL;
    }

    /* Stays NULL when the version is not one of the known enum values. */
    const char *label = NULL;
    switch(pdf_version) {
        case ePdfVersion_1_0: label = "1.0"; break;
        case ePdfVersion_1_1: label = "1.1"; break;
        case ePdfVersion_1_2: label = "1.2"; break;
        case ePdfVersion_1_3: label = "1.3"; break;
        case ePdfVersion_1_4: label = "1.4"; break;
        case ePdfVersion_1_5: label = "1.5"; break;
        case ePdfVersion_1_6: label = "1.6"; break;
        case ePdfVersion_1_7: label = "1.7"; break;
        default: break;
    }

    if (label == NULL) {
        /* An empty format string makes Py_BuildValue return None. */
        return Py_BuildValue("");
    }
    return Py_BuildValue("s", label);
}
static PyObject * static PyObject *
podofo_PDFDoc_delete_pages(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) { podofo_PDFDoc_delete_pages(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
int first_page, num_pages; int first_page, num_pages;
@ -315,6 +349,10 @@ static PyGetSetDef podofo_PDFDoc_getsetters[] = {
(getter)podofo_PDFDoc_pages_getter, NULL, (getter)podofo_PDFDoc_pages_getter, NULL,
(char *)"Number of pages in document (read only)", (char *)"Number of pages in document (read only)",
NULL}, NULL},
{(char *)"version",
(getter)podofo_PDFDoc_version_getter, NULL,
(char *)"The PDF version (read only)",
NULL},
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };