IGN:Initial implementation of PDF->XML engine

This commit is contained in:
Kovid Goyal 2009-09-21 21:17:38 -06:00
parent 4efa4d7bb1
commit 5a94e3d965
12 changed files with 227 additions and 433 deletions

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, socket, struct
import os, socket, struct, subprocess
from distutils.spawn import find_executable
from PyQt4 import pyqtconfig
@ -42,6 +42,39 @@ elif find_executable('qmake'):
QMAKE = find_executable('qmake')
QMAKE = os.environ.get('QMAKE', QMAKE)
PKGCONFIG = find_executable('pkg-config')
PKGCONFIG = os.environ.get('PKG_CONFIG', PKGCONFIG)
def run_pkgconfig(name, envvar, default, flag, prefix):
    '''
    Return a list of values for the package ``name``, taken from the
    environment if possible and from pkg-config otherwise.

    :param name: Package name passed to pkg-config.
    :param envvar: Environment variable consulted first. A false value
                   (e.g. ``''``) skips the environment lookup.
    :param default: Value used when ``envvar`` is not set. Entries are
                    separated by ``os.pathsep``.
    :param flag: Option passed to pkg-config, e.g. ``'--cflags-only-I'``.
    :param prefix: Token on which the pkg-config output is split
                   (``'-I'``, ``'-L'`` or ``'-l'``).
    :return: List of stripped, non-empty entries. Unless ``prefix`` is
             ``'-l'``, entries that do not exist on disk are dropped.
             Empty if nothing usable was found.
    '''
    def usable(items):
        # With '-l' the entries are library names, not paths, so only the
        # other prefixes are checked for existence on disk.
        items = [x.strip() for x in items]
        return [x for x in items
                if x and (prefix == '-l' or os.path.exists(x))]

    ans = []
    if envvar:
        ans = usable(os.environ.get(envvar, default).split(os.pathsep))
    if not ans:
        try:
            proc = subprocess.Popen([PKGCONFIG, flag, name],
                    stdout=subprocess.PIPE)
            # communicate() drains the pipe and waits for the child, so no
            # zombie process or open file descriptor is left behind.
            raw = proc.communicate()[0]
            if not isinstance(raw, str):
                raw = raw.decode('utf-8', 'replace')
            ans = usable(raw.split(prefix))
        except Exception:
            # Best effort: a missing or failing pkg-config is reported and
            # an empty list returned. KeyboardInterrupt and SystemExit are
            # no longer swallowed.
            print('Failed to run pkg-config: %s for: %s' % (PKGCONFIG, name))
    return ans
def pkgconfig_include_dirs(name, envvar, default):
    'Query run_pkgconfig for the ``-I`` entries of the package ``name``.'
    return run_pkgconfig(name, envvar, default,
            flag='--cflags-only-I', prefix='-I')


def pkgconfig_lib_dirs(name, envvar, default):
    'Query run_pkgconfig for the ``-L`` entries of the package ``name``.'
    return run_pkgconfig(name, envvar, default,
            flag='--libs-only-L', prefix='-L')


def pkgconfig_libs(name, envvar, default):
    'Query run_pkgconfig for the ``-l`` entries of the package ``name``.'
    return run_pkgconfig(name, envvar, default,
            flag='--libs-only-l', prefix='-l')
def consolidate(envvar, default):
    '''
    Return the existing paths listed in the environment variable ``envvar``.

    :param envvar: Name of the environment variable to read.
    :param default: Value used when ``envvar`` is not set. Entries are
                    separated by ``os.pathsep``.
    :return: List of stripped entries that exist on disk. Empty and
             non-existent entries are dropped.
    '''
    val = os.environ.get(envvar, default)
    # os.pathsep is a string constant; calling it raised TypeError.
    ans = [x.strip() for x in val.split(os.pathsep)]
    return [x for x in ans if x and os.path.exists(x)]
pyqt = pyqtconfig.Configuration()
@ -50,28 +83,62 @@ qt_lib = pyqt.qt_lib_dir
fc_inc = '/usr/include/fontconfig'
fc_lib = '/usr/lib'
poppler_inc = '/usr/include/poppler/qt4'
poppler_lib = '/usr/lib'
poppler_libs = []
podofo_inc = '/usr/include/podofo'
podofo_lib = '/usr/lib'
if iswindows:
fc_inc = r'C:\cygwin\home\kovid\fontconfig\include\fontconfig'
fc_lib = r'C:\cygwin\home\kovid\fontconfig\lib'
poppler_inc = r'C:\cygwin\home\kovid\poppler\include\poppler\qt4'
poppler_lib = r'C:\cygwin\home\kovid\poppler\lib'
poppler_libs = ['QtCore4', 'QtGui4']
poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
r'C:\cygwin\home\kovid\poppler\include\poppler')
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+r'\qt4']
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
r'C:\cygwin\home\kovid\poppler\lib')
popplerqt4_lib_dirs = poppler_lib_dirs
poppler_libs = ['poppler']
popplerqt4_libs = poppler_libs + ['QtCore4', 'QtGui4']
podofo_inc = 'C:\\podofo\\include\\podofo'
podofo_lib = r'C:\podofo'
if isosx:
elif isosx:
fc_inc = '/Users/kovid/fontconfig/include/fontconfig'
fc_lib = '/Users/kovid/fontconfig/lib'
poppler_inc = '/Volumes/sw/build/poppler-0.10.7/qt4/src'
poppler_lib = '/Users/kovid/poppler/lib'
poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
'/Volumes/sw/build/poppler-0.10.7/poppler')
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
'/Users/kovid/poppler/lib')
popplerqt4_lib_dirs = poppler_lib_dirs
poppler_libs = popplerqt4_libs = ['poppler']
podofo_inc = '/usr/local/include/podofo'
podofo_lib = '/usr/local/lib'
else:
# Include directories
poppler_inc_dirs = pkgconfig_include_dirs('poppler',
'POPPLER_INC_DIR', '/usr/include/poppler')
popplerqt4_inc_dirs = pkgconfig_include_dirs('poppler-qt4', '', '')
if not popplerqt4_inc_dirs:
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
png_inc_dirs = pkgconfig_include_dirs('libpng', 'PNG_INC_DIR',
'/usr/include')
magick_inc_dirs = pkgconfig_include_dirs('MagickWand', 'MAGICK_INC', '/usr/include/ImageMagick')
# Library directories
poppler_lib_dirs = popplerqt4_lib_dirs = pkgconfig_lib_dirs('poppler', 'POPPLER_LIB_DIR',
'/usr/lib')
png_lib_dirs = pkgconfig_lib_dirs('libpng', 'PNG_LIB_DIR', '/usr/lib')
magick_lib_dirs = pkgconfig_lib_dirs('MagickWand', 'MAGICK_LIB', '/usr/lib')
# Libraries
poppler_libs = pkgconfig_libs('poppler', '', '')
if not poppler_libs:
poppler_libs = ['poppler']
popplerqt4_libs = pkgconfig_libs('poppler-qt4', '', '')
if not popplerqt4_libs:
popplerqt4_libs = ['poppler-qt4', 'poppler']
magick_libs = pkgconfig_libs('MagickWand', '', '')
if not magick_libs:
magick_libs = ['MagickWand', 'MagickCore']
png_libs = ['png']
fc_inc = os.environ.get('FC_INC_DIR', fc_inc)
@ -82,14 +149,27 @@ fc_error = None if os.path.exists(os.path.join(fc_inc, 'fontconfig.h')) else \
'variables.')
poppler_inc = os.environ.get('POPPLER_INC_DIR', poppler_inc)
poppler_lib = os.environ.get('POPPLER_LIB_DIR', poppler_lib)
poppler_error = None if os.path.exists(os.path.join(poppler_inc,
'poppler-qt4.h')) else \
# Each *_error is None when the marker header/directory was found in the
# first (or, for the Qt4 bindings, last) include directory, and a
# human-readable message otherwise.
poppler_error = None
if not poppler_inc_dirs or not os.path.exists(
        os.path.join(poppler_inc_dirs[0], 'OutputDev.h')):
    # Adjacent literals with no commas between them, so the message is a
    # single string rather than a 3-tuple of fragments.
    poppler_error = \
        ('Poppler not found on your system. Various PDF related'
         ' functionality will not work. Use the POPPLER_INC_DIR and'
         ' POPPLER_LIB_DIR environment variables.')

popplerqt4_error = None
if not popplerqt4_inc_dirs or not os.path.exists(
        os.path.join(popplerqt4_inc_dirs[-1], 'poppler-qt4.h')):
    popplerqt4_error = \
        ('Poppler Qt4 bindings not found on your system.')

magick_error = None
if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],
        'wand')):
    magick_error = ('ImageMagick not found on your system. '
            'Try setting the environment variables MAGICK_INC '
            'and MAGICK_LIB to help calibre locate the include and library '
            'files.')
podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib)
podofo_inc = os.environ.get('PODOFO_INC_DIR', podofo_inc)
@ -116,3 +196,5 @@ except:
HOST='unknown'
PROJECT=os.path.basename(os.path.abspath('.'))

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
__all__ = [
'pot', 'translations', 'get_translations', 'iso639',
'build',
'build', 'build_pdf2xml',
'gui',
'develop', 'install',
'resources',
@ -30,8 +30,9 @@ translations = Translations()
get_translations = GetTranslations()
iso639 = ISO639()
from setup.extensions import Build
from setup.extensions import Build, BuildPDF2XML
build = Build()
build_pdf2xml = BuildPDF2XML()
from setup.install import Develop, Install, Sdist
develop = Develop()

View File

@ -12,10 +12,12 @@ from distutils import sysconfig
from PyQt4.pyqtconfig import QtGuiModuleMakefile
from setup import Command, islinux, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, qt_inc, qt_lib, \
fc_error, poppler_libs, poppler_lib, poppler_inc, podofo_inc, \
from setup.build_environment import fc_inc, fc_lib, \
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
leopard_build, QMAKE, msvc, MT, win_inc, win_lib
leopard_build, QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, \
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
magick_error, magick_libs
MT
isunix = islinux or isosx
@ -43,6 +45,10 @@ class Extension(object):
self.ldflags = kwargs.get('ldflags', [])
self.optional = kwargs.get('optional', False)
reflow_sources = glob.glob(os.path.join(SRC, 'calibre', 'ebooks', 'pdf', '*.cpp'))
reflow_headers = glob.glob(os.path.join(SRC, 'calibre', 'ebooks', 'pdf', '*.h'))
reflow_error = poppler_error if poppler_error else magick_error
extensions = [
Extension('lzx',
['calibre/utils/lzx/lzxmodule.c',
@ -76,15 +82,6 @@ extensions = [
Extension('cPalmdoc',
['calibre/ebooks/compression/palmdoc.c']),
Extension('calibre_poppler',
['calibre/utils/poppler/poppler.cpp'],
libraries=(['poppler', 'poppler-qt4']+poppler_libs),
lib_dirs=[os.environ.get('POPPLER_LIB_DIR',
poppler_lib), qt_lib],
inc_dirs=[poppler_inc, qt_inc],
error=poppler_error,
optional=True),
Extension('podofo',
['calibre/utils/podofo/podofo.cpp'],
libraries=['podofo'],
@ -97,10 +94,20 @@ extensions = [
inc_dirs = ['calibre/gui2/pictureflow'],
headers = ['calibre/gui2/pictureflow/pictureflow.h'],
sip_files = ['calibre/gui2/pictureflow/pictureflow.sip']
)
),
Extension('pdfreflow',
reflow_sources,
headers=reflow_headers,
libraries=poppler_libs+magick_libs+png_libs,
lib_dirs=poppler_lib_dirs+magick_lib_dirs+png_lib_dirs,
inc_dirs=poppler_inc_dirs+magick_inc_dirs+png_inc_dirs,
error=reflow_error,
cflags=['-DPNG_SKIP_SETJMP_CHECK'] if islinux else []
)
]
if iswindows:
extensions.append(Extension('winutil',
['calibre/utils/windows/winutil.c'],
@ -346,10 +353,36 @@ class Build(Command):
class BuildPDF2XML(Command):

    description = 'Build command line pdf2xml utility'

    def run(self, opts):
        '''
        Compile every reflow source except ``python.cpp`` into an object
        file and link the objects into ``~/bin/pdf2xml``.

        A source is recompiled only when self.newer() reports its object
        file as out of date relative to the source and the reflow headers.
        The binary is relinked only when out of date relative to the objects.
        '''
        binary = os.path.expanduser('~/bin/pdf2xml')
        objdir = self.j(self.d(self.SRC), 'build', 'objects', 'pdf2xml')
        if not os.path.exists(objdir):
            os.makedirs(objdir)

        objects = []
        for source in reflow_sources:
            if source.endswith('python.cpp'):
                continue
            target = self.j(objdir, self.b(source+'.o'))
            # Recorded before compiling; the list is only read by the link
            # step, which is never reached if a compile fails.
            objects.append(target)
            if not self.newer(target, [source]+reflow_headers):
                continue
            compile_cmd = ['g++', '-pthread', '-pedantic', '-g', '-c',
                    '-Wall', '-I/usr/include/poppler',
                    '-I/usr/include/ImageMagick',
                    '-DPDF2XML', '-o', target, source]
            self.info(*compile_cmd)
            subprocess.check_call(compile_cmd)

        if self.newer(binary, objects):
            link_cmd = ['g++', '-g', '-o', binary]
            link_cmd += objects
            link_cmd += ['-lpoppler', '-lMagickWand', '-lpng', '-lpthread']
            self.info(*link_cmd)
            subprocess.check_call(link_cmd)

        self.info('Binary installed as', binary)

View File

@ -192,6 +192,10 @@ class Install(Develop):
x = self.j(dest, x)
if os.path.exists(dest):
shutil.rmtree(x)
for x in os.walk(dest):
for f in x[-1]:
if os.path.splitext(f)[1] in ('.c', '.cpp', '.h'):
os.remove(self.j(x[0], f))
dest = self.root + self.resources
if os.path.exists(dest):
shutil.rmtree(dest)
@ -241,4 +245,3 @@ class Sdist(Command):
os.remove(self.DEST)

View File

@ -38,6 +38,7 @@ class LinuxFreeze(Command):
binary_includes = [
'/usr/bin/pdftohtml',
'/usr/lib/libwmflite-0.2.so.7',
'/usr/lib/liblcms.so.1',
'/tmp/calibre-mount-helper',
'/usr/lib/libunrar.so',
'/usr/lib/libsqlite3.so.0',

View File

@ -55,7 +55,7 @@ if plugins is None:
sys.path.insert(0, plugin_path)
for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
'fontconfig', 'calibre_poppler'] + \
'fontconfig', 'pdfreflow'] + \
(['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []):
try:

View File

@ -161,6 +161,7 @@ quick_metadata = QuickMetadata()
def get_file_type_metadata(stream, ftype):
mi = MetaInformation(None, None)
ftype = ftype.lower().strip()
if _metadata_readers.has_key(ftype):
for plugin in _metadata_readers[ftype]:
@ -168,6 +169,8 @@ def get_file_type_metadata(stream, ftype):
with plugin:
try:
plugin.quick = quick_metadata.quick
if hasattr(stream, 'seek'):
stream.seek(0)
mi = plugin.get_metadata(stream, ftype.lower().strip())
break
except:

View File

@ -10,6 +10,7 @@ import sys, os, re, shutil
from calibre.utils.config import OptionParser
from calibre.constants import iswindows, isosx
from calibre.libunzip import update
from calibre import prints
def option_parser():
parser = OptionParser(usage='''\
@ -28,6 +29,8 @@ Run an embedded python interpreter.
help='Debug the specified device driver.')
parser.add_option('-g', '--gui', default=False, action='store_true',
help='Run the GUI',)
parser.add_option('--paths', default=False, action='store_true',
help='Output the paths necessary to setup the calibre environment')
parser.add_option('--migrate', action='store_true', default=False,
help='Migrate old database. Needs two arguments. Path '
'to library1.db and path to new library folder.')
@ -203,6 +206,10 @@ def main(args=sys.argv):
migrate(args[1], args[2])
elif opts.add_simple_plugin is not None:
add_simple_plugin(opts.add_simple_plugin)
elif opts.paths:
prints('CALIBRE_RESOURCES_LOCATION='+sys.resources_location)
prints('CALIBRE_EXTENSIONS_LOCATION='+sys.extensions_location)
prints('CALIBRE_PYTHON_PATH='+os.pathsep.join(sys.path))
else:
from IPython.Shell import IPShellEmbed
ipshell = IPShellEmbed()

View File

@ -3,6 +3,52 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files'''
from functools import partial
from calibre import plugins, prints
from calibre.ebooks.metadata import MetaInformation, string_to_authors#, authors_to_string
pdfreflow, pdfreflow_error = plugins['pdfreflow']
def get_metadata(stream, cover=True):
    '''
    Build a MetaInformation object from the PDF data read from ``stream``.

    The whole stream is read and passed to ``pdfreflow.get_metadata``. The
    returned mapping is consulted for the keys ``Title``, ``Author``,
    ``Creator``, ``Keywords``, ``Subject`` and, when ``cover`` is true,
    ``cover``.

    :raises RuntimeError: if the pdfreflow plugin could not be loaded.
    '''
    if pdfreflow is None:
        raise RuntimeError(pdfreflow_error)

    info = pdfreflow.get_metadata(stream.read(), cover)
    title = info.get('Title', None)

    raw_authors = info.get('Author', None)
    authors = [_('Unknown')] if raw_authors is None else \
            string_to_authors(raw_authors)
    mi = MetaInformation(title, authors)

    producer = info.get('Creator', None)
    if producer:
        mi.book_producer = producer

    # Tags are the comma separated keywords, with the subject (if any)
    # placed first.
    mi.tags = []
    keywords = info.get('Keywords', None)
    if keywords:
        mi.tags = [kw.strip() for kw in keywords.split(',')]
    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cover and 'cover' in info:
        cover_data = info['cover']
        if cover_data is not None:
            mi.cover_data = ('png', cover_data)
        else:
            prints(title, 'is an encrypted document, cover extraction not allowed.')

    return mi
get_quick_metadata = partial(get_metadata, cover=False)
'''
import sys, os, cStringIO
from threading import Thread
@ -139,6 +185,6 @@ def get_cover(cover_path):
MagickSetImageFormat(wand, 'JPEG')
MagickWriteImage(wand, '%s.jpg' % cover_path)
return open('%s.jpg' % cover_path, 'rb').read()
'''

View File

@ -40,12 +40,12 @@
<string>...</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<iconset resource="../../../work/calibre/resources/images.qrc">
<normaloff>:/images/document_open.svg</normaloff>:/images/document_open.svg</iconset>
</property>
</widget>
</item>
<item row="2" column="0">
<item row="3" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -64,15 +64,25 @@
<string>...</string>
</property>
<property name="icon">
<iconset resource="../../../../resources/images.qrc">
<iconset resource="../../../work/calibre/resources/images.qrc">
<normaloff>:/images/clear_left.svg</normaloff>:/images/clear_left.svg</iconset>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>The debug process outputs the intermediate HTML generated at various stages of the conversion process. This HTML can sometimes serve as a good starting point for hand editing a conversion.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
<resources>
<include location="../../../../resources/images.qrc"/>
<include location="../../../work/calibre/resources/images.qrc"/>
</resources>
<connections/>
</ui>

View File

@ -1,63 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre.constants import plugins
from calibre.ebooks.metadata import MetaInformation, string_to_authors
poppler, poppler_err = plugins['calibre_poppler']
class NotAvailable(Exception):
    'Raised by get_metadata when the calibre_poppler plugin failed to load.'
def get_metadata(stream, cover=True):
    '''
    Read metadata from the PDF in ``stream`` using the calibre_poppler
    plugin and return it as a MetaInformation object.

    When the document has no usable title, the base name of
    ``stream.name`` (without extension) is used if the stream has a
    ``name`` attribute, and the translated string 'Unknown' otherwise.
    When ``cover`` is true and Qt may be used, the first page is rendered
    and stored as JPEG cover data. Rendering errors are printed and ignored.

    :raises NotAvailable: if the poppler plugin could not be loaded.
    '''
    if not poppler:
        raise NotAvailable('Failed to load poppler with error: '+poppler_err)

    doc = poppler.PDFDoc()
    raw = stream.read()
    doc.load(raw)
    del raw

    title = doc.title
    if not (title and title.strip()):
        title = _('Unknown')
        if hasattr(stream, 'name'):
            fname = os.path.basename(stream.name)
            title = os.path.splitext(fname)[0]

    author = doc.author
    if author:
        authors = string_to_authors(author)
    else:
        authors = [_('Unknown')]
    creator = doc.creator

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if doc.subject:
        mi.category = doc.subject
    if doc.keywords:
        mi.tags = [kw.strip() for kw in doc.keywords.split(',')]

    if cover:
        from calibre.gui2 import is_ok_to_use_qt
        cdata = None
        if is_ok_to_use_qt():
            try:
                cdata = doc.render_page(0)
            except:
                import traceback
                traceback.print_exc()
        if cdata is not None:
            mi.cover_data = ('jpg', cdata)

    del doc
    return mi

View File

@ -1,329 +0,0 @@
#define UNICODE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <poppler-qt4.h>
#include <QtCore/QBuffer>
#include <QtGui/QImage>
/* Instance layout of the PDFDoc Python type: the standard object header
 * followed by the wrapped Poppler document. */
typedef struct {
PyObject_HEAD
/* Type-specific fields go here. */
/* Set to NULL by poppler_PDFDoc_new, assigned by load()/open() and
 * deleted in poppler_PDFDoc_dealloc. */
Poppler::Document *doc;
} poppler_PDFDoc;
extern "C" {
/* tp_dealloc: destroy the wrapped document and free the Python object. */
static void
poppler_PDFDoc_dealloc(poppler_PDFDoc* self)
{
    /* delete on a null pointer is a no-op, so no explicit check is needed. */
    delete self->doc;
    self->ob_type->tp_free((PyObject*)self);
}
/* tp_new: allocate a PDFDoc instance with no document attached.
 * Returns NULL if the allocation fails. */
static PyObject *
poppler_PDFDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    poppler_PDFDoc *obj = (poppler_PDFDoc *)type->tp_alloc(type, 0);

    if (obj == NULL) return NULL;
    obj->doc = NULL;
    return (PyObject *)obj;
}
/* load(data): parse a PDF from a byte string and attach it to this object.
 *
 * Any previously attached document is destroyed first. Raises ValueError
 * (and leaves the object without a document) if Poppler cannot load the
 * data. Returns None on success. */
static PyObject *
poppler_PDFDoc_load(poppler_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    char *buffer; Py_ssize_t size;

    if (!PyArg_ParseTuple(args, "s#", &buffer, &size)) return NULL;

    /* Deep copy: `buffer` points into the Python string passed by the
     * caller, which may be freed as soon as this call returns.
     * QByteArray::fromRawData() does not copy, so the document could
     * otherwise end up referring to freed memory. */
    QByteArray data(buffer, static_cast<int>(size));

    /* Release any document from an earlier load()/open() instead of
     * leaking it. */
    delete self->doc;
    self->doc = Poppler::Document::loadFromData(data);
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "Could not load PDF file from data.");
        return NULL;
    }
    Py_RETURN_NONE;
}
}
/* Convert a Python unicode object to a QString.
 *
 * On failure a Python exception is set and a null QString is returned.
 * Callers should check PyErr_Occurred(). */
static QString
poppler_convert_pystring(PyObject *py) {
    QString ans;
    PyObject *u8;

    /* PyUnicode_AS_UNICODE performs no type checking, so reject anything
     * that is not a unicode object before touching its buffer. */
    if (!PyUnicode_Check(py)) {
        PyErr_SetString(PyExc_TypeError, "a unicode object is required");
        return ans;
    }
    u8 = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(py),
            PyUnicode_GET_SIZE(py), "replace");
    /* The encoder has already set an exception; keep it rather than
     * replacing it with MemoryError. */
    if (u8 == NULL) return ans;
    /* Passing the explicit size keeps embedded NUL characters intact. */
    ans = QString::fromUtf8(PyString_AS_STRING(u8),
            static_cast<int>(PyString_GET_SIZE(u8)));
    Py_DECREF(u8);
    return ans;
}
extern "C" {
/* Convert a QString to a new Python unicode object.
 *
 * Returns a new reference, or NULL with an exception set on failure. */
static PyObject *
poppler_convert_qstring(const QString &src) {
    QByteArray data = src.toUtf8();

    /* "strict" is the standard error handler name; "error" is not a
     * registered handler and would itself fail with LookupError if a
     * decoding error ever had to be reported. */
    return PyUnicode_Decode(data.constData(), data.size(), "utf-8", "strict");
}
/* open(path): load a PDF from a file path given as a unicode object.
 *
 * Any previously attached document is destroyed first. Raises TypeError
 * if `path` is not unicode and ValueError if Poppler cannot load the file.
 * Returns None on success. */
static PyObject *
poppler_PDFDoc_open(poppler_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    PyObject *fname; QString _fname;

    /* "U" accepts only unicode objects, which is what
     * poppler_convert_pystring requires. */
    if (!PyArg_ParseTuple(args, "U", &fname)) return NULL;
    _fname = poppler_convert_pystring(fname);
    if (PyErr_Occurred()) return NULL;

    /* Release any document from an earlier load()/open() instead of
     * leaking it. */
    delete self->doc;
    self->doc = Poppler::Document::load(_fname);
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "Could not load PDF file from path.");
        return NULL;
    }
    Py_RETURN_NONE;
}
/* Shared implementation of the string metadata getters.
 *
 * `field` selects the info-dictionary key: 0 Title, 1 Author, 2 Subject,
 * 3 Keywords, 4 Creator, 5 Producer. Returns a new unicode reference, or
 * NULL with an exception set for an unknown field, a missing document or
 * a conversion failure. */
static PyObject *
poppler_PDFDoc_getter(poppler_PDFDoc *self, int field)
{
    const char *s;

    switch (field) {
        case 0:
            s = "Title"; break;
        case 1:
            s = "Author"; break;
        case 2:
            s = "Subject"; break;
        case 3:
            s = "Keywords"; break;
        case 4:
            s = "Creator"; break;
        case 5:
            s = "Producer"; break;
        default:
            PyErr_SetString(PyExc_Exception, "Bad field");
            return NULL;
    }
    /* Attribute access before load()/open() must not dereference NULL. */
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded");
        return NULL;
    }
    /* poppler_convert_qstring already returns a new reference. An extra
     * Py_INCREF here would leak one reference per attribute access. */
    return poppler_convert_qstring(self->doc->info(QString(s)));
}
/* Shared implementation of the metadata setters. Writing metadata is not
 * implemented, so every assignment fails.
 *
 * Always returns -1 with NotImplementedError set. Returning -1 without an
 * exception would make the interpreter raise SystemError instead. */
static int
poppler_PDFDoc_setter(poppler_PDFDoc *self, PyObject *val, int field) {
    PyErr_SetString(PyExc_NotImplementedError,
            "Setting PDF metadata is not supported");
    return -1;
}
/* Per-attribute getters. Each forwards to poppler_PDFDoc_getter with the
 * field index that function's switch maps to an info-dictionary key
 * (0 Title, 1 Author, 2 Subject, 3 Keywords, 4 Creator, 5 Producer).
 * The closure argument is unused. */
static PyObject *
poppler_PDFDoc_title_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 0);
}
static PyObject *
poppler_PDFDoc_author_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 1);
}
static PyObject *
poppler_PDFDoc_subject_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 2);
}
static PyObject *
poppler_PDFDoc_keywords_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 3);
}
static PyObject *
poppler_PDFDoc_creator_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 4);
}
static PyObject *
poppler_PDFDoc_producer_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 5);
}
/* Getter for the read-only `version` attribute.
 *
 * Returns the document's PDF version as a new float reference, or NULL
 * with ValueError set if no document has been loaded. */
static PyObject *
poppler_PDFDoc_version_getter(poppler_PDFDoc *self, void *closure) {
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded");
        return NULL;
    }
    /* PyFloat_FromDouble returns a new reference. An extra Py_INCREF
     * would leak it. */
    return PyFloat_FromDouble(self->doc->pdfVersion());
}
/* Per-attribute setters. Each forwards to poppler_PDFDoc_setter with the
 * same field index as the matching getter (0 title ... 5 producer).
 * The closure argument is unused. */
static int
poppler_PDFDoc_title_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 0);
}
static int
poppler_PDFDoc_author_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 1);
}
static int
poppler_PDFDoc_subject_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 2);
}
static int
poppler_PDFDoc_keywords_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 3);
}
static int
poppler_PDFDoc_creator_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 4);
}
static int
poppler_PDFDoc_producer_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 5);
}
}
/* render_page(page_num, xdpi=166, ydpi=166): render one page to JPEG.
 *
 * Page numbers start from zero. Returns the JPEG data as a new string
 * reference. Raises ValueError if no document is loaded, the document is
 * locked or the page number is out of range, and Exception if the page
 * cannot be obtained, rendered or encoded. */
static PyObject *
poppler_PDFDoc_render_page(poppler_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    QImage img;
    float xdpi = 166.0, ydpi = 166.0;
    Poppler::Page *page;
    QByteArray ba;
    QBuffer buffer(&ba);
    int num;

    if (!PyArg_ParseTuple(args, "i|ff", &num, &xdpi, &ydpi)) return NULL;

    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded");
        return NULL;
    }
    if (self->doc->isLocked()) {
        PyErr_SetString(PyExc_ValueError, "This document is copyrighted.");
        return NULL;
    }
    if (num < 0 || num >= self->doc->numPages()) {
        PyErr_SetString(PyExc_ValueError, "Invalid page number");
        return NULL;
    }

    page = self->doc->page(num);
    if (page == NULL) {
        PyErr_SetString(PyExc_Exception, "Failed to render first page of PDF");
        return NULL;
    }
    img = page->renderToImage(xdpi, ydpi);
    /* The caller owns the Page returned by Document::page(). It is no
     * longer needed once the image has been produced. */
    delete page;
    if (img.isNull()) {
        PyErr_SetString(PyExc_Exception, "Failed to render first page of PDF");
        return NULL;
    }

    buffer.open(QIODevice::WriteOnly);
    if (!img.save(&buffer, "JPEG")) {
        PyErr_SetString(PyExc_Exception, "Failed to save rendered page");
        return NULL;
    }
    /* PyString_FromStringAndSize returns a new reference. An extra
     * Py_INCREF would leak the whole image buffer. */
    return PyString_FromStringAndSize(ba.data(), ba.size());
}
/* Method table of the PDFDoc type: load, open and render_page, all
 * registered with METH_VARARGS, terminated by a NULL sentinel entry. */
static PyMethodDef poppler_PDFDoc_methods[] = {
{"load", (PyCFunction)poppler_PDFDoc_load, METH_VARARGS,
"Load a PDF document from a byte buffer (string)"
},
{"open", (PyCFunction)poppler_PDFDoc_open, METH_VARARGS,
"Load a PDF document from a file path (string)"
},
{"render_page", (PyCFunction)poppler_PDFDoc_render_page, METH_VARARGS,
"render_page(page_num, xdpi=166, ydpi=166) -> Render a page to a JPEG image. Page numbers start from zero."
},
{NULL} /* Sentinel */
};
/* Getter for the read-only `pages` attribute.
 *
 * Returns the number of pages as a new int reference, or NULL with
 * ValueError set if no document has been loaded. */
static PyObject *
poppler_PDFDoc_pages_getter(poppler_PDFDoc *self, void *closure) {
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded");
        return NULL;
    }
    /* PyInt_FromLong returns a new reference. An extra Py_INCREF would
     * leak it. */
    return PyInt_FromLong(static_cast<long>(self->doc->numPages()));
}
/* Attribute table of the PDFDoc type. The six metadata attributes have
 * both a getter and a setter; `pages` and `version` have a NULL setter.
 * Each entry's closure is NULL and the table ends with a NULL sentinel. */
static PyGetSetDef poppler_PDFDoc_getsetters[] = {
{(char *)"title",
(getter)poppler_PDFDoc_title_getter, (setter)poppler_PDFDoc_title_setter,
(char *)"Document title",
NULL},
{(char *)"author",
(getter)poppler_PDFDoc_author_getter, (setter)poppler_PDFDoc_author_setter,
(char *)"Document author",
NULL},
{(char *)"subject",
(getter)poppler_PDFDoc_subject_getter, (setter)poppler_PDFDoc_subject_setter,
(char *)"Document subject",
NULL},
{(char *)"keywords",
(getter)poppler_PDFDoc_keywords_getter, (setter)poppler_PDFDoc_keywords_setter,
(char *)"Document keywords",
NULL},
{(char *)"creator",
(getter)poppler_PDFDoc_creator_getter, (setter)poppler_PDFDoc_creator_setter,
(char *)"Document creator",
NULL},
{(char *)"producer",
(getter)poppler_PDFDoc_producer_getter, (setter)poppler_PDFDoc_producer_setter,
(char *)"Document producer",
NULL},
{(char *)"pages",
(getter)poppler_PDFDoc_pages_getter, NULL,
(char *)"Number of pages in document (read only)",
NULL},
{(char *)"version",
(getter)poppler_PDFDoc_version_getter, NULL,
(char *)"The PDF version (read only)",
NULL},
{NULL} /* Sentinel */
};
/* Type object for calibre_poppler.PDFDoc. Only tp_dealloc, tp_flags,
 * tp_doc, tp_methods, tp_getset and tp_new are filled in; every other
 * slot is left as 0. */
static PyTypeObject poppler_PDFDocType = {
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"calibre_poppler.PDFDoc", /*tp_name*/
sizeof(poppler_PDFDoc), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)poppler_PDFDoc_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT, /*tp_flags*/
"PDF Documents", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
poppler_PDFDoc_methods, /* tp_methods */
0, /* tp_members */
poppler_PDFDoc_getsetters, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
poppler_PDFDoc_new, /* tp_new */
};
/* Module-level function table. It contains only the sentinel, so the
 * module exposes no functions of its own. */
static PyMethodDef poppler_methods[] = {
{NULL} /* Sentinel */
};
extern "C" {
/* Module initialisation: create the calibre_poppler module and register
 * the PDFDoc type in it. Returns early, leaving the Python exception set,
 * if the type or the module cannot be set up. */
PyMODINIT_FUNC
initcalibre_poppler(void)
{
    PyObject* m;

    if (PyType_Ready(&poppler_PDFDocType) < 0)
        return;

    m = Py_InitModule3("calibre_poppler", poppler_methods,
                       "Wrapper for the Poppler PDF library");
    /* Py_InitModule3 returns NULL on failure, and PyModule_AddObject must
     * not be called with a NULL module. */
    if (m == NULL)
        return;

    /* PyModule_AddObject steals a reference, so take one first. */
    Py_INCREF(&poppler_PDFDocType);
    PyModule_AddObject(m, "PDFDoc", (PyObject *)&poppler_PDFDocType);
}
}