IGN:Initial implementation of PDF->XML engine

This commit is contained in:
Kovid Goyal 2009-09-21 21:17:38 -06:00
parent 4efa4d7bb1
commit 5a94e3d965
12 changed files with 227 additions and 433 deletions

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, socket, struct import os, socket, struct, subprocess
from distutils.spawn import find_executable from distutils.spawn import find_executable
from PyQt4 import pyqtconfig from PyQt4 import pyqtconfig
@ -42,6 +42,39 @@ elif find_executable('qmake'):
QMAKE = find_executable('qmake') QMAKE = find_executable('qmake')
QMAKE = os.environ.get('QMAKE', QMAKE) QMAKE = os.environ.get('QMAKE', QMAKE)
# Path to the pkg-config executable as found by find_executable (None when
# it is not found); a PKG_CONFIG environment variable, when set, overrides it.
PKGCONFIG = find_executable('pkg-config')
PKGCONFIG = os.environ.get('PKG_CONFIG', PKGCONFIG)
def run_pkgconfig(name, envvar, default, flag, prefix):
    '''
    Return a list of include dirs, library dirs or library names for a package.

    The value of the environment variable ``envvar`` (falling back to
    ``default``) is tried first, split on os.pathsep. Only if that yields
    nothing usable is ``PKGCONFIG flag name`` run and its output split on
    ``prefix``.

    :param name: pkg-config package name, e.g. ``'poppler'``
    :param envvar: Name of the overriding environment variable; a false
                   value skips the environment lookup entirely
    :param default: Value used when ``envvar`` is not set in the environment
    :param flag: pkg-config option, e.g. ``'--cflags-only-I'``
    :param prefix: Compiler flag prefix to split the pkg-config output on
                   (``'-I'``, ``'-L'`` or ``'-l'``)
    :return: List of strings; empty if nothing was found or pkg-config failed
    '''
    def _usable(items):
        # Library names ('-l') are not filesystem paths, so only the
        # directory style results are checked for existence
        return [x for x in items if x and (prefix == '-l' or os.path.exists(x))]

    ans = []
    if envvar:
        val = os.environ.get(envvar, default)
        ans = _usable([x.strip() for x in val.split(os.pathsep)])
    if not ans:
        try:
            proc = subprocess.Popen([PKGCONFIG, flag, name],
                                    stdout=subprocess.PIPE)
            # communicate() also waits for the child, so it is reaped
            raw = proc.communicate()[0]
            if not isinstance(raw, str):
                # On interpreters where pipes yield bytes rather than str
                raw = raw.decode('utf-8', 'replace')
            ans = _usable([x.strip() for x in raw.split(prefix)])
        except Exception:
            # Best effort: a missing or broken pkg-config must not abort the
            # build setup, but Ctrl-C and SystemExit are no longer swallowed
            print('Failed to run pkg-config: %s for: %s' % (PKGCONFIG, name))
    return ans
def pkgconfig_include_dirs(name, envvar, default):
    '''Include directories for ``name``: run_pkgconfig with --cflags-only-I / -I.'''
    flag, prefix = '--cflags-only-I', '-I'
    return run_pkgconfig(name, envvar, default, flag, prefix)
def pkgconfig_lib_dirs(name, envvar, default):
    '''Library directories for ``name``: run_pkgconfig with --libs-only-L / -L.'''
    flag, prefix = '--libs-only-L', '-L'
    return run_pkgconfig(name, envvar, default, flag, prefix)
def pkgconfig_libs(name, envvar, default):
    '''Library names for ``name``: run_pkgconfig with --libs-only-l / -l.'''
    flag, prefix = '--libs-only-l', '-l'
    return run_pkgconfig(name, envvar, default, flag, prefix)
def consolidate(envvar, default):
    '''
    Return the existing paths listed in an environment variable.

    :param envvar: Name of the environment variable to read
    :param default: os.pathsep separated string used when ``envvar`` is unset
    :return: List of the whitespace-stripped, non-empty entries that exist on
             the filesystem, in their original order
    '''
    val = os.environ.get(envvar, default) or ''
    # os.pathsep is a plain string constant, not a function: calling it
    # raised TypeError on every invocation
    ans = [x.strip() for x in val.split(os.pathsep)]
    return [x for x in ans if x and os.path.exists(x)]
pyqt = pyqtconfig.Configuration() pyqt = pyqtconfig.Configuration()
@ -50,28 +83,62 @@ qt_lib = pyqt.qt_lib_dir
fc_inc = '/usr/include/fontconfig' fc_inc = '/usr/include/fontconfig'
fc_lib = '/usr/lib' fc_lib = '/usr/lib'
poppler_inc = '/usr/include/poppler/qt4'
poppler_lib = '/usr/lib'
poppler_libs = []
podofo_inc = '/usr/include/podofo' podofo_inc = '/usr/include/podofo'
podofo_lib = '/usr/lib' podofo_lib = '/usr/lib'
if iswindows: if iswindows:
fc_inc = r'C:\cygwin\home\kovid\fontconfig\include\fontconfig' fc_inc = r'C:\cygwin\home\kovid\fontconfig\include\fontconfig'
fc_lib = r'C:\cygwin\home\kovid\fontconfig\lib' fc_lib = r'C:\cygwin\home\kovid\fontconfig\lib'
poppler_inc = r'C:\cygwin\home\kovid\poppler\include\poppler\qt4' poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
poppler_lib = r'C:\cygwin\home\kovid\poppler\lib' r'C:\cygwin\home\kovid\poppler\include\poppler')
poppler_libs = ['QtCore4', 'QtGui4'] popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+r'\qt4']
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
r'C:\cygwin\home\kovid\poppler\lib')
popplerqt4_lib_dirs = poppler_lib_dirs
poppler_libs = ['poppler']
popplerqt4_libs = poppler_libs + ['QtCore4', 'QtGui4']
podofo_inc = 'C:\\podofo\\include\\podofo' podofo_inc = 'C:\\podofo\\include\\podofo'
podofo_lib = r'C:\podofo' podofo_lib = r'C:\podofo'
elif isosx:
if isosx:
fc_inc = '/Users/kovid/fontconfig/include/fontconfig' fc_inc = '/Users/kovid/fontconfig/include/fontconfig'
fc_lib = '/Users/kovid/fontconfig/lib' fc_lib = '/Users/kovid/fontconfig/lib'
poppler_inc = '/Volumes/sw/build/poppler-0.10.7/qt4/src' poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
poppler_lib = '/Users/kovid/poppler/lib' '/Volumes/sw/build/poppler-0.10.7/poppler')
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
'/Users/kovid/poppler/lib')
popplerqt4_lib_dirs = poppler_lib_dirs
poppler_libs = popplerqt4_libs = ['poppler']
podofo_inc = '/usr/local/include/podofo' podofo_inc = '/usr/local/include/podofo'
podofo_lib = '/usr/local/lib' podofo_lib = '/usr/local/lib'
else:
# Include directories
poppler_inc_dirs = pkgconfig_include_dirs('poppler',
'POPPLER_INC_DIR', '/usr/include/poppler')
popplerqt4_inc_dirs = pkgconfig_include_dirs('poppler-qt4', '', '')
if not popplerqt4_inc_dirs:
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
png_inc_dirs = pkgconfig_include_dirs('libpng', 'PNG_INC_DIR',
'/usr/include')
magick_inc_dirs = pkgconfig_include_dirs('MagickWand', 'MAGICK_INC', '/usr/include/ImageMagick')
# Library directories
poppler_lib_dirs = popplerqt4_lib_dirs = pkgconfig_lib_dirs('poppler', 'POPPLER_LIB_DIR',
'/usr/lib')
png_lib_dirs = pkgconfig_lib_dirs('libpng', 'PNG_LIB_DIR', '/usr/lib')
magick_lib_dirs = pkgconfig_lib_dirs('MagickWand', 'MAGICK_LIB', '/usr/lib')
# Libraries
poppler_libs = pkgconfig_libs('poppler', '', '')
if not poppler_libs:
poppler_libs = ['poppler']
popplerqt4_libs = pkgconfig_libs('poppler-qt4', '', '')
if not popplerqt4_libs:
popplerqt4_libs = ['poppler-qt4', 'poppler']
magick_libs = pkgconfig_libs('MagickWand', '', '')
if not magick_libs:
magick_libs = ['MagickWand', 'MagickCore']
png_libs = ['png']
fc_inc = os.environ.get('FC_INC_DIR', fc_inc) fc_inc = os.environ.get('FC_INC_DIR', fc_inc)
@ -82,14 +149,27 @@ fc_error = None if os.path.exists(os.path.join(fc_inc, 'fontconfig.h')) else \
'variables.') 'variables.')
poppler_inc = os.environ.get('POPPLER_INC_DIR', poppler_inc) poppler_error = None
poppler_lib = os.environ.get('POPPLER_LIB_DIR', poppler_lib) if not poppler_inc_dirs or not os.path.exists(
poppler_error = None if os.path.exists(os.path.join(poppler_inc, os.path.join(poppler_inc_dirs[0], 'OutputDev.h')):
'poppler-qt4.h')) else \ poppler_error = \
('Poppler not found on your system. Various PDF related', ('Poppler not found on your system. Various PDF related',
' functionality will not work. Use the POPPLER_INC_DIR and', ' functionality will not work. Use the POPPLER_INC_DIR and',
' POPPLER_LIB_DIR environment variables.') ' POPPLER_LIB_DIR environment variables.')
# The Qt4 bindings count as present only if poppler-qt4.h exists in the last
# of the popplerqt4 include directories
popplerqt4_error = None if (popplerqt4_inc_dirs and os.path.exists(
        os.path.join(popplerqt4_inc_dirs[-1], 'poppler-qt4.h'))) else \
    'Poppler Qt4 bindings not found on your system.'
# ImageMagick counts as present only if the first include directory contains
# a "wand" entry.
# NOTE(review): magick_inc_dirs appears to be assigned only in the final
# (non-Windows, non-OSX) branch above -- confirm it is defined on every
# platform before this check runs.
magick_error = None
if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],
        'wand')):
    # Message text corrected: "inclue" -> "include", "libbrary" -> "library"
    magick_error = ('ImageMagick not found on your system. '
            'Try setting the environment variables MAGICK_INC '
            'and MAGICK_LIB to help calibre locate the include and library '
            'files.')
podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib) podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib)
podofo_inc = os.environ.get('PODOFO_INC_DIR', podofo_inc) podofo_inc = os.environ.get('PODOFO_INC_DIR', podofo_inc)
@ -116,3 +196,5 @@ except:
HOST='unknown' HOST='unknown'
PROJECT=os.path.basename(os.path.abspath('.')) PROJECT=os.path.basename(os.path.abspath('.'))

View File

@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
__all__ = [ __all__ = [
'pot', 'translations', 'get_translations', 'iso639', 'pot', 'translations', 'get_translations', 'iso639',
'build', 'build', 'build_pdf2xml',
'gui', 'gui',
'develop', 'install', 'develop', 'install',
'resources', 'resources',
@ -30,8 +30,9 @@ translations = Translations()
get_translations = GetTranslations() get_translations = GetTranslations()
iso639 = ISO639() iso639 = ISO639()
from setup.extensions import Build from setup.extensions import Build, BuildPDF2XML
build = Build() build = Build()
build_pdf2xml = BuildPDF2XML()
from setup.install import Develop, Install, Sdist from setup.install import Develop, Install, Sdist
develop = Develop() develop = Develop()

View File

@ -12,10 +12,12 @@ from distutils import sysconfig
from PyQt4.pyqtconfig import QtGuiModuleMakefile from PyQt4.pyqtconfig import QtGuiModuleMakefile
from setup import Command, islinux, isosx, SRC, iswindows from setup import Command, islinux, isosx, SRC, iswindows
from setup.build_environment import fc_inc, fc_lib, qt_inc, qt_lib, \ from setup.build_environment import fc_inc, fc_lib, \
fc_error, poppler_libs, poppler_lib, poppler_inc, podofo_inc, \ fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \ podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
leopard_build, QMAKE, msvc, MT, win_inc, win_lib leopard_build, QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, \
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
magick_error, magick_libs
MT MT
isunix = islinux or isosx isunix = islinux or isosx
@ -43,6 +45,10 @@ class Extension(object):
self.ldflags = kwargs.get('ldflags', []) self.ldflags = kwargs.get('ldflags', [])
self.optional = kwargs.get('optional', False) self.optional = kwargs.get('optional', False)
# All C++ sources and headers found directly in SRC/calibre/ebooks/pdf
reflow_sources = glob.glob(os.path.join(SRC, 'calibre', 'ebooks', 'pdf', '*.cpp'))
reflow_headers = glob.glob(os.path.join(SRC, 'calibre', 'ebooks', 'pdf', '*.h'))
# The poppler error takes precedence; otherwise the ImageMagick error, which
# may itself be None
reflow_error = poppler_error if poppler_error else magick_error
extensions = [ extensions = [
Extension('lzx', Extension('lzx',
['calibre/utils/lzx/lzxmodule.c', ['calibre/utils/lzx/lzxmodule.c',
@ -76,15 +82,6 @@ extensions = [
Extension('cPalmdoc', Extension('cPalmdoc',
['calibre/ebooks/compression/palmdoc.c']), ['calibre/ebooks/compression/palmdoc.c']),
Extension('calibre_poppler',
['calibre/utils/poppler/poppler.cpp'],
libraries=(['poppler', 'poppler-qt4']+poppler_libs),
lib_dirs=[os.environ.get('POPPLER_LIB_DIR',
poppler_lib), qt_lib],
inc_dirs=[poppler_inc, qt_inc],
error=poppler_error,
optional=True),
Extension('podofo', Extension('podofo',
['calibre/utils/podofo/podofo.cpp'], ['calibre/utils/podofo/podofo.cpp'],
libraries=['podofo'], libraries=['podofo'],
@ -97,10 +94,20 @@ extensions = [
inc_dirs = ['calibre/gui2/pictureflow'], inc_dirs = ['calibre/gui2/pictureflow'],
headers = ['calibre/gui2/pictureflow/pictureflow.h'], headers = ['calibre/gui2/pictureflow/pictureflow.h'],
sip_files = ['calibre/gui2/pictureflow/pictureflow.sip'] sip_files = ['calibre/gui2/pictureflow/pictureflow.sip']
) ),
Extension('pdfreflow',
reflow_sources,
headers=reflow_headers,
libraries=poppler_libs+magick_libs+png_libs,
lib_dirs=poppler_lib_dirs+magick_lib_dirs+png_lib_dirs,
inc_dirs=poppler_inc_dirs+magick_inc_dirs+png_inc_dirs,
error=reflow_error,
cflags=['-DPNG_SKIP_SETJMP_CHECK'] if islinux else []
)
] ]
if iswindows: if iswindows:
extensions.append(Extension('winutil', extensions.append(Extension('winutil',
['calibre/utils/windows/winutil.c'], ['calibre/utils/windows/winutil.c'],
@ -346,10 +353,36 @@ class Build(Command):
# Command that compiles the reflow C++ sources with g++ and links them into
# a standalone pdf2xml executable in ~/bin
class BuildPDF2XML(Command):

    description = 'Build command line pdf2xml utility'

    def run(self, opts):
        binary = os.path.expanduser('~/bin/pdf2xml')
        objdir = self.j(self.d(self.SRC), 'build', 'objects', 'pdf2xml')
        if not os.path.exists(objdir):
            os.makedirs(objdir)

        compile_prefix = ['g++', '-pthread', '-pedantic', '-g', '-c', '-Wall',
                '-I/usr/include/poppler', '-I/usr/include/ImageMagick',
                '-DPDF2XML']
        link_libs = ['-lpoppler', '-lMagickWand', '-lpng', '-lpthread']

        objects = []
        # Sources whose name ends in python.cpp are left out of this build
        for source in (s for s in reflow_sources
                if not s.endswith('python.cpp')):
            target = self.j(objdir, self.b(source+'.o'))
            # self.newer decides whether the object file must be rebuilt from
            # its source and the shared headers
            if self.newer(target, [source]+reflow_headers):
                compile_cmd = compile_prefix + ['-o', target, source]
                self.info(*compile_cmd)
                subprocess.check_call(compile_cmd)
            objects.append(target)

        if self.newer(binary, objects):
            link_cmd = ['g++', '-g', '-o', binary] + objects + link_libs
            self.info(*link_cmd)
            subprocess.check_call(link_cmd)

        self.info('Binary installed as', binary)

View File

@ -192,6 +192,10 @@ class Install(Develop):
x = self.j(dest, x) x = self.j(dest, x)
if os.path.exists(dest): if os.path.exists(dest):
shutil.rmtree(x) shutil.rmtree(x)
for x in os.walk(dest):
for f in x[-1]:
if os.path.splitext(f)[1] in ('.c', '.cpp', '.h'):
os.remove(self.j(x[0], f))
dest = self.root + self.resources dest = self.root + self.resources
if os.path.exists(dest): if os.path.exists(dest):
shutil.rmtree(dest) shutil.rmtree(dest)
@ -241,4 +245,3 @@ class Sdist(Command):
os.remove(self.DEST) os.remove(self.DEST)

View File

@ -38,6 +38,7 @@ class LinuxFreeze(Command):
binary_includes = [ binary_includes = [
'/usr/bin/pdftohtml', '/usr/bin/pdftohtml',
'/usr/lib/libwmflite-0.2.so.7', '/usr/lib/libwmflite-0.2.so.7',
'/usr/lib/liblcms.so.1',
'/tmp/calibre-mount-helper', '/tmp/calibre-mount-helper',
'/usr/lib/libunrar.so', '/usr/lib/libunrar.so',
'/usr/lib/libsqlite3.so.0', '/usr/lib/libsqlite3.so.0',

View File

@ -55,7 +55,7 @@ if plugins is None:
sys.path.insert(0, plugin_path) sys.path.insert(0, plugin_path)
for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc', for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
'fontconfig', 'calibre_poppler'] + \ 'fontconfig', 'pdfreflow'] + \
(['winutil'] if iswindows else []) + \ (['winutil'] if iswindows else []) + \
(['usbobserver'] if isosx else []): (['usbobserver'] if isosx else []):
try: try:

View File

@ -161,6 +161,7 @@ quick_metadata = QuickMetadata()
def get_file_type_metadata(stream, ftype): def get_file_type_metadata(stream, ftype):
mi = MetaInformation(None, None) mi = MetaInformation(None, None)
ftype = ftype.lower().strip() ftype = ftype.lower().strip()
if _metadata_readers.has_key(ftype): if _metadata_readers.has_key(ftype):
for plugin in _metadata_readers[ftype]: for plugin in _metadata_readers[ftype]:
@ -168,6 +169,8 @@ def get_file_type_metadata(stream, ftype):
with plugin: with plugin:
try: try:
plugin.quick = quick_metadata.quick plugin.quick = quick_metadata.quick
if hasattr(stream, 'seek'):
stream.seek(0)
mi = plugin.get_metadata(stream, ftype.lower().strip()) mi = plugin.get_metadata(stream, ftype.lower().strip())
break break
except: except:

View File

@ -10,6 +10,7 @@ import sys, os, re, shutil
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
from calibre.constants import iswindows, isosx from calibre.constants import iswindows, isosx
from calibre.libunzip import update from calibre.libunzip import update
from calibre import prints
def option_parser(): def option_parser():
parser = OptionParser(usage='''\ parser = OptionParser(usage='''\
@ -28,6 +29,8 @@ Run an embedded python interpreter.
help='Debug the specified device driver.') help='Debug the specified device driver.')
parser.add_option('-g', '--gui', default=False, action='store_true', parser.add_option('-g', '--gui', default=False, action='store_true',
help='Run the GUI',) help='Run the GUI',)
parser.add_option('--paths', default=False, action='store_true',
help='Output the paths necessary to setup the calibre environment')
parser.add_option('--migrate', action='store_true', default=False, parser.add_option('--migrate', action='store_true', default=False,
help='Migrate old database. Needs two arguments. Path ' help='Migrate old database. Needs two arguments. Path '
'to library1.db and path to new library folder.') 'to library1.db and path to new library folder.')
@ -203,6 +206,10 @@ def main(args=sys.argv):
migrate(args[1], args[2]) migrate(args[1], args[2])
elif opts.add_simple_plugin is not None: elif opts.add_simple_plugin is not None:
add_simple_plugin(opts.add_simple_plugin) add_simple_plugin(opts.add_simple_plugin)
elif opts.paths:
prints('CALIBRE_RESOURCES_LOCATION='+sys.resources_location)
prints('CALIBRE_EXTENSIONS_LOCATION='+sys.extensions_location)
prints('CALIBRE_PYTHON_PATH='+os.pathsep.join(sys.path))
else: else:
from IPython.Shell import IPShellEmbed from IPython.Shell import IPShellEmbed
ipshell = IPShellEmbed() ipshell = IPShellEmbed()

View File

@ -3,6 +3,52 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''Read meta information from PDF files''' '''Read meta information from PDF files'''
from functools import partial
from calibre import plugins, prints
from calibre.ebooks.metadata import MetaInformation, string_to_authors#, authors_to_string
pdfreflow, pdfreflow_error = plugins['pdfreflow']
def get_metadata(stream, cover=True):
    '''
    Read metadata (and optionally the cover) from a PDF via the pdfreflow
    plugin.

    :param stream: File-like object; whatever ``read()`` returns is handed to
                   the plugin
    :param cover: Passed through to the plugin; when true and the plugin
                  returned a ``cover`` entry, it is stored as PNG cover data
    :return: A MetaInformation object
    :raises RuntimeError: If the pdfreflow plugin could not be loaded
    '''
    if pdfreflow is None:
        raise RuntimeError(pdfreflow_error)
    info = pdfreflow.get_metadata(stream.read(), cover)
    title = info.get('Title', None)

    # An empty author string is treated like a missing one, rather than being
    # passed on to string_to_authors
    au = info.get('Author', None)
    au = string_to_authors(au) if au else [_('Unknown')]
    mi = MetaInformation(title, au)

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        # Drop empty entries produced by stray or trailing commas
        tags = [x.strip() for x in keywords.split(',')]
        mi.tags = [x for x in tags if x]

    # The subject, when present, becomes the first tag
    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cover and 'cover' in info:
        data = info['cover']
        if data is None:
            prints(title, 'is an encrypted document, cover extraction not allowed.')
        else:
            mi.cover_data = ('png', data)

    return mi
# get_metadata with the cover argument pre-set to False
get_quick_metadata = partial(get_metadata, cover=False)
'''
import sys, os, cStringIO import sys, os, cStringIO
from threading import Thread from threading import Thread
@ -139,6 +185,6 @@ def get_cover(cover_path):
MagickSetImageFormat(wand, 'JPEG') MagickSetImageFormat(wand, 'JPEG')
MagickWriteImage(wand, '%s.jpg' % cover_path) MagickWriteImage(wand, '%s.jpg' % cover_path)
return open('%s.jpg' % cover_path, 'rb').read() return open('%s.jpg' % cover_path, 'rb').read()
'''

View File

@ -40,12 +40,12 @@
<string>...</string> <string>...</string>
</property> </property>
<property name="icon"> <property name="icon">
<iconset resource="../../../../resources/images.qrc"> <iconset resource="../../../work/calibre/resources/images.qrc">
<normaloff>:/images/document_open.svg</normaloff>:/images/document_open.svg</iconset> <normaloff>:/images/document_open.svg</normaloff>:/images/document_open.svg</iconset>
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0"> <item row="3" column="0">
<spacer name="verticalSpacer"> <spacer name="verticalSpacer">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
@ -64,15 +64,25 @@
<string>...</string> <string>...</string>
</property> </property>
<property name="icon"> <property name="icon">
<iconset resource="../../../../resources/images.qrc"> <iconset resource="../../../work/calibre/resources/images.qrc">
<normaloff>:/images/clear_left.svg</normaloff>:/images/clear_left.svg</iconset> <normaloff>:/images/clear_left.svg</normaloff>:/images/clear_left.svg</iconset>
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>The debug process outputs the intermediate HTML generated at various stages of the conversion process. This HTML can sometimes serve as a good starting point for hand editing a conversion.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<resources> <resources>
<include location="../../../../resources/images.qrc"/> <include location="../../../work/calibre/resources/images.qrc"/>
</resources> </resources>
<connections/> <connections/>
</ui> </ui>

View File

@ -1,63 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre.constants import plugins
from calibre.ebooks.metadata import MetaInformation, string_to_authors
poppler, poppler_err = plugins['calibre_poppler']
class NotAvailable(Exception):
    '''Raised by get_metadata when the calibre_poppler plugin is not loaded.'''
def get_metadata(stream, cover=True):
    '''
    Read metadata, and optionally a cover, from a PDF via the calibre_poppler
    plugin.

    :param stream: File-like object; whatever ``read()`` returns is loaded as
                   the PDF data. If it has a ``name`` attribute, that is used
                   to derive a title when the document has none
    :param cover: If true, and Qt may be used, the first page is rendered and
                  stored as JPEG cover data
    :return: A MetaInformation object
    :raises NotAvailable: If the calibre_poppler plugin could not be loaded
    '''
    if not poppler:
        raise NotAvailable('Failed to load poppler with error: '+poppler_err)
    raw = stream.read()
    doc = poppler.PDFDoc()
    doc.load(raw)

    title = doc.title
    if not title or not title.strip():
        title = _('Unknown')
        if hasattr(stream, 'name'):
            title = os.path.splitext(os.path.basename(stream.name))[0]
    author = doc.author
    authors = string_to_authors(author) if author else [_('Unknown')]
    creator = doc.creator
    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if doc.subject:
        mi.category = doc.subject
    if doc.keywords:
        mi.tags = [x.strip() for x in doc.keywords.split(',')]

    if cover:
        from calibre.gui2 import is_ok_to_use_qt
        cdata = None
        if is_ok_to_use_qt():
            try:
                cdata = doc.render_page(0)
            except Exception:
                # A failed render only costs the cover; Ctrl-C and SystemExit
                # are no longer swallowed here
                import traceback
                traceback.print_exc()
        if cdata is not None:
            mi.cover_data = ('jpg', cdata)

    # PDFDoc.load wraps the string buffer with QByteArray::fromRawData, which
    # does not copy it, so the data is released only after the document
    del doc
    del raw
    return mi

View File

@ -1,329 +0,0 @@
#define UNICODE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <poppler-qt4.h>
#include <QtCore/QBuffer>
#include <QtGui/QImage>
// Instance layout of the Python PDFDoc type.
typedef struct {
PyObject_HEAD
/* Type-specific fields go here. */
// The wrapped document: set to NULL in tp_new, assigned by load()/open(),
// deleted in dealloc.
Poppler::Document *doc;
} poppler_PDFDoc;
extern "C" {
// Destructor for PDFDoc: frees the wrapped document, then the Python object.
static void
poppler_PDFDoc_dealloc(poppler_PDFDoc* self)
{
    // delete on a NULL pointer is a no-op, so no explicit check is needed
    delete self->doc;
    self->ob_type->tp_free((PyObject*)self);
}
// tp_new for PDFDoc: allocates the object with no document attached.
static PyObject *
poppler_PDFDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    poppler_PDFDoc *obj = (poppler_PDFDoc *)type->tp_alloc(type, 0);
    if (obj == NULL) return NULL;  // allocation failed, propagate NULL

    obj->doc = NULL;
    return (PyObject *)obj;
}
// load(data): load a PDF document from a byte string.
// Returns None on success. Raises OverflowError if the data does not fit in
// a QByteArray and ValueError if Poppler cannot load it. Any previously
// loaded document is released first.
static PyObject *
poppler_PDFDoc_load(poppler_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    char *buffer; Py_ssize_t size;

    if (!PyArg_ParseTuple(args, "s#", &buffer, &size)) return NULL;

    // QByteArray sizes are ints; refuse data whose length does not fit
    int len = static_cast<int>(size);
    if (len < 0 || static_cast<Py_ssize_t>(len) != size) {
        PyErr_SetString(PyExc_OverflowError, "PDF data is too large to load.");
        return NULL;
    }

    // Take a deep copy. QByteArray::fromRawData does not copy the bytes and
    // requires them to outlive the array and all of its copies, which cannot
    // be guaranteed for memory owned by a Python string.
    QByteArray data(buffer, len);

    // Do not leak a document loaded by an earlier load()/open() call
    delete self->doc;
    self->doc = Poppler::Document::loadFromData(data);
    if (self->doc == NULL) {PyErr_SetString(PyExc_ValueError, "Could not load PDF file from data."); return NULL;}
    Py_RETURN_NONE;
}
}
// Convert a Python unicode object to a QString via UTF-8.
// Returns a null QString if py is NULL or not a unicode object, or if the
// UTF-8 encoding fails; in the latter case the Python exception raised by
// the encoder is left set.
static QString
poppler_convert_pystring(PyObject *py) {
    QString ans;

    // PyUnicode_AS_UNICODE must only be used on unicode objects
    if (py == NULL || !PyUnicode_Check(py)) return ans;

    PyObject *u8 = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(py), PyUnicode_GET_SIZE(py), "replace");
    if (u8 == NULL) return ans;

    // Pass the length explicitly so embedded NUL characters do not truncate
    ans = QString::fromUtf8(PyString_AS_STRING(u8), static_cast<int>(PyString_GET_SIZE(u8)));
    Py_DECREF(u8);
    return ans;
}
extern "C" {
// Convert a QString to a Python unicode object via UTF-8.
// Returns a new reference, or NULL with an exception set on failure.
static PyObject *
poppler_convert_qstring(const QString &src) {
    QByteArray data = src.toUtf8();
    // "strict" is the standard error handler; "error" is not a registered
    // handler name and would itself fail if a decoding error ever occurred
    return PyUnicode_DecodeUTF8(data.constData(), data.size(), "strict");
}
// open(path): load a PDF document from a file path given as a unicode string.
// Returns None on success. Raises TypeError if path is not unicode and
// ValueError if Poppler cannot load the file. Any previously loaded document
// is released first.
static PyObject *
poppler_PDFDoc_open(poppler_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    PyObject *fname;

    if (!PyArg_ParseTuple(args, "O", &fname)) return NULL;
    // The conversion helper reads the object as unicode, so check the type
    if (!PyUnicode_Check(fname)) {
        PyErr_SetString(PyExc_TypeError, "The file path must be a unicode string.");
        return NULL;
    }

    QString path = poppler_convert_pystring(fname);
    if (PyErr_Occurred()) return NULL;  // conversion failed

    // Do not leak a document loaded by an earlier load()/open() call
    delete self->doc;
    self->doc = Poppler::Document::load(path);
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "Could not load PDF file from path.");
        return NULL;
    }
    Py_RETURN_NONE;
}
// Shared implementation of the string metadata getters.
// field: 0=Title, 1=Author, 2=Subject, 3=Keywords, 4=Creator, 5=Producer.
// Returns a new reference to a unicode object, or NULL with an exception set
// (bad field index, no document loaded, or conversion failure).
static PyObject *
poppler_PDFDoc_getter(poppler_PDFDoc *self, int field)
{
    static const char *const keys[] = {
        "Title", "Author", "Subject", "Keywords", "Creator", "Producer"
    };
    const int nkeys = static_cast<int>(sizeof(keys)/sizeof(keys[0]));

    if (field < 0 || field >= nkeys) {
        PyErr_SetString(PyExc_Exception, "Bad field");
        return NULL;
    }
    // doc stays NULL until load()/open() succeeds
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded.");
        return NULL;
    }
    // The conversion already returns a new reference; an additional
    // Py_INCREF would leak it
    return poppler_convert_qstring(self->doc->info(QString(keys[field])));
}
// Shared implementation of the metadata setters. Setting is not implemented:
// always fails with -1, now with an exception set so that Python reports a
// meaningful error instead of "error return without exception set".
static int
poppler_PDFDoc_setter(poppler_PDFDoc *self, PyObject *val, int field) {
    PyErr_SetString(PyExc_NotImplementedError, "Setting PDF metadata is not supported.");
    return -1;
}
// Getter bound to the "title" attribute: field 0 ("Title") of the shared getter.
static PyObject *
poppler_PDFDoc_title_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 0);
}
// Getter bound to the "author" attribute: field 1 ("Author") of the shared getter.
static PyObject *
poppler_PDFDoc_author_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 1);
}
// Getter bound to the "subject" attribute: field 2 ("Subject") of the shared getter.
static PyObject *
poppler_PDFDoc_subject_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 2);
}
// Getter bound to the "keywords" attribute: field 3 ("Keywords") of the shared getter.
static PyObject *
poppler_PDFDoc_keywords_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 3);
}
// Getter bound to the "creator" attribute: field 4 ("Creator") of the shared getter.
static PyObject *
poppler_PDFDoc_creator_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 4);
}
// Getter bound to the "producer" attribute: field 5 ("Producer") of the shared getter.
static PyObject *
poppler_PDFDoc_producer_getter(poppler_PDFDoc *self, void *closure) {
return poppler_PDFDoc_getter(self, 5);
}
// Getter for the read-only "version" attribute: the document's PDF version
// as a Python float. Returns a new reference, or NULL with ValueError set if
// no document has been loaded.
static PyObject *
poppler_PDFDoc_version_getter(poppler_PDFDoc *self, void *closure) {
    // doc stays NULL until load()/open() succeeds
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded.");
        return NULL;
    }
    // PyFloat_FromDouble already returns a new reference; an additional
    // Py_INCREF would leak it
    return PyFloat_FromDouble(self->doc->pdfVersion());
}
// Setter bound to the "title" attribute; forwards to the shared setter
// (field 0), which always returns -1.
static int
poppler_PDFDoc_title_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 0);
}
// Setter bound to the "author" attribute; forwards to the shared setter
// (field 1), which always returns -1.
static int
poppler_PDFDoc_author_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 1);
}
// Setter bound to the "subject" attribute; forwards to the shared setter
// (field 2), which always returns -1.
static int
poppler_PDFDoc_subject_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 2);
}
// Setter bound to the "keywords" attribute; forwards to the shared setter
// (field 3), which always returns -1.
static int
poppler_PDFDoc_keywords_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 3);
}
// Setter bound to the "creator" attribute; forwards to the shared setter
// (field 4), which always returns -1.
static int
poppler_PDFDoc_creator_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 4);
}
// Setter bound to the "producer" attribute; forwards to the shared setter
// (field 5), which always returns -1.
static int
poppler_PDFDoc_producer_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
return poppler_PDFDoc_setter(self, val, 5);
}
}
// render_page(page_num, xdpi=166, ydpi=166): render one page to JPEG.
// Returns a new reference to a byte string holding the JPEG data, or NULL
// with an exception set: ValueError if no document is loaded, the document
// is locked or the page number is out of range; Exception if the page cannot
// be loaded, rendered or encoded.
static PyObject *
poppler_PDFDoc_render_page(poppler_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    float xdpi = 166.0, ydpi = 166.0;
    int num;

    if (!PyArg_ParseTuple(args, "i|ff", &num, &xdpi, &ydpi)) return NULL;

    // doc stays NULL until load()/open() succeeds
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded.");
        return NULL;
    }
    if (self->doc->isLocked()) {
        PyErr_SetString(PyExc_ValueError, "This document is copyrighted.");
        return NULL;
    }
    if (num < 0 || num >= self->doc->numPages()) {
        PyErr_SetString(PyExc_ValueError, "Invalid page number");
        return NULL;
    }

    Poppler::Page *page = self->doc->page(num);
    if (page == NULL) {
        PyErr_SetString(PyExc_Exception, "Failed to load page from PDF");
        return NULL;
    }
    QImage img = page->renderToImage(xdpi, ydpi);
    // The Page returned by Document::page() is owned by the caller, so it
    // must be deleted here or it leaks on every call
    delete page;
    if (img.isNull()) {
        PyErr_SetString(PyExc_Exception, "Failed to render first page of PDF");
        return NULL;
    }

    QByteArray ba;
    QBuffer buffer(&ba);
    if (!buffer.open(QIODevice::WriteOnly) || !img.save(&buffer, "JPEG")) {
        PyErr_SetString(PyExc_Exception, "Failed to save rendered page");
        return NULL;
    }

    // PyString_FromStringAndSize already returns a new reference; an
    // additional Py_INCREF would leak it
    return PyString_FromStringAndSize(ba.data(), ba.size());
}
// Method table of the PDFDoc type: load, open and render_page, each taking
// positional arguments only (METH_VARARGS).
static PyMethodDef poppler_PDFDoc_methods[] = {
{"load", (PyCFunction)poppler_PDFDoc_load, METH_VARARGS,
"Load a PDF document from a byte buffer (string)"
},
{"open", (PyCFunction)poppler_PDFDoc_open, METH_VARARGS,
"Load a PDF document from a file path (string)"
},
{"render_page", (PyCFunction)poppler_PDFDoc_render_page, METH_VARARGS,
"render_page(page_num, xdpi=166, ydpi=166) -> Render a page to a JPEG image. Page numbers start from zero."
},
{NULL} /* Sentinel */
};
// Getter for the read-only "pages" attribute: the number of pages as a
// Python int. Returns a new reference, or NULL with ValueError set if no
// document has been loaded.
static PyObject *
poppler_PDFDoc_pages_getter(poppler_PDFDoc *self, void *closure) {
    // doc stays NULL until load()/open() succeeds
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded.");
        return NULL;
    }
    // PyInt_FromLong already returns a new reference; an additional
    // Py_INCREF would leak it
    return PyInt_FromLong(static_cast<long>(self->doc->numPages()));
}
// Attribute table of the PDFDoc type. The six string metadata attributes have
// both a getter and a setter; "pages" and "version" have no setter (NULL).
// Each entry is {name, getter, setter, doc string, closure}.
static PyGetSetDef poppler_PDFDoc_getsetters[] = {
{(char *)"title",
(getter)poppler_PDFDoc_title_getter, (setter)poppler_PDFDoc_title_setter,
(char *)"Document title",
NULL},
{(char *)"author",
(getter)poppler_PDFDoc_author_getter, (setter)poppler_PDFDoc_author_setter,
(char *)"Document author",
NULL},
{(char *)"subject",
(getter)poppler_PDFDoc_subject_getter, (setter)poppler_PDFDoc_subject_setter,
(char *)"Document subject",
NULL},
{(char *)"keywords",
(getter)poppler_PDFDoc_keywords_getter, (setter)poppler_PDFDoc_keywords_setter,
(char *)"Document keywords",
NULL},
{(char *)"creator",
(getter)poppler_PDFDoc_creator_getter, (setter)poppler_PDFDoc_creator_setter,
(char *)"Document creator",
NULL},
{(char *)"producer",
(getter)poppler_PDFDoc_producer_getter, (setter)poppler_PDFDoc_producer_setter,
(char *)"Document producer",
NULL},
{(char *)"pages",
(getter)poppler_PDFDoc_pages_getter, NULL,
(char *)"Number of pages in document (read only)",
NULL},
{(char *)"version",
(getter)poppler_PDFDoc_version_getter, NULL,
(char *)"The PDF version (read only)",
NULL},
{NULL} /* Sentinel */
};
// Type object for calibre_poppler.PDFDoc. Only the destructor, the method
// table, the get/set table and tp_new are filled in; every other slot is 0.
static PyTypeObject poppler_PDFDocType = {
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"calibre_poppler.PDFDoc", /*tp_name*/
sizeof(poppler_PDFDoc), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)poppler_PDFDoc_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT, /*tp_flags*/
"PDF Documents", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
poppler_PDFDoc_methods, /* tp_methods */
0, /* tp_members */
poppler_PDFDoc_getsetters, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
poppler_PDFDoc_new, /* tp_new */
};
// Module-level function table: empty apart from the sentinel; the module
// only exposes the PDFDoc type.
static PyMethodDef poppler_methods[] = {
{NULL} /* Sentinel */
};
extern "C" {
// Module initialisation: readies the PDFDoc type, creates the
// calibre_poppler module and adds the type to it as "PDFDoc".
PyMODINIT_FUNC
initcalibre_poppler(void)
{
    if (PyType_Ready(&poppler_PDFDocType) < 0)
        return;

    PyObject *m = Py_InitModule3("calibre_poppler", poppler_methods,
            "Wrapper for the Poppler PDF library");
    // Module creation can fail; do not pass NULL to PyModule_AddObject
    if (m == NULL)
        return;

    // PyModule_AddObject steals the reference only on success
    Py_INCREF(&poppler_PDFDocType);
    if (PyModule_AddObject(m, "PDFDoc", (PyObject *)&poppler_PDFDocType) < 0)
        Py_DECREF(&poppler_PDFDocType);
}
}