mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
IGN:Initial implementation of PDF->XML engine
This commit is contained in:
parent
4efa4d7bb1
commit
5a94e3d965
@ -6,7 +6,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, socket, struct
|
||||
import os, socket, struct, subprocess
|
||||
from distutils.spawn import find_executable
|
||||
|
||||
from PyQt4 import pyqtconfig
|
||||
@ -42,6 +42,39 @@ elif find_executable('qmake'):
|
||||
QMAKE = find_executable('qmake')
|
||||
QMAKE = os.environ.get('QMAKE', QMAKE)
|
||||
|
||||
PKGCONFIG = find_executable('pkg-config')
|
||||
PKGCONFIG = os.environ.get('PKG_CONFIG', PKGCONFIG)
|
||||
|
||||
def run_pkgconfig(name, envvar, default, flag, prefix):
    '''
    Look up build settings for the package `name`.

    If `envvar` is non-empty, the value of that environment variable (or
    `default` when it is not set) is split on os.pathsep and used. If that
    produces nothing, ``pkg-config <flag> <name>`` is run and its output is
    split on `prefix`.

    Entries that do not exist on disk are discarded, except when `prefix` is
    '-l', because library names are not filesystem paths.

    :param name: The pkg-config package name
    :param envvar: Name of the overriding environment variable, or ''
    :param default: Value used when `envvar` is not set in the environment
    :param flag: The option passed to pkg-config
    :param prefix: The marker the pkg-config output is split on
    :return: A possibly empty list of strings. A failure to run pkg-config is
             reported on stdout and yields an empty list.
    '''
    def usable(items):
        items = [x.strip() for x in items]
        return [x for x in items if x and (prefix == '-l' or os.path.exists(x))]

    ans = []
    if envvar:
        ans = usable(os.environ.get(envvar, default).split(os.pathsep))
    if not ans:
        try:
            p = subprocess.Popen([PKGCONFIG, flag, name],
                    stdout=subprocess.PIPE)
            # communicate() drains and closes the pipe and waits for the
            # child, so no zombie process or open file handle is left behind
            raw = p.communicate()[0]
            if not isinstance(raw, str):
                # Only happens on interpreters where the pipe yields bytes
                raw = raw.decode('utf-8', 'replace')
            ans = usable(raw.split(prefix))
        except Exception:
            # Best effort: PKGCONFIG may be None or not executable. Unlike a
            # bare except this lets KeyboardInterrupt/SystemExit through.
            print('Failed to run pkg-config: %s for: %s' % (PKGCONFIG, name))

    return ans
|
||||
|
||||
def pkgconfig_include_dirs(name, envvar, default):
    '''
    Header search directories for the package `name`: run_pkgconfig() with
    the --cflags-only-I option, splitting on -I.
    '''
    flag, prefix = '--cflags-only-I', '-I'
    return run_pkgconfig(name, envvar, default, flag, prefix)
|
||||
|
||||
def pkgconfig_lib_dirs(name, envvar, default):
    '''
    Library search directories for the package `name`: run_pkgconfig() with
    the --libs-only-L option, splitting on -L.
    '''
    flag, prefix = '--libs-only-L', '-L'
    return run_pkgconfig(name, envvar, default, flag, prefix)
|
||||
|
||||
def pkgconfig_libs(name, envvar, default):
    '''
    Names of the libraries to link against for the package `name`:
    run_pkgconfig() with the --libs-only-l option, splitting on -l.
    '''
    flag, prefix = '--libs-only-l', '-l'
    return run_pkgconfig(name, envvar, default, flag, prefix)
|
||||
|
||||
def consolidate(envvar, default):
    '''
    Return the list of existing paths named by the environment variable
    `envvar`, using `default` when the variable is not set. The value is split
    on os.pathsep; blank entries and paths that do not exist are discarded.
    '''
    val = os.environ.get(envvar, default)
    # os.pathsep is a string constant, calling it raises TypeError
    ans = [x.strip() for x in val.split(os.pathsep)]
    return [x for x in ans if x and os.path.exists(x)]
|
||||
|
||||
pyqt = pyqtconfig.Configuration()
|
||||
|
||||
@ -50,28 +83,62 @@ qt_lib = pyqt.qt_lib_dir
|
||||
|
||||
fc_inc = '/usr/include/fontconfig'
|
||||
fc_lib = '/usr/lib'
|
||||
poppler_inc = '/usr/include/poppler/qt4'
|
||||
poppler_lib = '/usr/lib'
|
||||
poppler_libs = []
|
||||
podofo_inc = '/usr/include/podofo'
|
||||
podofo_lib = '/usr/lib'
|
||||
|
||||
if iswindows:
|
||||
fc_inc = r'C:\cygwin\home\kovid\fontconfig\include\fontconfig'
|
||||
fc_lib = r'C:\cygwin\home\kovid\fontconfig\lib'
|
||||
poppler_inc = r'C:\cygwin\home\kovid\poppler\include\poppler\qt4'
|
||||
poppler_lib = r'C:\cygwin\home\kovid\poppler\lib'
|
||||
poppler_libs = ['QtCore4', 'QtGui4']
|
||||
poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
|
||||
r'C:\cygwin\home\kovid\poppler\include\poppler')
|
||||
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+r'\qt4']
|
||||
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
|
||||
r'C:\cygwin\home\kovid\poppler\lib')
|
||||
popplerqt4_lib_dirs = poppler_lib_dirs
|
||||
poppler_libs = ['poppler']
|
||||
popplerqt4_libs = poppler_libs + ['QtCore4', 'QtGui4']
|
||||
podofo_inc = 'C:\\podofo\\include\\podofo'
|
||||
podofo_lib = r'C:\podofo'
|
||||
|
||||
if isosx:
|
||||
elif isosx:
|
||||
fc_inc = '/Users/kovid/fontconfig/include/fontconfig'
|
||||
fc_lib = '/Users/kovid/fontconfig/lib'
|
||||
poppler_inc = '/Volumes/sw/build/poppler-0.10.7/qt4/src'
|
||||
poppler_lib = '/Users/kovid/poppler/lib'
|
||||
poppler_inc_dirs = consolidate('POPPLER_INC_DIR',
|
||||
'/Volumes/sw/build/poppler-0.10.7/poppler')
|
||||
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
|
||||
poppler_lib_dirs = consolidate('POPPLER_LIB_DIR',
|
||||
'/Users/kovid/poppler/lib')
|
||||
popplerqt4_lib_dirs = poppler_lib_dirs
|
||||
poppler_libs = popplerqt4_libs = ['poppler']
|
||||
podofo_inc = '/usr/local/include/podofo'
|
||||
podofo_lib = '/usr/local/lib'
|
||||
else:
|
||||
# Include directories
|
||||
poppler_inc_dirs = pkgconfig_include_dirs('poppler',
|
||||
'POPPLER_INC_DIR', '/usr/include/poppler')
|
||||
popplerqt4_inc_dirs = pkgconfig_include_dirs('poppler-qt4', '', '')
|
||||
if not popplerqt4_inc_dirs:
|
||||
popplerqt4_inc_dirs = poppler_inc_dirs + [poppler_inc_dirs[0]+'/qt4']
|
||||
png_inc_dirs = pkgconfig_include_dirs('libpng', 'PNG_INC_DIR',
|
||||
'/usr/include')
|
||||
magick_inc_dirs = pkgconfig_include_dirs('MagickWand', 'MAGICK_INC', '/usr/include/ImageMagick')
|
||||
|
||||
# Library directories
|
||||
poppler_lib_dirs = popplerqt4_lib_dirs = pkgconfig_lib_dirs('poppler', 'POPPLER_LIB_DIR',
|
||||
'/usr/lib')
|
||||
png_lib_dirs = pkgconfig_lib_dirs('libpng', 'PNG_LIB_DIR', '/usr/lib')
|
||||
magick_lib_dirs = pkgconfig_lib_dirs('MagickWand', 'MAGICK_LIB', '/usr/lib')
|
||||
|
||||
# Libraries
|
||||
poppler_libs = pkgconfig_libs('poppler', '', '')
|
||||
if not poppler_libs:
|
||||
poppler_libs = ['poppler']
|
||||
popplerqt4_libs = pkgconfig_libs('poppler-qt4', '', '')
|
||||
if not popplerqt4_libs:
|
||||
popplerqt4_libs = ['poppler-qt4', 'poppler']
|
||||
magick_libs = pkgconfig_libs('MagickWand', '', '')
|
||||
if not magick_libs:
|
||||
magick_libs = ['MagickWand', 'MagickCore']
|
||||
png_libs = ['png']
|
||||
|
||||
|
||||
fc_inc = os.environ.get('FC_INC_DIR', fc_inc)
|
||||
@ -82,14 +149,27 @@ fc_error = None if os.path.exists(os.path.join(fc_inc, 'fontconfig.h')) else \
|
||||
'variables.')
|
||||
|
||||
|
||||
poppler_inc = os.environ.get('POPPLER_INC_DIR', poppler_inc)
|
||||
poppler_lib = os.environ.get('POPPLER_LIB_DIR', poppler_lib)
|
||||
poppler_error = None if os.path.exists(os.path.join(poppler_inc,
|
||||
'poppler-qt4.h')) else \
|
||||
poppler_error = None
|
||||
if not poppler_inc_dirs or not os.path.exists(
|
||||
os.path.join(poppler_inc_dirs[0], 'OutputDev.h')):
|
||||
poppler_error = \
|
||||
('Poppler not found on your system. Various PDF related',
|
||||
' functionality will not work. Use the POPPLER_INC_DIR and',
|
||||
' POPPLER_LIB_DIR environment variables.')
|
||||
|
||||
popplerqt4_error = None
|
||||
if not popplerqt4_inc_dirs or not os.path.exists(
|
||||
os.path.join(popplerqt4_inc_dirs[-1], 'poppler-qt4.h')):
|
||||
popplerqt4_error = \
|
||||
('Poppler Qt4 bindings not found on your system.')
|
||||
|
||||
# ImageMagick is considered available only if the first include directory
# contains a 'wand' entry
magick_error = None
if not magick_inc_dirs or not os.path.exists(os.path.join(magick_inc_dirs[0],
    'wand')):
    magick_error = ('ImageMagick not found on your system. '
            'Try setting the environment variables MAGICK_INC '
            'and MAGICK_LIB to help calibre locate the include and library '
            'files.')
|
||||
|
||||
podofo_lib = os.environ.get('PODOFO_LIB_DIR', podofo_lib)
|
||||
podofo_inc = os.environ.get('PODOFO_INC_DIR', podofo_inc)
|
||||
@ -116,3 +196,5 @@ except:
|
||||
HOST='unknown'
|
||||
|
||||
PROJECT=os.path.basename(os.path.abspath('.'))
|
||||
|
||||
|
||||
|
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
__all__ = [
|
||||
'pot', 'translations', 'get_translations', 'iso639',
|
||||
'build',
|
||||
'build', 'build_pdf2xml',
|
||||
'gui',
|
||||
'develop', 'install',
|
||||
'resources',
|
||||
@ -30,8 +30,9 @@ translations = Translations()
|
||||
get_translations = GetTranslations()
|
||||
iso639 = ISO639()
|
||||
|
||||
from setup.extensions import Build
|
||||
from setup.extensions import Build, BuildPDF2XML
|
||||
build = Build()
|
||||
build_pdf2xml = BuildPDF2XML()
|
||||
|
||||
from setup.install import Develop, Install, Sdist
|
||||
develop = Develop()
|
||||
|
@ -12,10 +12,12 @@ from distutils import sysconfig
|
||||
from PyQt4.pyqtconfig import QtGuiModuleMakefile
|
||||
|
||||
from setup import Command, islinux, isosx, SRC, iswindows
|
||||
from setup.build_environment import fc_inc, fc_lib, qt_inc, qt_lib, \
|
||||
fc_error, poppler_libs, poppler_lib, poppler_inc, podofo_inc, \
|
||||
from setup.build_environment import fc_inc, fc_lib, \
|
||||
fc_error, poppler_libs, poppler_lib_dirs, poppler_inc_dirs, podofo_inc, \
|
||||
podofo_lib, podofo_error, poppler_error, pyqt, OSX_SDK, NMAKE, \
|
||||
leopard_build, QMAKE, msvc, MT, win_inc, win_lib
|
||||
leopard_build, QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, \
|
||||
magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \
|
||||
magick_error, magick_libs
|
||||
MT
|
||||
isunix = islinux or isosx
|
||||
|
||||
@ -43,6 +45,10 @@ class Extension(object):
|
||||
self.ldflags = kwargs.get('ldflags', [])
|
||||
self.optional = kwargs.get('optional', False)
|
||||
|
||||
reflow_sources = glob.glob(os.path.join(SRC, 'calibre', 'ebooks', 'pdf', '*.cpp'))
|
||||
reflow_headers = glob.glob(os.path.join(SRC, 'calibre', 'ebooks', 'pdf', '*.h'))
|
||||
reflow_error = poppler_error if poppler_error else magick_error
|
||||
|
||||
extensions = [
|
||||
Extension('lzx',
|
||||
['calibre/utils/lzx/lzxmodule.c',
|
||||
@ -76,15 +82,6 @@ extensions = [
|
||||
Extension('cPalmdoc',
|
||||
['calibre/ebooks/compression/palmdoc.c']),
|
||||
|
||||
Extension('calibre_poppler',
|
||||
['calibre/utils/poppler/poppler.cpp'],
|
||||
libraries=(['poppler', 'poppler-qt4']+poppler_libs),
|
||||
lib_dirs=[os.environ.get('POPPLER_LIB_DIR',
|
||||
poppler_lib), qt_lib],
|
||||
inc_dirs=[poppler_inc, qt_inc],
|
||||
error=poppler_error,
|
||||
optional=True),
|
||||
|
||||
Extension('podofo',
|
||||
['calibre/utils/podofo/podofo.cpp'],
|
||||
libraries=['podofo'],
|
||||
@ -97,10 +94,20 @@ extensions = [
|
||||
inc_dirs = ['calibre/gui2/pictureflow'],
|
||||
headers = ['calibre/gui2/pictureflow/pictureflow.h'],
|
||||
sip_files = ['calibre/gui2/pictureflow/pictureflow.sip']
|
||||
)
|
||||
),
|
||||
|
||||
Extension('pdfreflow',
|
||||
reflow_sources,
|
||||
headers=reflow_headers,
|
||||
libraries=poppler_libs+magick_libs+png_libs,
|
||||
lib_dirs=poppler_lib_dirs+magick_lib_dirs+png_lib_dirs,
|
||||
inc_dirs=poppler_inc_dirs+magick_inc_dirs+png_inc_dirs,
|
||||
error=reflow_error,
|
||||
cflags=['-DPNG_SKIP_SETJMP_CHECK'] if islinux else []
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
if iswindows:
|
||||
extensions.append(Extension('winutil',
|
||||
['calibre/utils/windows/winutil.c'],
|
||||
@ -346,10 +353,36 @@ class Build(Command):
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class BuildPDF2XML(Command):
    '''
    Build the PDF reflow sources into a stand-alone ``pdf2xml`` executable at
    ~/bin/pdf2xml using g++.

    Object files are placed in build/objects/pdf2xml. Sources whose name ends
    in python.cpp are left out. A source is recompiled, and the binary
    relinked, only when self.newer() reports the target as out of date.
    '''

    description = 'Build command line pdf2xml utility'

    def run(self, opts):
        binary = os.path.expanduser('~/bin/pdf2xml')
        # NOTE(review): self.j/self.d/self.b look like join/dirname/basename
        # helpers provided by Command -- confirm
        objdir = self.j(self.d(self.SRC), 'build', 'objects', 'pdf2xml')
        if not os.path.exists(objdir):
            os.makedirs(objdir)

        compile_prefix = ['g++', '-pthread', '-pedantic', '-g', '-c', '-Wall',
                '-I/usr/include/poppler', '-I/usr/include/ImageMagick',
                '-DPDF2XML']
        link_libs = ['-lpoppler', '-lMagickWand', '-lpng', '-lpthread']

        objects = []
        for src in reflow_sources:
            if not src.endswith('python.cpp'):
                obj = self.j(objdir, self.b(src+'.o'))
                objects.append(obj)
                # Any change to a header rebuilds every object file
                if self.newer(obj, [src]+reflow_headers):
                    compile_cmd = compile_prefix + ['-o', obj, src]
                    self.info(*compile_cmd)
                    subprocess.check_call(compile_cmd)

        if self.newer(binary, objects):
            link_cmd = ['g++', '-g', '-o', binary] + objects + link_libs
            self.info(*link_cmd)
            subprocess.check_call(link_cmd)

        self.info('Binary installed as', binary)
|
||||
|
||||
|
||||
|
||||
|
@ -192,6 +192,10 @@ class Install(Develop):
|
||||
x = self.j(dest, x)
|
||||
if os.path.exists(dest):
|
||||
shutil.rmtree(x)
|
||||
for x in os.walk(dest):
|
||||
for f in x[-1]:
|
||||
if os.path.splitext(f)[1] in ('.c', '.cpp', '.h'):
|
||||
os.remove(self.j(x[0], f))
|
||||
dest = self.root + self.resources
|
||||
if os.path.exists(dest):
|
||||
shutil.rmtree(dest)
|
||||
@ -241,4 +245,3 @@ class Sdist(Command):
|
||||
os.remove(self.DEST)
|
||||
|
||||
|
||||
|
||||
|
@ -38,6 +38,7 @@ class LinuxFreeze(Command):
|
||||
binary_includes = [
|
||||
'/usr/bin/pdftohtml',
|
||||
'/usr/lib/libwmflite-0.2.so.7',
|
||||
'/usr/lib/liblcms.so.1',
|
||||
'/tmp/calibre-mount-helper',
|
||||
'/usr/lib/libunrar.so',
|
||||
'/usr/lib/libsqlite3.so.0',
|
||||
|
@ -55,7 +55,7 @@ if plugins is None:
|
||||
sys.path.insert(0, plugin_path)
|
||||
|
||||
for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo', 'cPalmdoc',
|
||||
'fontconfig', 'calibre_poppler'] + \
|
||||
'fontconfig', 'pdfreflow'] + \
|
||||
(['winutil'] if iswindows else []) + \
|
||||
(['usbobserver'] if isosx else []):
|
||||
try:
|
||||
|
@ -161,6 +161,7 @@ quick_metadata = QuickMetadata()
|
||||
|
||||
def get_file_type_metadata(stream, ftype):
|
||||
mi = MetaInformation(None, None)
|
||||
|
||||
ftype = ftype.lower().strip()
|
||||
if _metadata_readers.has_key(ftype):
|
||||
for plugin in _metadata_readers[ftype]:
|
||||
@ -168,6 +169,8 @@ def get_file_type_metadata(stream, ftype):
|
||||
with plugin:
|
||||
try:
|
||||
plugin.quick = quick_metadata.quick
|
||||
if hasattr(stream, 'seek'):
|
||||
stream.seek(0)
|
||||
mi = plugin.get_metadata(stream, ftype.lower().strip())
|
||||
break
|
||||
except:
|
||||
|
@ -10,6 +10,7 @@ import sys, os, re, shutil
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.constants import iswindows, isosx
|
||||
from calibre.libunzip import update
|
||||
from calibre import prints
|
||||
|
||||
def option_parser():
|
||||
parser = OptionParser(usage='''\
|
||||
@ -28,6 +29,8 @@ Run an embedded python interpreter.
|
||||
help='Debug the specified device driver.')
|
||||
parser.add_option('-g', '--gui', default=False, action='store_true',
|
||||
help='Run the GUI',)
|
||||
parser.add_option('--paths', default=False, action='store_true',
|
||||
help='Output the paths necessary to setup the calibre environment')
|
||||
parser.add_option('--migrate', action='store_true', default=False,
|
||||
help='Migrate old database. Needs two arguments. Path '
|
||||
'to library1.db and path to new library folder.')
|
||||
@ -203,6 +206,10 @@ def main(args=sys.argv):
|
||||
migrate(args[1], args[2])
|
||||
elif opts.add_simple_plugin is not None:
|
||||
add_simple_plugin(opts.add_simple_plugin)
|
||||
elif opts.paths:
|
||||
prints('CALIBRE_RESOURCES_LOCATION='+sys.resources_location)
|
||||
prints('CALIBRE_EXTENSIONS_LOCATION='+sys.extensions_location)
|
||||
prints('CALIBRE_PYTHON_PATH='+os.pathsep.join(sys.path))
|
||||
else:
|
||||
from IPython.Shell import IPShellEmbed
|
||||
ipshell = IPShellEmbed()
|
||||
|
@ -3,6 +3,52 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''Read meta information from PDF files'''
|
||||
|
||||
from functools import partial
|
||||
|
||||
from calibre import plugins, prints
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors#, authors_to_string
|
||||
|
||||
pdfreflow, pdfreflow_error = plugins['pdfreflow']
|
||||
|
||||
def get_metadata(stream, cover=True):
    '''
    Read metadata from the PDF in `stream` using the pdfreflow plugin.

    The Title and Author entries become the title and authors (authors
    default to Unknown), Creator becomes the book producer, and the comma
    separated Keywords become tags with Subject, if present, as the first tag.

    :param cover: If True and the plugin returned a cover, it is stored as PNG
                  cover data. A cover of None is reported as an encrypted
                  document.
    :raises RuntimeError: If the pdfreflow plugin could not be loaded
    '''
    if pdfreflow is None:
        raise RuntimeError(pdfreflow_error)
    info = pdfreflow.get_metadata(stream.read(), cover)

    title = info.get('Title', None)
    author = info.get('Author', None)
    authors = [_('Unknown')] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    producer = info.get('Creator', None)
    if producer:
        mi.book_producer = producer

    keywords = info.get('Keywords', None)
    mi.tags = [kw.strip() for kw in keywords.split(',')] if keywords else []
    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cover and 'cover' in info:
        cdata = info['cover']
        if cdata is not None:
            mi.cover_data = ('png', cdata)
        else:
            prints(title, 'is an encrypted document, cover extraction not allowed.')

    return mi
|
||||
|
||||
|
||||
|
||||
get_quick_metadata = partial(get_metadata, cover=False)
|
||||
|
||||
'''
|
||||
import sys, os, cStringIO
|
||||
from threading import Thread
|
||||
|
||||
@ -139,6 +185,6 @@ def get_cover(cover_path):
|
||||
MagickSetImageFormat(wand, 'JPEG')
|
||||
MagickWriteImage(wand, '%s.jpg' % cover_path)
|
||||
return open('%s.jpg' % cover_path, 'rb').read()
|
||||
|
||||
'''
|
||||
|
||||
|
||||
|
@ -40,12 +40,12 @@
|
||||
<string>...</string>
|
||||
</property>
|
||||
<property name="icon">
|
||||
<iconset resource="../../../../resources/images.qrc">
|
||||
<iconset resource="../../../work/calibre/resources/images.qrc">
|
||||
<normaloff>:/images/document_open.svg</normaloff>:/images/document_open.svg</iconset>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0">
|
||||
<item row="3" column="0">
|
||||
<spacer name="verticalSpacer">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
@ -64,15 +64,25 @@
|
||||
<string>...</string>
|
||||
</property>
|
||||
<property name="icon">
|
||||
<iconset resource="../../../../resources/images.qrc">
|
||||
<iconset resource="../../../work/calibre/resources/images.qrc">
|
||||
<normaloff>:/images/clear_left.svg</normaloff>:/images/clear_left.svg</iconset>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0">
|
||||
<widget class="QLabel" name="label_2">
|
||||
<property name="text">
|
||||
<string>The debug process outputs the intermediate HTML generated at various stages of the conversion process. This HTML can sometimes serve as a good starting point for hand editing a conversion.</string>
|
||||
</property>
|
||||
<property name="wordWrap">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<resources>
|
||||
<include location="../../../../resources/images.qrc"/>
|
||||
<include location="../../../work/calibre/resources/images.qrc"/>
|
||||
</resources>
|
||||
<connections/>
|
||||
</ui>
|
||||
|
@ -1,63 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
|
||||
from calibre.constants import plugins
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors
|
||||
|
||||
poppler, poppler_err = plugins['calibre_poppler']
|
||||
|
||||
class NotAvailable(Exception):
|
||||
pass
|
||||
|
||||
def get_metadata(stream, cover=True):
    '''
    Read metadata from the PDF in `stream` using the calibre_poppler plugin.

    An empty title falls back to the base name of stream.name (without
    extension) when the stream has a name, otherwise to Unknown. When `cover`
    is True and Qt may be used, the first page is rendered and stored as JPEG
    cover data; rendering errors are printed and otherwise ignored.

    :raises NotAvailable: If the poppler plugin could not be loaded
    '''
    if not poppler:
        raise NotAvailable('Failed to load poppler with error: '+poppler_err)

    data = stream.read()
    doc = poppler.PDFDoc()
    doc.load(data)
    del data

    title = doc.title
    if not (title and title.strip()):
        if hasattr(stream, 'name'):
            title = os.path.splitext(os.path.basename(stream.name))[0]
        else:
            title = _('Unknown')

    author = doc.author
    if author:
        authors = string_to_authors(author)
    else:
        authors = [_('Unknown')]
    creator = doc.creator
    mi = MetaInformation(title, authors)

    if creator:
        mi.book_producer = creator
    if doc.subject:
        mi.category = doc.subject
    if doc.keywords:
        mi.tags = [kw.strip() for kw in doc.keywords.split(',')]

    if cover:
        from calibre.gui2 import is_ok_to_use_qt
        rendered = None
        if is_ok_to_use_qt():
            try:
                rendered = doc.render_page(0)
            except:
                import traceback
                traceback.print_exc()
        if rendered is not None:
            mi.cover_data = ('jpg', rendered)

    del doc
    return mi
|
||||
|
||||
|
||||
|
||||
|
@ -1,329 +0,0 @@
|
||||
#define UNICODE
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
#include <poppler-qt4.h>
|
||||
#include <QtCore/QBuffer>
|
||||
#include <QtGui/QImage>
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
/* Type-specific fields go here. */
|
||||
Poppler::Document *doc;
|
||||
|
||||
} poppler_PDFDoc;
|
||||
|
||||
extern "C" {
|
||||
static void
|
||||
poppler_PDFDoc_dealloc(poppler_PDFDoc* self)
|
||||
{
|
||||
if (self->doc != NULL) delete self->doc;
|
||||
self->ob_type->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
poppler_PDFDoc_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
poppler_PDFDoc *self;
|
||||
|
||||
self = (poppler_PDFDoc *)type->tp_alloc(type, 0);
|
||||
if (self != NULL) {
|
||||
self->doc = NULL;
|
||||
}
|
||||
|
||||
return (PyObject *)self;
|
||||
}
|
||||
|
||||
/*
 * PDFDoc.load(data): load a PDF document from a byte buffer (string).
 * Returns None on success. Raises ValueError if Poppler cannot load the
 * data, in which case no document is attached to the object.
 */
static PyObject *
poppler_PDFDoc_load(poppler_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    char *buffer; Py_ssize_t size; QByteArray data;

    if (!PyArg_ParseTuple(args, "s#", &buffer, &size)) return NULL;

    /* Free any document from an earlier load()/open(), otherwise it is leaked
       when self->doc is overwritten below. */
    if (self->doc != NULL) { delete self->doc; self->doc = NULL; }

    /* NOTE(review): fromRawData() does not copy the buffer, which is owned by
       the Python string argument -- confirm Poppler does not keep referring
       to it after this call returns. */
    data = QByteArray::fromRawData(buffer, size);
    self->doc = Poppler::Document::loadFromData(data);
    if (self->doc == NULL) {PyErr_SetString(PyExc_ValueError, "Could not load PDF file from data."); return NULL;}
    Py_RETURN_NONE;
}
|
||||
}
|
||||
static QString
|
||||
poppler_convert_pystring(PyObject *py) {
|
||||
QString ans;
|
||||
Py_UNICODE* u = PyUnicode_AS_UNICODE(py);
|
||||
PyObject *u8 = PyUnicode_EncodeUTF8(u, PyUnicode_GET_SIZE(py), "replace");
|
||||
if (u8 == NULL) { PyErr_NoMemory(); return NULL; }
|
||||
ans = QString::fromUtf8(PyString_AS_STRING(u8));
|
||||
Py_DECREF(u8);
|
||||
return ans;
|
||||
}
|
||||
extern "C" {
|
||||
static PyObject *
|
||||
poppler_convert_qstring(const QString &src) {
|
||||
QByteArray data = src.toUtf8();
|
||||
const char *cdata = data.constData();
|
||||
int sz = data.size();
|
||||
return PyUnicode_Decode(cdata, sz, "utf-8", "error");
|
||||
}
|
||||
|
||||
|
||||
/*
 * PDFDoc.open(path): load a PDF document from a file path given as a unicode
 * string. Returns None on success. Raises TypeError if path is not a unicode
 * object and ValueError if Poppler cannot load the file.
 */
static PyObject *
poppler_PDFDoc_open(poppler_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    PyObject *fname; QString _fname;
    if (!PyArg_ParseTuple(args, "O", &fname)) return NULL;
    /* poppler_convert_pystring() uses the PyUnicode_* macros, which are only
       valid on unicode objects */
    if (!PyUnicode_Check(fname)) {
        PyErr_SetString(PyExc_TypeError, "The file path must be a unicode string");
        return NULL;
    }
    _fname = poppler_convert_pystring(fname);
    if (PyErr_Occurred()) return NULL;

    /* Free any document from an earlier load()/open(), otherwise it is leaked
       when self->doc is overwritten below. */
    if (self->doc != NULL) { delete self->doc; self->doc = NULL; }

    self->doc = Poppler::Document::load(_fname);
    if (self->doc == NULL) {
        /* Without this check a failed load looks like success and the next
           attribute access dereferences a NULL pointer */
        PyErr_SetString(PyExc_ValueError, "Could not load PDF file from path.");
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||
|
||||
/*
 * Return the document info entry selected by field as a new unicode object:
 * 0 Title, 1 Author, 2 Subject, 3 Keywords, 4 Creator, 5 Producer.
 * Returns NULL with an exception set for an unknown field or if no document
 * has been loaded.
 */
static PyObject *
poppler_PDFDoc_getter(poppler_PDFDoc *self, int field)
{
    const char *s;
    switch (field) {
        case 0:
            s = "Title"; break;
        case 1:
            s = "Author"; break;
        case 2:
            s = "Subject"; break;
        case 3:
            s = "Keywords"; break;
        case 4:
            s = "Creator"; break;
        case 5:
            s = "Producer"; break;
        default:
            PyErr_SetString(PyExc_Exception, "Bad field");
            return NULL;
    }
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded");
        return NULL;
    }
    /* poppler_convert_qstring() already returns a new reference, which is
       what a getter must hand back. An extra Py_INCREF would leak it. */
    return poppler_convert_qstring(self->doc->info(QString(s)));
}
|
||||
|
||||
static int
|
||||
poppler_PDFDoc_setter(poppler_PDFDoc *self, PyObject *val, int field) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
poppler_PDFDoc_title_getter(poppler_PDFDoc *self, void *closure) {
|
||||
return poppler_PDFDoc_getter(self, 0);
|
||||
}
|
||||
static PyObject *
|
||||
poppler_PDFDoc_author_getter(poppler_PDFDoc *self, void *closure) {
|
||||
return poppler_PDFDoc_getter(self, 1);
|
||||
}
|
||||
static PyObject *
|
||||
poppler_PDFDoc_subject_getter(poppler_PDFDoc *self, void *closure) {
|
||||
return poppler_PDFDoc_getter(self, 2);
|
||||
}
|
||||
static PyObject *
|
||||
poppler_PDFDoc_keywords_getter(poppler_PDFDoc *self, void *closure) {
|
||||
return poppler_PDFDoc_getter(self, 3);
|
||||
}
|
||||
static PyObject *
|
||||
poppler_PDFDoc_creator_getter(poppler_PDFDoc *self, void *closure) {
|
||||
return poppler_PDFDoc_getter(self, 4);
|
||||
}
|
||||
static PyObject *
|
||||
poppler_PDFDoc_producer_getter(poppler_PDFDoc *self, void *closure) {
|
||||
return poppler_PDFDoc_getter(self, 5);
|
||||
}
|
||||
/*
 * Getter for the read only "version" attribute: the PDF version as a float.
 * Raises ValueError if no document has been loaded.
 */
static PyObject *
poppler_PDFDoc_version_getter(poppler_PDFDoc *self, void *closure) {
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded");
        return NULL;
    }
    /* PyFloat_FromDouble() returns a new reference; an extra Py_INCREF
       would leak it. */
    return PyFloat_FromDouble(self->doc->pdfVersion());
}
|
||||
|
||||
|
||||
static int
|
||||
poppler_PDFDoc_title_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
|
||||
return poppler_PDFDoc_setter(self, val, 0);
|
||||
}
|
||||
static int
|
||||
poppler_PDFDoc_author_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
|
||||
return poppler_PDFDoc_setter(self, val, 1);
|
||||
}
|
||||
static int
|
||||
poppler_PDFDoc_subject_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
|
||||
return poppler_PDFDoc_setter(self, val, 2);
|
||||
}
|
||||
static int
|
||||
poppler_PDFDoc_keywords_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
|
||||
return poppler_PDFDoc_setter(self, val, 3);
|
||||
}
|
||||
static int
|
||||
poppler_PDFDoc_creator_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
|
||||
return poppler_PDFDoc_setter(self, val, 4);
|
||||
}
|
||||
static int
|
||||
poppler_PDFDoc_producer_setter(poppler_PDFDoc *self, PyObject *val, void *closure) {
|
||||
return poppler_PDFDoc_setter(self, val, 5);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * PDFDoc.render_page(page_num, xdpi=166, ydpi=166): render a page and return
 * it as a byte string of JPEG data. Page numbers start from zero.
 * Raises ValueError if no document is loaded, the document is locked or the
 * page number is out of range, and Exception if rendering or JPEG encoding
 * fails.
 */
static PyObject *
poppler_PDFDoc_render_page(poppler_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    QImage img;
    float xdpi = 166.0, ydpi = 166.0;
    Poppler::Page *page;
    QByteArray ba;
    QBuffer buffer(&ba);
    int num;

    if (!PyArg_ParseTuple(args, "i|ff", &num, &xdpi, &ydpi)) return NULL;
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded");
        return NULL;
    }
    if ( self->doc->isLocked()) {
        PyErr_SetString(PyExc_ValueError, "This document is copyrighted.");
        return NULL;
    }

    if ( num < 0 || num >= self->doc->numPages()) {
        PyErr_SetString(PyExc_ValueError, "Invalid page number");
        return NULL;
    }

    page = self->doc->page(num);
    if (page == NULL) {
        PyErr_SetString(PyExc_Exception, "Failed to render first page of PDF");
        return NULL;
    }
    img = page->renderToImage(xdpi, ydpi);
    /* The caller owns the object returned by Document::page() and it is not
       needed once the image has been produced */
    delete page;
    if (img.isNull()) {
        PyErr_SetString(PyExc_Exception, "Failed to render first page of PDF");
        return NULL;
    }
    buffer.open(QIODevice::WriteOnly);
    if (!img.save(&buffer, "JPEG")) {
        PyErr_SetString(PyExc_Exception, "Failed to save rendered page");
        return NULL;
    }
    /* PyString_FromStringAndSize() returns a new reference; an extra
       Py_INCREF would leak the image data. */
    return PyString_FromStringAndSize(ba.data(), ba.size());
}
|
||||
|
||||
static PyMethodDef poppler_PDFDoc_methods[] = {
|
||||
{"load", (PyCFunction)poppler_PDFDoc_load, METH_VARARGS,
|
||||
"Load a PDF document from a byte buffer (string)"
|
||||
},
|
||||
{"open", (PyCFunction)poppler_PDFDoc_open, METH_VARARGS,
|
||||
"Load a PDF document from a file path (string)"
|
||||
},
|
||||
{"render_page", (PyCFunction)poppler_PDFDoc_render_page, METH_VARARGS,
|
||||
"render_page(page_num, xdpi=166, ydpi=166) -> Render a page to a JPEG image. Page numbers start from zero."
|
||||
},
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
/*
 * Getter for the read only "pages" attribute: the number of pages in the
 * document. Raises ValueError if no document has been loaded.
 */
static PyObject *
poppler_PDFDoc_pages_getter(poppler_PDFDoc *self, void *closure) {
    if (self->doc == NULL) {
        PyErr_SetString(PyExc_ValueError, "No PDF document has been loaded");
        return NULL;
    }
    int pages = self->doc->numPages();
    /* PyInt_FromLong() returns a new reference; an extra Py_INCREF would
       leak it. */
    return PyInt_FromLong(static_cast<long>(pages));
}
|
||||
|
||||
static PyGetSetDef poppler_PDFDoc_getsetters[] = {
|
||||
{(char *)"title",
|
||||
(getter)poppler_PDFDoc_title_getter, (setter)poppler_PDFDoc_title_setter,
|
||||
(char *)"Document title",
|
||||
NULL},
|
||||
{(char *)"author",
|
||||
(getter)poppler_PDFDoc_author_getter, (setter)poppler_PDFDoc_author_setter,
|
||||
(char *)"Document author",
|
||||
NULL},
|
||||
{(char *)"subject",
|
||||
(getter)poppler_PDFDoc_subject_getter, (setter)poppler_PDFDoc_subject_setter,
|
||||
(char *)"Document subject",
|
||||
NULL},
|
||||
{(char *)"keywords",
|
||||
(getter)poppler_PDFDoc_keywords_getter, (setter)poppler_PDFDoc_keywords_setter,
|
||||
(char *)"Document keywords",
|
||||
NULL},
|
||||
{(char *)"creator",
|
||||
(getter)poppler_PDFDoc_creator_getter, (setter)poppler_PDFDoc_creator_setter,
|
||||
(char *)"Document creator",
|
||||
NULL},
|
||||
{(char *)"producer",
|
||||
(getter)poppler_PDFDoc_producer_getter, (setter)poppler_PDFDoc_producer_setter,
|
||||
(char *)"Document producer",
|
||||
NULL},
|
||||
{(char *)"pages",
|
||||
(getter)poppler_PDFDoc_pages_getter, NULL,
|
||||
(char *)"Number of pages in document (read only)",
|
||||
NULL},
|
||||
{(char *)"version",
|
||||
(getter)poppler_PDFDoc_version_getter, NULL,
|
||||
(char *)"The PDF version (read only)",
|
||||
NULL},
|
||||
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
|
||||
|
||||
static PyTypeObject poppler_PDFDocType = {
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /*ob_size*/
|
||||
"calibre_poppler.PDFDoc", /*tp_name*/
|
||||
sizeof(poppler_PDFDoc), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)poppler_PDFDoc_dealloc, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
0, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
0, /*tp_compare*/
|
||||
0, /*tp_repr*/
|
||||
0, /*tp_as_number*/
|
||||
0, /*tp_as_sequence*/
|
||||
0, /*tp_as_mapping*/
|
||||
0, /*tp_hash */
|
||||
0, /*tp_call*/
|
||||
0, /*tp_str*/
|
||||
0, /*tp_getattro*/
|
||||
0, /*tp_setattro*/
|
||||
0, /*tp_as_buffer*/
|
||||
Py_TPFLAGS_DEFAULT, /*tp_flags*/
|
||||
"PDF Documents", /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
poppler_PDFDoc_methods, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
poppler_PDFDoc_getsetters, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
poppler_PDFDoc_new, /* tp_new */
|
||||
};
|
||||
|
||||
|
||||
|
||||
static PyMethodDef poppler_methods[] = {
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
|
||||
PyMODINIT_FUNC
|
||||
initcalibre_poppler(void)
|
||||
{
|
||||
PyObject* m;
|
||||
|
||||
if (PyType_Ready(&poppler_PDFDocType) < 0)
|
||||
return;
|
||||
|
||||
m = Py_InitModule3("calibre_poppler", poppler_methods,
|
||||
"Wrapper for the Poppler PDF library");
|
||||
|
||||
Py_INCREF(&poppler_PDFDocType);
|
||||
PyModule_AddObject(m, "PDFDoc", (PyObject *)&poppler_PDFDocType);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user