Switch to using pdftk to set PDF metadata on Windows and Linux, as it is more robust

This commit is contained in:
Kovid Goyal 2009-05-08 14:30:26 -07:00
parent 5d3c309258
commit f371247347
5 changed files with 133 additions and 16 deletions

View File

@ -11,6 +11,7 @@ LIBUSB_DIR = 'C:\\libusb'
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll' LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
PDFTOHTML = 'C:\\cygwin\\home\\kovid\\poppler-0.10.6\\rel\\pdftohtml.exe' PDFTOHTML = 'C:\\cygwin\\home\\kovid\\poppler-0.10.6\\rel\\pdftohtml.exe'
IMAGEMAGICK_DIR = 'C:\\ImageMagick' IMAGEMAGICK_DIR = 'C:\\ImageMagick'
PDFTK = 'C:\\pdftk.exe'
FONTCONFIG_DIR = 'C:\\fontconfig' FONTCONFIG_DIR = 'C:\\fontconfig'
VC90 = r'C:\VC90.CRT' VC90 = r'C:\VC90.CRT'
@ -100,6 +101,9 @@ class BuildEXE(py2exe.build_exe.py2exe):
shutil.copyfile(PDFTOHTML, os.path.join(PY2EXE_DIR, os.path.basename(PDFTOHTML))) shutil.copyfile(PDFTOHTML, os.path.join(PY2EXE_DIR, os.path.basename(PDFTOHTML)))
shutil.copyfile(PDFTOHTML+'.manifest', os.path.join(PY2EXE_DIR, shutil.copyfile(PDFTOHTML+'.manifest', os.path.join(PY2EXE_DIR,
os.path.basename(PDFTOHTML)+'.manifest')) os.path.basename(PDFTOHTML)+'.manifest'))
print '\tAdding pdftk'
shutil.copyfile(PDFTK, os.path.join(PY2EXE_DIR, os.path.basename(PDFTK)))
print '\tAdding ImageMagick' print '\tAdding ImageMagick'
for f in os.listdir(IMAGEMAGICK_DIR): for f in os.listdir(IMAGEMAGICK_DIR):
shutil.copyfile(os.path.join(IMAGEMAGICK_DIR, f), os.path.join(PY2EXE_DIR, f)) shutil.copyfile(os.path.join(IMAGEMAGICK_DIR, f), os.path.join(PY2EXE_DIR, f))

View File

@ -9,6 +9,7 @@ from threading import Thread
from calibre import FileWrapper from calibre import FileWrapper
from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
from pyPdf import PdfFileReader, PdfFileWriter from pyPdf import PdfFileReader, PdfFileWriter
from calibre.utils.pdftk import set_metadata as pdftk_set_metadata
def get_metadata(stream): def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """ """ Return metadata as a L{MetaInfo} object """
@ -49,6 +50,12 @@ class MetadataWriter(Thread):
def set_metadata(stream, mi): def set_metadata(stream, mi):
stream.seek(0) stream.seek(0)
try:
pdftk_set_metadata(stream, mi)
except:
pass
else:
return
# Use a StringIO object for the pdf because we will want to over # Use a StringIO object for the pdf because we will want to over
# write it later and if we are working on the stream directly it # write it later and if we are working on the stream directly it

View File

@ -1,8 +1,8 @@
from __future__ import with_statement from __future__ import with_statement
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" """
Provides platform independent temporary files that persist even after Provides platform independent temporary files that persist even after
being closed. being closed.
""" """
import tempfile, os, atexit, shutil import tempfile, os, atexit, shutil
@ -15,38 +15,38 @@ def cleanup(path):
if os.path.exists(path): if os.path.exists(path):
os.remove(path) os.remove(path)
except: except:
pass pass
class PersistentTemporaryFile(object): class PersistentTemporaryFile(object):
""" """
A file-like object that is a temporary file that is available even after being closed on A file-like object that is a temporary file that is available even after being closed on
all platforms. It is automatically deleted on normal program termination. all platforms. It is automatically deleted on normal program termination.
""" """
_file = None _file = None
def __init__(self, suffix="", prefix="", dir=None, mode='w+b'): def __init__(self, suffix="", prefix="", dir=None, mode='w+b'):
if prefix == None: if prefix == None:
prefix = "" prefix = ""
fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix, fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix,
dir=dir) dir=dir)
self._file = os.fdopen(fd, 'w+b') self._file = os.fdopen(fd, mode)
self._name = name self._name = name
atexit.register(cleanup, name) atexit.register(cleanup, name)
def __getattr__(self, name): def __getattr__(self, name):
if name == 'name': if name == 'name':
return self.__dict__['_name'] return self.__dict__['_name']
return getattr(self.__dict__['_file'], name) return getattr(self.__dict__['_file'], name)
def __enter__(self): def __enter__(self):
return self return self
def __exit__(self, *args): def __exit__(self, *args):
self.close() self.close()
def __del__(self): def __del__(self):
self.close() self.close()
def PersistentTemporaryDirectory(suffix='', prefix='', dir=None): def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
''' '''
@ -66,12 +66,37 @@ class TemporaryDirectory(object):
self.prefix = prefix self.prefix = prefix
self.dir = dir self.dir = dir
self.keep = keep self.keep = keep
def __enter__(self): def __enter__(self):
self.tdir = tempfile.mkdtemp(self.suffix, __appname__+"_"+ __version__+"_" +self.prefix, self.dir) self.tdir = tempfile.mkdtemp(self.suffix, __appname__+"_"+ __version__+"_" +self.prefix, self.dir)
return self.tdir return self.tdir
def __exit__(self, *args): def __exit__(self, *args):
if not self.keep and os.path.exists(self.tdir): if not self.keep and os.path.exists(self.tdir):
shutil.rmtree(self.tdir, ignore_errors=True) shutil.rmtree(self.tdir, ignore_errors=True)
class TemporaryFile(object):
    '''
    Context manager that creates a temporary file and hands back its *path*.

    On entry a file is created with :func:`tempfile.mkstemp`, immediately
    closed, and its path is returned, so the caller (or an external program)
    can open it by name. On exit the path is passed to ``cleanup``, which
    removes the file if it still exists.

    :param suffix: Suffix for the file name; ``None`` is treated as ``''``.
    :param prefix: Extra prefix appended after the application name and
                   version; ``None`` is treated as ``''``.
    :param dir: Directory to create the file in, passed to ``mkstemp``.
    :param mode: Mode used when wrapping the descriptor with ``os.fdopen``.
    '''

    def __init__(self, suffix="", prefix="", dir=None, mode='w+b'):
        if prefix is None:
            prefix = ''
        if suffix is None:
            suffix = ''
        self.prefix, self.suffix, self.dir, self.mode = prefix, suffix, dir, mode
        self._file = None

    def __enter__(self):
        fd, name = tempfile.mkstemp(self.suffix,
                __appname__+"_"+ __version__+"_" + self.prefix,
                dir=self.dir)
        try:
            self._file = os.fdopen(fd, self.mode)
        except:
            # __exit__ is not called when __enter__ raises, so release the
            # descriptor and the file created by mkstemp here before
            # propagating the error.
            try:
                os.close(fd)
            except OSError:
                pass
            cleanup(name)
            raise
        self._name = name
        # Only the path is handed out; close our handle so the file can be
        # reopened by name.
        self._file.close()
        return name

    def __exit__(self, *args):
        cleanup(self._name)

View File

@ -20,6 +20,7 @@ DEPENDENCIES = [
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'), ('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
('dnspython', '1.6.0', 'dnspython', 'dnspython', 'dnspython', 'dnspython'), ('dnspython', '1.6.0', 'dnspython', 'dnspython', 'dnspython', 'dnspython'),
('poppler', '0.10.5', 'poppler', 'poppler', 'poppler', 'poppler'), ('poppler', '0.10.5', 'poppler', 'poppler', 'poppler', 'poppler'),
('pdftk', '1.12', 'pdftk', 'pdftk', 'pdftk', 'pdftk'),
] ]

View File

@ -0,0 +1,80 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, subprocess, os, errno
from functools import partial
from contextlib import nested
from calibre.ptempfile import TemporaryFile
from calibre.constants import iswindows
# Command used to run pdftk. The bare name is resolved by the operating
# system when the process is launched.
PDFTK = 'pdftk'
# Callable used to launch pdftk; takes the same arguments as subprocess.Popen.
popen = subprocess.Popen
#if isosx and hasattr(sys, 'frameworks_dir'):
#    PDFTK = os.path.join(getattr(sys, 'frameworks_dir'), 'pdftk')
if iswindows and hasattr(sys, 'frozen'):
    # Frozen Windows build: use the pdftk.exe that sits in the same directory
    # as the running executable.
    PDFTK = os.path.join(os.path.dirname(sys.executable), 'pdftk.exe')
    # 0x08 is the Win32 DETACHED_PROCESS creation flag (CREATE_NO_WINDOW is
    # 0x08000000). It is used so that no ugly console is popped up.
    # creationflags is only accepted on Windows, hence it is set only here.
    popen = partial(subprocess.Popen, creationflags=0x08)
class PdftkError(Exception):
    '''Raised when pdftk does not produce a usable output file.'''
def _info_entry(key, value):
    '''Return the (InfoKey, InfoValue) line pair for `key`, or () if empty.'''
    # The data is line oriented, so a line break inside a value would end
    # the value early and could inject extra keys. Collapse every run of
    # whitespace (including newlines) into a single space.
    value = ' '.join(value.split())
    if not value:
        return ()
    return ('InfoKey: ' + key, 'InfoValue: ' + value)


def mi_to_info(mi):
    '''
    Build the ``InfoKey:``/``InfoValue:`` text describing `mi`.

    Only ``mi.title`` and ``mi.authors`` are used. Values are normalised to a
    single line, and values that are empty after normalisation are skipped.

    :param mi: Object with ``title`` and ``authors`` attributes.
    :return: A unicode string with one entry per line, or an empty string
             when there is nothing to write.
    '''
    ans = []
    if mi.title:
        ans.extend(_info_entry('Title', mi.title))
    if mi.authors:
        from calibre.ebooks.metadata import authors_to_string
        ans.extend(_info_entry('Author', authors_to_string(mi.authors)))
    return u'\n'.join(ans)
def set_metadata(stream, mi):
    '''
    Update the metadata of the PDF in `stream` by running pdftk.

    The text produced by :func:`mi_to_info` is written to a temporary file and
    passed to pdftk's ``update_info`` operation. The resulting PDF then
    replaces the contents of `stream`. Nothing is done when `mi` yields no
    metadata text.

    :param stream: Seekable, writable file-like object holding the PDF. If it
                   has a ``name`` that is readable on disk, pdftk reads that
                   file directly; otherwise the stream is first copied to a
                   temporary file.
    :param mi: Metadata object accepted by :func:`mi_to_info`.
    :raises PdftkError: If pdftk exits with a non-zero status or its output
                        is smaller than 2048 bytes.
    :raises OSError: If pdftk cannot be started or no output file exists.
    '''
    raw = mi_to_info(mi)
    if not raw:
        return
    raw = raw.encode('utf-8')
    with nested(TemporaryFile('.pdf'), TemporaryFile('.pdf'),
            TemporaryFile('.info')) as (pdf_in, pdf_out, info):
        src = getattr(stream, 'name', None)
        if not src or not os.access(src, os.R_OK):
            # No usable file on disk: copy the stream into the temporary
            # input file so pdftk has something to read.
            stream.seek(0)
            src = pdf_in
            with open(src, 'wb') as f:
                f.write(stream.read())
        with open(info, 'wb') as f:
            f.write(raw)
        # NOTE(review): presumably removed so pdftk never finds an existing
        # output file it would have to overwrite -- confirm.
        if os.path.exists(pdf_out):
            os.remove(pdf_out)
        cmd = (PDFTK, src, 'update_info', info, 'output', pdf_out)
        p = popen(cmd)
        while True:
            try:
                ret = p.wait()
                break
            except OSError:
                # Retry when the wait was merely interrupted by a signal.
                # sys.exc_info() is used instead of binding the exception in
                # the except clause, whose syntax differs between Python
                # versions.
                if sys.exc_info()[1].errno != errno.EINTR:
                    raise
        if ret != 0:
            raise PdftkError('pdftk failed with exit code %d' % ret)
        # A very small output is treated as a failed conversion.
        if os.stat(pdf_out).st_size < 2048:
            raise PdftkError('Output file too small')
        with open(pdf_out, 'rb') as f:
            raw = f.read()
        # Replace the stream contents in place with the updated PDF.
        stream.seek(0)
        stream.truncate()
        stream.write(raw)
        stream.flush()
if __name__ == '__main__':
    # Manual test driver: <pdf file> <title> <author>
    args = sys.argv
    if len(args) < 4:
        raise SystemExit('Usage: %s file.pdf title author' % args[0])
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(args[2], [args[3]])
    # Open for in-place update and make sure the handle is closed afterwards.
    with open(args[1], 'r+b') as x:
        set_metadata(x, mi)