Switch to using pdftk to set PDF metadata on Windows and Linux, as it is more robust

This commit is contained in:
Kovid Goyal 2009-05-08 14:30:26 -07:00
parent 5d3c309258
commit f371247347
5 changed files with 133 additions and 16 deletions

View File

@ -11,6 +11,7 @@ LIBUSB_DIR = 'C:\\libusb'
# Absolute Windows paths to third-party binaries and directories used when
# assembling the py2exe build.
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
# Copied into PY2EXE_DIR together with its '.manifest' companion file.
PDFTOHTML = 'C:\\cygwin\\home\\kovid\\poppler-0.10.6\\rel\\pdftohtml.exe'
# Every entry of this directory is copied into PY2EXE_DIR.
IMAGEMAGICK_DIR = 'C:\\ImageMagick'
# The pdftk executable; copied into PY2EXE_DIR under its base name.
PDFTK = 'C:\\pdftk.exe'
FONTCONFIG_DIR = 'C:\\fontconfig'
VC90 = r'C:\VC90.CRT'
@ -100,6 +101,9 @@ class BuildEXE(py2exe.build_exe.py2exe):
shutil.copyfile(PDFTOHTML, os.path.join(PY2EXE_DIR, os.path.basename(PDFTOHTML)))
shutil.copyfile(PDFTOHTML+'.manifest', os.path.join(PY2EXE_DIR,
os.path.basename(PDFTOHTML)+'.manifest'))
print '\tAdding pdftk'
shutil.copyfile(PDFTK, os.path.join(PY2EXE_DIR, os.path.basename(PDFTK)))
print '\tAdding ImageMagick'
for f in os.listdir(IMAGEMAGICK_DIR):
shutil.copyfile(os.path.join(IMAGEMAGICK_DIR, f), os.path.join(PY2EXE_DIR, f))

View File

@ -9,6 +9,7 @@ from threading import Thread
from calibre import FileWrapper
from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
from pyPdf import PdfFileReader, PdfFileWriter
from calibre.utils.pdftk import set_metadata as pdftk_set_metadata
def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """
@ -49,6 +50,12 @@ class MetadataWriter(Thread):
def set_metadata(stream, mi):
stream.seek(0)
try:
pdftk_set_metadata(stream, mi)
except:
pass
else:
return
# Use a StringIO object for the pdf because we will want to over
# write it later and if we are working on the stream directly it

View File

@ -1,8 +1,8 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
"""
Provides platform independent temporary files that persist even after
"""
Provides platform independent temporary files that persist even after
being closed.
"""
import tempfile, os, atexit, shutil
@ -15,38 +15,38 @@ def cleanup(path):
if os.path.exists(path):
os.remove(path)
except:
pass
pass
class PersistentTemporaryFile(object):
    """
    A file-like object that is a temporary file that is available even after
    being closed on all platforms. It is automatically deleted on normal
    program termination.

    The file is created with ``tempfile.mkstemp`` and opened with ``mode``.
    Attribute access is forwarded to the underlying file object, except for
    ``name``, which returns the path of the temporary file. The instance can
    be used as a context manager; leaving the ``with`` block closes the file
    but does not delete it (deletion is registered with ``atexit``).

    :param suffix: Suffix for the file name.
    :param prefix: Extra prefix appended after the application name/version
                   prefix; ``None`` is treated as an empty string.
    :param dir: Directory in which to create the file (``None`` lets
                ``tempfile`` choose).
    :param mode: Mode used to open the file descriptor.
    """

    _file = None

    def __init__(self, suffix="", prefix="", dir=None, mode='w+b'):
        if prefix is None:
            prefix = ""
        fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix,
                                    dir=dir)
        # Wrap the descriptor exactly once; a second fdopen() on the same fd
        # would create two file objects that both try to close it.
        self._file = os.fdopen(fd, mode)
        self._name = name
        atexit.register(cleanup, name)

    def __getattr__(self, name):
        # Only reached for attributes not found normally. Go through
        # __dict__ directly so a missing '_file'/'_name' cannot recurse
        # back into __getattr__.
        if name == 'name':
            return self.__dict__['_name']
        return getattr(self.__dict__['_file'], name)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def __del__(self):
        # __init__ may have failed before the file was opened; in that case
        # there is nothing to close and the finalizer must stay silent.
        f = self.__dict__.get('_file')
        if f is not None:
            f.close()
def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
'''
@ -66,12 +66,37 @@ class TemporaryDirectory(object):
self.prefix = prefix
self.dir = dir
self.keep = keep
def __enter__(self):
    """Create the temporary directory, remember it in ``tdir`` and return its path."""
    name_prefix = __appname__ + "_" + __version__ + "_" + self.prefix
    created = tempfile.mkdtemp(self.suffix, name_prefix, self.dir)
    self.tdir = created
    return created
def __exit__(self, *args):
    """Remove the directory tree on exit unless ``keep`` is set or it is already gone."""
    if self.keep or not os.path.exists(self.tdir):
        return
    # Deletion failures are deliberately ignored.
    shutil.rmtree(self.tdir, ignore_errors=True)
class TemporaryFile(object):
    """
    Context manager that creates a temporary file and yields its *path*.

    On entry a file is created with ``tempfile.mkstemp``, opened with
    ``mode`` and immediately closed again, and its path is returned so the
    caller (or an external program) can open it by name. On exit the file is
    passed to ``cleanup`` for removal.

    :param suffix: Suffix for the file name; ``None`` means no suffix.
    :param prefix: Extra prefix appended after the application name/version
                   prefix; ``None`` means no extra prefix.
    :param dir: Directory in which to create the file (``None`` lets
                ``tempfile`` choose).
    :param mode: Mode used for the short-lived open of the new file.
    """

    def __init__(self, suffix="", prefix="", dir=None, mode='w+b'):
        if prefix is None:
            prefix = ''
        if suffix is None:
            suffix = ''
        self.prefix, self.suffix, self.dir, self.mode = prefix, suffix, dir, mode
        self._file = None
        # Set in __enter__; kept as None until then so __exit__ can tell
        # whether there is anything to remove.
        self._name = None

    def __enter__(self):
        fd, name = tempfile.mkstemp(self.suffix,
                __appname__+"_"+ __version__+"_" + self.prefix,
                dir=self.dir)
        self._file = os.fdopen(fd, self.mode)
        self._name = name
        # Only the path is handed out, so release the descriptor right away.
        self._file.close()
        return name

    def __exit__(self, *args):
        # Nothing was created if __enter__ never ran.
        if self._name is not None:
            cleanup(self._name)

View File

@ -20,6 +20,7 @@ DEPENDENCIES = [
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
('dnspython', '1.6.0', 'dnspython', 'dnspython', 'dnspython', 'dnspython'),
('poppler', '0.10.5', 'poppler', 'poppler', 'poppler', 'poppler'),
('pdftk', '1.12', 'pdftk', 'pdftk', 'pdftk', 'pdftk'),
]

View File

@ -0,0 +1,80 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, subprocess, os, errno
from functools import partial
from contextlib import nested
from calibre.ptempfile import TemporaryFile
from calibre.constants import iswindows
# PDFTK is the pdftk executable to run and popen is the callable used to
# launch it.
#if isosx and hasattr(sys, 'frameworks_dir'):
#    PDFTK = os.path.join(getattr(sys, 'frameworks_dir'), 'pdftk')
if iswindows and hasattr(sys, 'frozen'):
    # Frozen Windows build: use the pdftk.exe that sits next to the
    # application executable.
    PDFTK = os.path.join(os.path.dirname(sys.executable), 'pdftk.exe')
    # Launch without a console so that no ugly console window is popped up.
    # NOTE: 0x08 is the Win32 DETACHED_PROCESS creation flag;
    # CREATE_NO_WINDOW is 0x08000000.
    popen = partial(subprocess.Popen, creationflags=0x08)
else:
    # Everywhere else rely on pdftk being found via the normal executable
    # search and use a plain Popen.
    PDFTK = 'pdftk'
    popen = subprocess.Popen
class PdftkError(Exception):
    """Signals that a pdftk run did not yield a usable PDF."""
def mi_to_info(mi):
    """
    Render the title and authors of ``mi`` as pdftk info-file text.

    Each field that is set becomes an ``InfoKey: <name>`` line followed by
    an ``InfoValue: <value>`` line. Line breaks inside a value are folded
    into single spaces, because the format is line oriented and an embedded
    newline would otherwise truncate the value or inject extra records.

    :param mi: Object with ``title`` and ``authors`` attributes; ``authors``
               is passed to ``authors_to_string`` when truthy.
    :return: The info text as a unicode string, or an empty string when
             neither title nor authors are set.
    """
    def record(key, value):
        # splitlines() + join keeps ordinary values untouched and turns
        # every embedded line break into a space.
        return ('InfoKey: ' + key, 'InfoValue: ' + ' '.join(value.splitlines()))

    ans = []
    if mi.title:
        ans.extend(record('Title', mi.title))
    if mi.authors:
        # Imported lazily, only when there are authors to format.
        from calibre.ebooks.metadata import authors_to_string
        ans.extend(record('Author', authors_to_string(mi.authors)))
    return u'\n'.join(ans)
def _wait_for_exit(proc):
    """Wait for ``proc`` to finish and return its exit status, retrying on EINTR."""
    while True:
        try:
            return proc.wait()
        except OSError:
            # sys.exc_info() is used instead of binding the exception in the
            # except clause so the code parses on old and new Pythons alike.
            if sys.exc_info()[1].errno != errno.EINTR:
                raise


def set_metadata(stream, mi):
    """
    Write the title/authors from ``mi`` into the PDF in ``stream`` via pdftk.

    The metadata is written to a temporary info file and pdftk is run as
    ``pdftk <input> update_info <info> output <output>``. If the stream has
    a readable ``name`` on disk, that file is used as pdftk's input;
    otherwise the stream contents are first copied to a temporary file. On
    success the stream is truncated and overwritten with pdftk's output.

    Does nothing when ``mi`` yields no metadata text.

    NOTE(review): the info file is written as raw UTF-8. Some pdftk releases
    document XML numeric entities for non-ASCII ``update_info`` input (with
    a separate ``update_info_utf8`` operation) -- confirm against the pdftk
    version that is shipped.

    :param stream: Seekable, writable file-like object containing the PDF.
    :param mi: Metadata object accepted by ``mi_to_info``.
    :raises PdftkError: If pdftk exits with a non-zero status, or its output
                        file is missing or smaller than 2048 bytes.
    """
    raw = mi_to_info(mi)
    if not raw:
        return
    raw = raw.encode('utf-8')
    with nested(TemporaryFile('.pdf'), TemporaryFile('.pdf'),
                TemporaryFile('.info')) as (src, dest, info):
        on_disk = getattr(stream, 'name', None)
        if on_disk and os.access(on_disk, os.R_OK):
            # The stream is backed by a readable file: let pdftk read it
            # directly instead of copying it.
            src = on_disk
        else:
            stream.seek(0)
            with open(src, 'wb') as f:
                f.write(stream.read())
        with open(info, 'wb') as f:
            f.write(raw)
        # TemporaryFile already created the output path; remove it so pdftk
        # starts from a non-existent output file.
        if os.path.exists(dest):
            os.remove(dest)
        cmd = (PDFTK, src, 'update_info', info, 'output', dest)
        ret = _wait_for_exit(popen(cmd))
        if ret != 0:
            raise PdftkError('pdftk failed with exit code %r' % (ret,))
        # Sanity check: a missing or tiny output means pdftk did not
        # produce a real PDF.
        if not os.path.exists(dest) or os.stat(dest).st_size < 2048:
            raise PdftkError('Output file too small')
        with open(dest, 'rb') as f:
            raw = f.read()
        if raw:
            stream.seek(0)
            stream.truncate()
            stream.write(raw)
            stream.flush()
if __name__ == '__main__':
    # Manual test driver: <script> file.pdf title author
    # Rewrites the metadata of file.pdf in place.
    args = sys.argv
    if len(args) < 4:
        raise SystemExit('Usage: %s file.pdf title author' % args[0])
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(args[2], [args[3]])
    # The with block guarantees the PDF is flushed and closed even if
    # set_metadata raises.
    with open(args[1], 'r+b') as x:
        set_metadata(x, mi)