mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Switch to using pdftk to set PDF metadata on Windows and Linux, as it is more robust
This commit is contained in:
parent
5d3c309258
commit
f371247347
@ -11,6 +11,7 @@ LIBUSB_DIR = 'C:\\libusb'
|
|||||||
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
||||||
PDFTOHTML = 'C:\\cygwin\\home\\kovid\\poppler-0.10.6\\rel\\pdftohtml.exe'
|
PDFTOHTML = 'C:\\cygwin\\home\\kovid\\poppler-0.10.6\\rel\\pdftohtml.exe'
|
||||||
IMAGEMAGICK_DIR = 'C:\\ImageMagick'
|
IMAGEMAGICK_DIR = 'C:\\ImageMagick'
|
||||||
|
PDFTK = 'C:\\pdftk.exe'
|
||||||
FONTCONFIG_DIR = 'C:\\fontconfig'
|
FONTCONFIG_DIR = 'C:\\fontconfig'
|
||||||
VC90 = r'C:\VC90.CRT'
|
VC90 = r'C:\VC90.CRT'
|
||||||
|
|
||||||
@ -100,6 +101,9 @@ class BuildEXE(py2exe.build_exe.py2exe):
|
|||||||
shutil.copyfile(PDFTOHTML, os.path.join(PY2EXE_DIR, os.path.basename(PDFTOHTML)))
|
shutil.copyfile(PDFTOHTML, os.path.join(PY2EXE_DIR, os.path.basename(PDFTOHTML)))
|
||||||
shutil.copyfile(PDFTOHTML+'.manifest', os.path.join(PY2EXE_DIR,
|
shutil.copyfile(PDFTOHTML+'.manifest', os.path.join(PY2EXE_DIR,
|
||||||
os.path.basename(PDFTOHTML)+'.manifest'))
|
os.path.basename(PDFTOHTML)+'.manifest'))
|
||||||
|
print '\tAdding pdftk'
|
||||||
|
shutil.copyfile(PDFTK, os.path.join(PY2EXE_DIR, os.path.basename(PDFTK)))
|
||||||
|
|
||||||
print '\tAdding ImageMagick'
|
print '\tAdding ImageMagick'
|
||||||
for f in os.listdir(IMAGEMAGICK_DIR):
|
for f in os.listdir(IMAGEMAGICK_DIR):
|
||||||
shutil.copyfile(os.path.join(IMAGEMAGICK_DIR, f), os.path.join(PY2EXE_DIR, f))
|
shutil.copyfile(os.path.join(IMAGEMAGICK_DIR, f), os.path.join(PY2EXE_DIR, f))
|
||||||
|
@ -9,6 +9,7 @@ from threading import Thread
|
|||||||
from calibre import FileWrapper
|
from calibre import FileWrapper
|
||||||
from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
|
from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
|
||||||
from pyPdf import PdfFileReader, PdfFileWriter
|
from pyPdf import PdfFileReader, PdfFileWriter
|
||||||
|
from calibre.utils.pdftk import set_metadata as pdftk_set_metadata
|
||||||
|
|
||||||
def get_metadata(stream):
|
def get_metadata(stream):
|
||||||
""" Return metadata as a L{MetaInfo} object """
|
""" Return metadata as a L{MetaInfo} object """
|
||||||
@ -49,6 +50,12 @@ class MetadataWriter(Thread):
|
|||||||
|
|
||||||
def set_metadata(stream, mi):
|
def set_metadata(stream, mi):
|
||||||
stream.seek(0)
|
stream.seek(0)
|
||||||
|
try:
|
||||||
|
pdftk_set_metadata(stream, mi)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
|
||||||
# Use a StringIO object for the pdf because we will want to over
|
# Use a StringIO object for the pdf because we will want to over
|
||||||
# write it later and if we are working on the stream directly it
|
# write it later and if we are working on the stream directly it
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
from __future__ import with_statement
|
from __future__ import with_statement
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
"""
|
"""
|
||||||
Provides platform independent temporary files that persist even after
|
Provides platform independent temporary files that persist even after
|
||||||
being closed.
|
being closed.
|
||||||
"""
|
"""
|
||||||
import tempfile, os, atexit, shutil
|
import tempfile, os, atexit, shutil
|
||||||
@ -15,38 +15,38 @@ def cleanup(path):
|
|||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
os.remove(path)
|
os.remove(path)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class PersistentTemporaryFile(object):
    """
    A file-like object that is a temporary file that is available even after
    being closed on all platforms. It is automatically deleted on normal
    program termination.

    Every attribute other than ``name`` is forwarded to the underlying file
    object, so an instance can be used like a regular open file.
    """
    # Class-level default so ``self._file`` resolves even if __init__ failed.
    _file = None

    def __init__(self, suffix="", prefix="", dir=None, mode='w+b'):
        """
        Create the temporary file and open it.

        :param suffix: Suffix for the generated file name.
        :param prefix: Text appended to the application name/version prefix
                       of the file name; ``None`` is treated as ``""``.
        :param dir:    Directory handed to ``tempfile.mkstemp``.
        :param mode:   Mode handed to ``os.fdopen`` for the new file.
        """
        if prefix is None:
            prefix = ""
        fd, name = tempfile.mkstemp(suffix, __appname__+"_"+ __version__+"_" + prefix,
                                    dir=dir)
        self._name = name
        # Register the deletion before opening the descriptor, so the file is
        # still removed at exit if os.fdopen() rejects the mode.
        atexit.register(cleanup, name)
        self._file = os.fdopen(fd, mode)

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails. Work on __dict__
        # directly so that a missing key cannot recurse back into here.
        state = self.__dict__
        if name == 'name':
            if '_name' in state:
                return state['_name']
        elif '_file' in state:
            return getattr(state['_file'], name)
        # Instance was never (fully) initialised: report a normal missing
        # attribute instead of leaking a KeyError to hasattr()/getattr().
        raise AttributeError(name)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def __del__(self):
        # Nothing to close if __init__ never got as far as opening the file.
        if self.__dict__.get('_file') is not None:
            self.close()
|
||||||
|
|
||||||
|
|
||||||
def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
|
def PersistentTemporaryDirectory(suffix='', prefix='', dir=None):
|
||||||
'''
|
'''
|
||||||
@ -66,12 +66,37 @@ class TemporaryDirectory(object):
|
|||||||
self.prefix = prefix
|
self.prefix = prefix
|
||||||
self.dir = dir
|
self.dir = dir
|
||||||
self.keep = keep
|
self.keep = keep
|
||||||
|
|
||||||
def __enter__(self):
    # Build the "<appname>_<version>_<prefix>" file-name prefix, create the
    # directory and hand its path to the ``with`` body.
    name_prefix = "_".join((__appname__, __version__, self.prefix))
    created = tempfile.mkdtemp(self.suffix, name_prefix, self.dir)
    self.tdir = created
    return created
|
||||||
|
|
||||||
def __exit__(self, *args):
    # Leave the directory alone when the caller asked to keep it.
    if self.keep:
        return
    # Best-effort removal: errors while deleting the tree are ignored.
    if os.path.exists(self.tdir):
        shutil.rmtree(self.tdir, ignore_errors=True)
|
||||||
|
|
||||||
|
class TemporaryFile(object):
    """
    Context manager that creates an empty temporary file, yields its *path*
    (not a file object) to the ``with`` body and deletes the file on exit.

    The file is handed over closed, so the body (or an external program) can
    open it by name in whatever mode it needs.
    """

    def __init__(self, suffix="", prefix="", dir=None, mode='w+b'):
        """
        :param suffix: Suffix for the generated file name; ``None`` means ``''``.
        :param prefix: Text appended to the application name/version prefix
                       of the file name; ``None`` means ``''``.
        :param dir:    Directory handed to ``tempfile.mkstemp``.
        :param mode:   Stored on the instance for interface compatibility; the
                       file is never opened here, so it is not otherwise used.
        """
        if prefix is None:
            prefix = ''
        if suffix is None:
            suffix = ''
        self.prefix, self.suffix, self.dir, self.mode = prefix, suffix, dir, mode
        self._file = None
        self._name = None

    def __enter__(self):
        fd, name = tempfile.mkstemp(self.suffix,
                __appname__+"_"+ __version__+"_" + self.prefix,
                dir=self.dir)
        self._name = name
        # Only the path is needed: close the descriptor directly rather than
        # wrapping it in a file object first, which could fail on a bad mode
        # and leak both the descriptor and the file.
        os.close(fd)
        return name

    def __exit__(self, *args):
        # Nothing to delete if the context was never entered.
        if self._name is not None:
            cleanup(self._name)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,6 +20,7 @@ DEPENDENCIES = [
|
|||||||
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
|
('BeautifulSoup', '3.0.5', 'beautifulsoup', 'python-beautifulsoup', 'python-BeautifulSoup'),
|
||||||
('dnspython', '1.6.0', 'dnspython', 'dnspython', 'dnspython', 'dnspython'),
|
('dnspython', '1.6.0', 'dnspython', 'dnspython', 'dnspython', 'dnspython'),
|
||||||
('poppler', '0.10.5', 'poppler', 'poppler', 'poppler', 'poppler'),
|
('poppler', '0.10.5', 'poppler', 'poppler', 'poppler', 'poppler'),
|
||||||
|
('pdftk', '1.12', 'pdftk', 'pdftk', 'pdftk', 'pdftk'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
80
src/calibre/utils/pdftk.py
Normal file
80
src/calibre/utils/pdftk.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
import sys, subprocess, os, errno
|
||||||
|
from functools import partial
|
||||||
|
from contextlib import nested
|
||||||
|
|
||||||
|
from calibre.ptempfile import TemporaryFile
|
||||||
|
from calibre.constants import iswindows
|
||||||
|
|
||||||
|
# Name of the pdftk executable; by default it is looked up on the PATH.
PDFTK = 'pdftk'
# Callable used to launch pdftk; replaced below for frozen Windows builds.
popen = subprocess.Popen
# Win32 process creation flag. Note that 0x08 is DETACHED_PROCESS, not
# CREATE_NO_WINDOW (which is 0x08000000); a detached child gets no console,
# so no ugly console window is popped up.
DETACHED_PROCESS = 0x08
#if isosx and hasattr(sys, 'frameworks_dir'):
#    PDFTK = os.path.join(getattr(sys, 'frameworks_dir'), 'pdftk')
if iswindows and hasattr(sys, 'frozen'):
    # In a frozen Windows build pdftk.exe sits next to the main executable.
    PDFTK = os.path.join(os.path.dirname(sys.executable), 'pdftk.exe')
    popen = partial(subprocess.Popen, creationflags=DETACHED_PROCESS)
|
||||||
|
|
||||||
|
class PdftkError(Exception):
    """Raised when running pdftk does not yield a usable result."""
|
||||||
|
|
||||||
|
def mi_to_info(mi):
    """
    Render the title and authors of `mi` as pdftk info records.

    Each field becomes an ``InfoKey: <name>`` line followed by an
    ``InfoValue: <value>`` line. Fields that are empty are left out.

    :param mi: Object with ``title`` and ``authors`` attributes.
    :return:   The records joined with newlines, or an empty string when `mi`
               has neither a title nor authors.
    """
    def single_line(value):
        # The record format is line based: a line break inside a value would
        # end the record early and let the remainder be read as a new record.
        return ' '.join(value.splitlines())

    ans = []
    if mi.title:
        ans.extend(('InfoKey: Title', 'InfoValue: ' + single_line(mi.title)))
    if mi.authors:
        from calibre.ebooks.metadata import authors_to_string
        ans.extend(('InfoKey: Author', 'InfoValue: ' +
            single_line(authors_to_string(mi.authors))))
    return u'\n'.join(ans)
|
||||||
|
|
||||||
|
def set_metadata(stream, mi):
    """
    Write the title and authors of `mi` into the PDF held in `stream`.

    Runs ``pdftk <input> update_info <info file> output <output>`` and, when
    that succeeds, replaces the contents of `stream` with the new PDF.
    Returns without touching `stream` when `mi` yields no info records.

    :param stream: Seekable file-like object with the PDF data, open for
                   reading and writing. If it has a ``name`` that is a
                   readable path, pdftk reads that file directly; otherwise
                   the data is first copied to a temporary file.
    :param mi:     Metadata object providing ``title`` and ``authors``.
    :raises PdftkError: If pdftk exits with a non-zero status, or its output
                   file is missing or smaller than 2048 bytes.
    """
    raw = mi_to_info(mi)
    if not raw:
        return
    # pdftk's update_info expects non-ASCII characters as XML numeric
    # entities; plain ASCII text is encoded unchanged.
    raw = raw.encode('ascii', 'xmlcharrefreplace')
    with nested(TemporaryFile('.pdf'), TemporaryFile('.pdf'),
                TemporaryFile('.info')) as (src, output, meta):
        oi = getattr(stream, 'name', None)
        if not oi or not os.access(oi, os.R_OK):
            # No usable path behind the stream: give pdftk a copy on disk.
            stream.seek(0)
            with open(src, 'wb') as f:
                f.write(stream.read())
        else:
            src = oi
        with open(meta, 'wb') as f:
            f.write(raw)
        # Remove the placeholder so the output path does not exist when
        # pdftk is started.
        if os.path.exists(output):
            os.remove(output)
        cmd = (PDFTK, src, 'update_info', meta, 'output', output)
        p = popen(cmd)

        while True:
            try:
                ret = p.wait()
                break
            except OSError:
                # Retry only when the wait was interrupted by a signal.
                if sys.exc_info()[1].errno != errno.EINTR:
                    raise

        if ret != 0:
            raise PdftkError('pdftk exited with return code %d' % ret)
        # Sanity check: treat a missing or tiny result as a failure rather
        # than overwriting the original data with it.
        if not os.path.exists(output) or os.stat(output).st_size < 2048:
            raise PdftkError('Output file too small')

        with open(output, 'rb') as f:
            raw = f.read()
        if raw:
            stream.seek(0)
            stream.truncate()
            stream.write(raw)
            stream.flush()
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual test driver: set the title and a single author on a PDF file.
    args = sys.argv
    if len(args) < 4:
        raise SystemExit('Usage: %s file.pdf title author' % args[0])
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(args[2], [args[3]])
    # The with block closes the file even if set_metadata raises.
    with open(args[1], 'r+b') as x:
        set_metadata(x, mi)
|
Loading…
x
Reference in New Issue
Block a user