mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
PDF Metadata: Switch to using PoDoFo to read/write PDF metadata. On Linux, calibre will fall back to pdftk and pypdf. Linux distributors: calibre will only try to build the podofo extension if it detects the podofo header files in the directory pointed to by PODOFO_INC_DIR, defaults to /usr/include/podofo
This commit is contained in:
parent
16a11369a5
commit
3e9e6a63d7
@ -31,6 +31,7 @@ def freeze():
|
||||
'/usr/lib/libsqlite3.so.0',
|
||||
'/usr/lib/libsqlite3.so.0',
|
||||
'/usr/lib/libmng.so.1',
|
||||
'/usr/lib/libpodofo.so.0.6.99',
|
||||
'/lib/libz.so.1',
|
||||
'/lib/libbz2.so.1',
|
||||
'/lib/libbz2.so.1',
|
||||
|
@ -229,6 +229,11 @@ _check_symlinks_prescript()
|
||||
all_modules = main_modules['console'] + main_modules['gui']
|
||||
all_functions = main_functions['console'] + main_functions['gui']
|
||||
print
|
||||
print 'Adding PoDoFo'
|
||||
pdf = glob.glob(os.path.expanduser('~/podofo/*.dylib'))[0]
|
||||
shutil.copyfile(pdf, os.path.join(frameworks_dir, os.path.basename(pdf)))
|
||||
|
||||
|
||||
loader_path = os.path.join(resource_dir, 'loaders')
|
||||
if not os.path.exists(loader_path):
|
||||
os.mkdir(loader_path)
|
||||
|
@ -12,6 +12,7 @@ LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
|
||||
PDFTOHTML = 'C:\\cygwin\\home\\kovid\\poppler-0.10.6\\rel\\pdftohtml.exe'
|
||||
IMAGEMAGICK_DIR = 'C:\\ImageMagick'
|
||||
PDFTK = 'C:\\pdftk.exe'
|
||||
PODOFO = 'C:\\podofo'
|
||||
FONTCONFIG_DIR = 'C:\\fontconfig'
|
||||
VC90 = r'C:\VC90.CRT'
|
||||
|
||||
@ -101,8 +102,11 @@ class BuildEXE(py2exe.build_exe.py2exe):
|
||||
shutil.copyfile(PDFTOHTML, os.path.join(PY2EXE_DIR, os.path.basename(PDFTOHTML)))
|
||||
shutil.copyfile(PDFTOHTML+'.manifest', os.path.join(PY2EXE_DIR,
|
||||
os.path.basename(PDFTOHTML)+'.manifest'))
|
||||
print '\tAdding pdftk'
|
||||
shutil.copyfile(PDFTK, os.path.join(PY2EXE_DIR, os.path.basename(PDFTK)))
|
||||
#print '\tAdding pdftk'
|
||||
#shutil.copyfile(PDFTK, os.path.join(PY2EXE_DIR, os.path.basename(PDFTK)))
|
||||
print 'Adding podofo'
|
||||
for f in glob.glob(os.path.join(PODOFO, '*.dll')):
|
||||
shutil.copyfile(f, os.path.join(PY2EXE_DIR, os.path.basename(f)))
|
||||
|
||||
print '\tAdding ImageMagick'
|
||||
for f in os.listdir(IMAGEMAGICK_DIR):
|
||||
|
@ -80,13 +80,16 @@ CONFIG += x86 ppc
|
||||
os.chdir(cwd)
|
||||
|
||||
def build_sbf(self, sip, sbf, bdir):
|
||||
print '\tBuilding spf...'
|
||||
print '\tBuilding sbf...'
|
||||
sip_bin = self.sipcfg.sip_bin
|
||||
pyqt_sip_flags = []
|
||||
if hasattr(self, 'pyqtcfg'):
|
||||
pyqt_sip_flags += ['-I', self.pyqtcfg.pyqt_sip_dir]
|
||||
pyqt_sip_flags += self.pyqtcfg.pyqt_sip_flags.split()
|
||||
self.spawn([sip_bin,
|
||||
"-c", bdir,
|
||||
"-b", sbf,
|
||||
'-I', self.pyqtcfg.pyqt_sip_dir,
|
||||
] + self.pyqtcfg.pyqt_sip_flags.split()+
|
||||
] + pyqt_sip_flags +
|
||||
[sip])
|
||||
|
||||
def build_pyqt(self, bdir, sbf, ext, qtobjs, headers):
|
||||
@ -94,9 +97,14 @@ CONFIG += x86 ppc
|
||||
build_file=sbf, dir=bdir,
|
||||
makefile='Makefile.pyqt',
|
||||
universal=OSX_SDK, qt=1)
|
||||
makefile.extra_libs = ext.libraries
|
||||
makefile.extra_lib_dirs = ext.library_dirs
|
||||
makefile.extra_cxxflags = ext.extra_compile_args
|
||||
|
||||
if 'win32' in sys.platform:
|
||||
makefile.extra_lib_dirs += WINDOWS_PYTHON
|
||||
makefile.extra_include_dirs = list(set(map(os.path.dirname, headers)))
|
||||
makefile.extra_include_dirs += ext.include_dirs
|
||||
makefile.extra_lflags += qtobjs
|
||||
makefile.generate()
|
||||
cwd = os.getcwd()
|
||||
@ -200,6 +208,14 @@ CONFIG += x86 ppc
|
||||
shutil.copyfile(mod, ext_filename)
|
||||
shutil.copymode(mod, ext_filename)
|
||||
|
||||
|
||||
if self.force or newer_group([mod], ext_filename, 'newer'):
|
||||
if os.path.exists(ext_filename):
|
||||
os.unlink(ext_filename)
|
||||
shutil.copyfile(mod, ext_filename)
|
||||
shutil.copymode(mod, ext_filename)
|
||||
|
||||
|
||||
def get_sip_output_list(self, sbf, bdir):
|
||||
"""
|
||||
Parse the sbf file specified to extract the name of the generated source
|
||||
|
22
setup.py
22
setup.py
@ -2,7 +2,7 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import sys, re, os, shutil, cStringIO, tempfile, subprocess, time
|
||||
import sys, re, os, subprocess
|
||||
sys.path.append('src')
|
||||
iswindows = re.search('win(32|64)', sys.platform)
|
||||
isosx = 'darwin' in sys.platform
|
||||
@ -57,7 +57,25 @@ if __name__ == '__main__':
|
||||
|
||||
entry_points['console_scripts'].append(
|
||||
'calibre_postinstall = calibre.linux:post_install')
|
||||
ext_modules = [
|
||||
optional = []
|
||||
|
||||
|
||||
podofo_inc = '/usr/include/podofo' if islinux else \
|
||||
'C:\\podofo\\include\\podofo' if iswindows else \
|
||||
'/Users/kovid/podofo/include/podofo'
|
||||
podofo_lib = '/usr/lib' if islinux else r'C:\podofo' if iswindows else \
|
||||
'/Users/kovid/podofo/lib'
|
||||
if os.path.exists(os.path.join(podofo_inc, 'PdfString.h')):
|
||||
eca = ['/EHsc'] if iswindows else []
|
||||
optional.append(PyQtExtension('calibre.plugins.podofo', [],
|
||||
['src/calibre/utils/podofo/podofo.sip'],
|
||||
libraries=['podofo'], extra_compile_args=eca,
|
||||
library_dirs=[os.environ.get('PODOFO_LIB_DIR', podofo_lib)],
|
||||
include_dirs=\
|
||||
[os.environ.get('PODOFO_INC_DIR', podofo_inc)]))
|
||||
|
||||
ext_modules = optional + [
|
||||
|
||||
Extension('calibre.plugins.lzx',
|
||||
sources=['src/calibre/utils/lzx/lzxmodule.c',
|
||||
'src/calibre/utils/lzx/compressor.c',
|
||||
|
@ -53,7 +53,7 @@ if plugins is None:
|
||||
plugin_path = getattr(pkg_resources, 'resource_filename')('calibre', 'plugins')
|
||||
sys.path.insert(0, plugin_path)
|
||||
|
||||
for plugin in ['pictureflow', 'lzx', 'msdes'] + \
|
||||
for plugin in ['pictureflow', 'lzx', 'msdes', 'podofo'] + \
|
||||
(['winutil'] if iswindows else []) + \
|
||||
(['usbobserver'] if isosx else []):
|
||||
try:
|
||||
|
@ -89,6 +89,7 @@ def rtf2opf(path, tdir, opts):
|
||||
def txt2opf(path, tdir, opts):
|
||||
from calibre.ebooks.lrf.txt.convert_from import generate_html
|
||||
generate_html(path, opts.encoding, tdir)
|
||||
opts.encoding = 'utf-8'
|
||||
return os.path.join(tdir, 'metadata.opf')
|
||||
|
||||
def pdf2opf(path, tdir, opts):
|
||||
|
@ -6,13 +6,35 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import sys, os, cStringIO
|
||||
from threading import Thread
|
||||
|
||||
from calibre import FileWrapper
|
||||
from calibre.ebooks.metadata import MetaInformation, authors_to_string, get_parser
|
||||
from pyPdf import PdfFileReader, PdfFileWriter
|
||||
from calibre.utils.pdftk import set_metadata as pdftk_set_metadata
|
||||
from calibre.utils.podofo import get_metadata as podofo_get_metadata, \
|
||||
set_metadata as podofo_set_metadata
|
||||
|
||||
|
||||
def get_metadata(stream):
    """
    Read metadata from the PDF in ``stream``.

    The PoDoFo based reader is tried first; if it fails for any reason the
    pyPdf based reader (``get_metadata_pypdf``) is used instead.

    :param stream: A seekable file-like object containing the PDF.
    :return: Whatever the backend that succeeded returns.
    """
    # The PoDoFo reader consumes the stream from its current position, so
    # rewind first (set_metadata and get_metadata_pypdf already do this).
    stream.seek(0)
    try:
        return podofo_get_metadata(stream)
    except Exception:
        # Deliberately best-effort, but do not trap KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` would.
        return get_metadata_pypdf(stream)
|
||||
|
||||
def set_metadata(stream, mi):
    """
    Write the metadata in ``mi`` into the PDF held in ``stream``.

    Backends are tried in order: PoDoFo, then pdftk, then pyPdf. The first
    one that does not raise wins.

    :param stream: A seekable, writable file-like object containing the PDF.
    :param mi: The metadata object to write.
    :return: The return value of the PoDoFo or pdftk backend if one of them
             succeeds, otherwise ``None``.
    """
    for backend in (podofo_set_metadata, pdftk_set_metadata):
        # A backend that failed part way may have read the stream to EOF;
        # always hand the next one a rewound stream.
        stream.seek(0)
        try:
            return backend(stream, mi)
        except Exception:
            # Best-effort: fall through to the next backend, but let
            # KeyboardInterrupt/SystemExit propagate.
            pass
    # set_metadata_pypdf starts with stream.read(), so it needs the rewind too.
    stream.seek(0)
    set_metadata_pypdf(stream, mi)
|
||||
|
||||
|
||||
def get_metadata_pypdf(stream):
|
||||
""" Return metadata as a L{MetaInfo} object """
|
||||
from pyPdf import PdfFileReader
|
||||
from calibre import FileWrapper
|
||||
mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
||||
stream.seek(0)
|
||||
try:
|
||||
@ -48,18 +70,12 @@ class MetadataWriter(Thread):
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
def set_metadata(stream, mi):
|
||||
stream.seek(0)
|
||||
try:
|
||||
pdftk_set_metadata(stream, mi)
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
return
|
||||
|
||||
def set_metadata_pypdf(stream, mi):
|
||||
# Use a StringIO object for the pdf because we will want to over
|
||||
# write it later and if we are working on the stream directly it
|
||||
# could cause some issues.
|
||||
|
||||
from pyPdf import PdfFileReader, PdfFileWriter
|
||||
raw = cStringIO.StringIO(stream.read())
|
||||
orig_pdf = PdfFileReader(raw)
|
||||
|
||||
@ -73,7 +89,7 @@ def set_metadata(stream, mi):
|
||||
out_pdf.addPage(page)
|
||||
|
||||
writer.start()
|
||||
writer.join(15) # Wait 15 secs for writing to complete
|
||||
writer.join(10) # Wait 10 secs for writing to complete
|
||||
out_pdf.killed = True
|
||||
writer.join()
|
||||
if out_pdf.killed:
|
||||
|
@ -402,6 +402,7 @@ class LibraryDatabase2(LibraryDatabase):
|
||||
|
||||
def get_property(idx, index_is_id=False, loc=-1):
|
||||
row = self.data._data[idx] if index_is_id else self.data[idx]
|
||||
if row is not None:
|
||||
return row[loc]
|
||||
|
||||
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
|
||||
|
@ -220,7 +220,7 @@ Post any output you see in a help message on the `Forum <http://www.mobileread.c
|
||||
|
||||
My antivirus program claims |app| is a virus/trojan?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Your antivirus program is wrong. |app| is a completely open source product. You can actually browse the source code yourself (or hire someone to do it for you) to verify that it is not a virus. Please report the false identification to whatever company you buy your antivirus software from.
|
||||
Your antivirus program is wrong. |app| is a completely open source product. You can actually browse the source code yourself (or hire someone to do it for you) to verify that it is not a virus. Please report the false identification to whatever company you buy your antivirus software from. If the antivirus program is preventing you from downloading/installing |app|, disable it temporarily, install |app| and then re-enable it.
|
||||
|
||||
|
||||
I want some feature added to |app|. What can I do?
|
||||
|
98
src/calibre/utils/podofo/__init__.py
Normal file
98
src/calibre/utils/podofo/__init__.py
Normal file
@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
|
||||
from calibre.constants import plugins, preferred_encoding
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors, \
|
||||
authors_to_string
|
||||
|
||||
podofo, podofo_err = plugins['podofo']
|
||||
|
||||
class Unavailable(Exception): pass
|
||||
|
||||
def get_metadata(stream):
    """
    Extract title, authors and producer from a PDF using the podofo plugin.

    The whole stream is read into memory and the stream is rewound
    afterwards. When the PDF has no title, the stream's ``name`` attribute
    (or a localized 'Unknown') is used, reduced to its basename without
    extension.

    :param stream: File-like object containing the PDF.
    :return: A ``MetaInformation`` object.
    :raises Unavailable: If the podofo plugin could not be loaded.
    """
    if not podofo:
        raise Unavailable(podofo_err)

    data = stream.read()
    stream.seek(0)
    doc = podofo.PdfMemDocument()
    doc.Load(data, len(data))
    info = doc.GetInfo()

    def text_of(getter):
        # The binding hands back UTF-8 encoded byte strings.
        return getter().decode('utf-8').strip()

    title = text_of(info.GetTitle)
    if not title:
        fallback = getattr(stream, 'name', _('Unknown'))
        title = os.path.splitext(os.path.basename(fallback))[0]

    author = text_of(info.GetAuthor)
    if author:
        authors = string_to_authors(author)
    else:
        authors = [_('Unknown')]

    mi = MetaInformation(title, authors)

    creator = text_of(info.GetCreator)
    if creator:
        mi.book_producer = creator
    return mi
|
||||
|
||||
def prep(val):
    """
    Normalize a metadata value to a stripped unicode string.

    Falsy values become the empty unicode string. Byte strings are decoded
    with ``preferred_encoding``, replacing undecodable bytes.
    """
    if val:
        text = val if isinstance(val, unicode) else \
                val.decode(preferred_encoding, 'replace')
        return text.strip()
    return u''
|
||||
|
||||
def set_metadata(stream, mi):
    """
    Update the title, author and creator of the PDF in ``stream`` in place.

    The stream is read from its current position, the info dictionary is
    updated from ``mi`` and, if at least one field was set, the document is
    written to a temporary file and copied back over ``stream``. If nothing
    was set the stream contents are left unmodified.

    :param stream: Readable, writable, seekable file-like object.
    :param mi: Metadata object providing ``title``, ``authors`` and
               ``book_producer``.
    :raises Unavailable: If the podofo plugin could not be loaded.
    """
    if not podofo:
        raise Unavailable(podofo_err)
    raw = stream.read()
    p = podofo.PdfMemDocument()
    p.Load(raw, len(raw))
    info = p.GetInfo()
    touched = False

    title = prep(mi.title)
    if title:
        info.SetTitle(title)
        touched = True

    author = prep(authors_to_string(mi.authors))
    if author:
        info.SetAuthor(author)
        touched = True

    bkp = prep(mi.book_producer)
    if bkp:
        info.SetCreator(bkp)
        touched = True

    if touched:
        p.SetInfo(info)
        from calibre.ptempfile import TemporaryFile
        # Here f is used as a path: it is passed to p.Write() and open().
        with TemporaryFile('_pdf_set_metadata.pdf') as f:
            p.Write(f)
            # Close the handle explicitly instead of relying on garbage
            # collection before the temporary file goes away.
            with open(f, 'rb') as src:
                raw = src.read()
        stream.seek(0)
        stream.truncate()
        stream.write(raw)
        stream.flush()
        stream.seek(0)
|
||||
|
||||
if __name__ == '__main__':
|
||||
f = '/tmp/t.pdf'
|
||||
import StringIO
|
||||
stream = StringIO.StringIO(open(f).read())
|
||||
mi = get_metadata(open(f))
|
||||
print
|
||||
print 'Original metadata:'
|
||||
print mi
|
||||
mi.title = 'Test title'
|
||||
mi.authors = ['Test author', 'author2']
|
||||
mi.book_producer = 'calibre'
|
||||
set_metadata(stream, mi)
|
||||
open('/tmp/x.pdf', 'wb').write(stream.getvalue())
|
||||
print
|
||||
print 'New pdf written to /tmp/x.pdf'
|
||||
|
||||
|
128
src/calibre/utils/podofo/podofo.sip
Normal file
128
src/calibre/utils/podofo/podofo.sip
Normal file
@ -0,0 +1,128 @@
|
||||
%Module podofo 0
|
||||
|
||||
// Maps PoDoFo::PdfString to and from Python strings.
//   C++ -> Python: a UTF-8 encoded byte string ("" for an invalid PdfString).
//   Python -> C++: accepts byte strings (passed through as UTF-8) and unicode
//                  objects (encoded to UTF-8 first).
%MappedType PdfString
{
%TypeHeaderCode
#define USING_SHARED_PODOFO
#include <PdfString.h>
using namespace PoDoFo;
%End
%ConvertFromTypeCode
    if (sipCpp -> IsValid()) {
        std::string raw = sipCpp->GetStringUtf8();
        return PyString_FromStringAndSize(raw.c_str(), raw.length());
    } else return PyString_FromString("");
%End
%ConvertToTypeCode
    // Type-check only pass: report whether the object is convertible.
    if (sipIsErr == NULL) {
        return (PyUnicode_Check(sipPy) || PyString_Check(sipPy));
    }
    if (sipPy == Py_None) {
        *sipCppPtr = NULL;
        return 0;
    }
    if (PyString_Check(sipPy)) {
        *sipCppPtr = new PdfString((pdf_utf8 *)PyString_AS_STRING(sipPy));
        return sipGetState(sipTransferObj);
    }
    if (PyUnicode_Check(sipPy)) {
        Py_UNICODE* u = PyUnicode_AS_UNICODE(sipPy);
        PyObject *u8 = PyUnicode_EncodeUTF8(u, PyUnicode_GET_SIZE(sipPy), "replace");
        if (u8 == NULL) {
            // Encoding failed; a Python exception is already set.
            *sipIsErr = 1;
            return 0;
        }
        pdf_utf8 *s8 = (pdf_utf8 *)PyString_AS_STRING(u8);
        *sipCppPtr = new PdfString(s8);
        // Release the temporary UTF-8 object; previously it was leaked on
        // every call. NOTE(review): assumes the PdfString constructor copies
        // the buffer - confirm against the PoDoFo version in use.
        Py_DECREF(u8);
        return sipGetState(sipTransferObj);
    }
    *sipCppPtr = (PdfString *)sipForceConvertTo_PdfString(sipPy,sipIsErr);
    return 1;
%End
};
|
||||
|
||||
class PdfObject {
|
||||
%TypeHeaderCode
|
||||
#define USING_SHARED_PODOFO
|
||||
#include <PdfObject.h>
|
||||
using namespace PoDoFo;
|
||||
%End
|
||||
public:
|
||||
PdfObject();
|
||||
|
||||
};
|
||||
|
||||
class PdfInfo {
|
||||
%TypeHeaderCode
|
||||
#define USING_SHARED_PODOFO
|
||||
#include <PdfInfo.h>
|
||||
using namespace PoDoFo;
|
||||
%End
|
||||
public:
|
||||
PdfInfo(PdfObject *);
|
||||
|
||||
PdfString GetAuthor() const;
|
||||
PdfString GetSubject() const;
|
||||
PdfString GetTitle() const;
|
||||
PdfString GetKeywords() const;
|
||||
PdfString GetCreator() const;
|
||||
PdfString GetProducer() const;
|
||||
|
||||
void SetAuthor(PdfString &);
|
||||
void SetSubject(PdfString &);
|
||||
void SetTitle(PdfString &);
|
||||
void SetKeywords(PdfString &);
|
||||
void SetCreator(PdfString &);
|
||||
void SetProducer(PdfString &);
|
||||
|
||||
};
|
||||
|
||||
class PdfOutputDevice {
|
||||
%TypeHeaderCode
|
||||
#define USING_SHARED_PODOFO
|
||||
#include <PdfOutputDevice.h>
|
||||
using namespace PoDoFo;
|
||||
%End
|
||||
public:
|
||||
PdfOutputDevice(char *, long);
|
||||
unsigned long GetLength();
|
||||
unsigned long Tell();
|
||||
void Flush();
|
||||
};
|
||||
|
||||
|
||||
class PdfMemDocument {
|
||||
%TypeHeaderCode
|
||||
#define USING_SHARED_PODOFO
|
||||
#include <PdfMemDocument.h>
|
||||
using namespace PoDoFo;
|
||||
%End
|
||||
public:
|
||||
PdfMemDocument();
|
||||
|
||||
void Load(const char *filename);
|
||||
void Load(const char *buffer, long size);
|
||||
void Write(const char *filename);
|
||||
PdfInfo *GetInfo() const;
|
||||
|
||||
protected:
|
||||
void SetInfo(PdfInfo * /TransferThis/);
|
||||
|
||||
private:
|
||||
PdfMemDocument(PdfMemDocument &);
|
||||
|
||||
};
|
||||
|
||||
|
||||
%Exception PoDoFo::PdfError /PyName=PdfError/
|
||||
{
|
||||
%TypeHeaderCode
|
||||
#define USING_SHARED_PODOFO
|
||||
#include <PdfError.h>
|
||||
%End
|
||||
%RaiseCode
|
||||
const char *detail = sipExceptionRef.what();
|
||||
|
||||
SIP_BLOCK_THREADS
|
||||
PyErr_SetString(sipException_PoDoFo_PdfError, detail);
|
||||
SIP_UNBLOCK_THREADS
|
||||
%End
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user