mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Move the PDF metadata reader and writer into a separate process, since PoDoFo crashes the Python interpreter alarmingly often
This commit is contained in:
parent
5b4d17c783
commit
ac1e73174a
@ -30,6 +30,13 @@ PARALLEL_FUNCS = {
|
|||||||
'read_metadata' :
|
'read_metadata' :
|
||||||
('calibre.ebooks.metadata.worker', 'read_metadata_', 'notification'),
|
('calibre.ebooks.metadata.worker', 'read_metadata_', 'notification'),
|
||||||
|
|
||||||
|
'read_pdf_metadata' :
|
||||||
|
('calibre.utils.podofo.__init__', 'get_metadata_', None),
|
||||||
|
|
||||||
|
'write_pdf_metadata' :
|
||||||
|
('calibre.utils.podofo.__init__', 'set_metadata_', None),
|
||||||
|
|
||||||
|
|
||||||
'save_book' :
|
'save_book' :
|
||||||
('calibre.ebooks.metadata.worker', 'save_book', 'notification'),
|
('calibre.ebooks.metadata.worker', 'save_book', 'notification'),
|
||||||
}
|
}
|
||||||
|
@ -6,11 +6,14 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os
|
import os, time
|
||||||
|
|
||||||
from calibre.constants import plugins, preferred_encoding
|
from calibre.constants import plugins, preferred_encoding
|
||||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors, \
|
from calibre.ebooks.metadata import MetaInformation, string_to_authors, \
|
||||||
authors_to_string
|
authors_to_string
|
||||||
|
from calibre.utils.ipc.job import ParallelJob
|
||||||
|
from calibre.utils.ipc.server import Server
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
podofo, podofo_err = plugins['podofo']
|
podofo, podofo_err = plugins['podofo']
|
||||||
|
|
||||||
@ -19,21 +22,40 @@ class Unavailable(Exception): pass
|
|||||||
def get_metadata(stream):
    '''
    Read the title, authors and producer of the PDF contained in ``stream``.

    The PDF data is copied to a temporary file and parsed by the
    ``read_pdf_metadata`` job in a separate worker process, so that a crash
    inside PoDoFo cannot take the calling interpreter down with it.

    :param stream: file-like object; everything from its current position is
        read. If it has a ``name`` attribute, that name is used to derive a
        title when the PDF itself carries none.
    :return: a ``MetaInformation`` object; ``book_producer`` is set only when
        the PDF has a creator entry.
    :raises Unavailable: if the podofo plugin could not be loaded.
    :raises ValueError: if the worker job finished without a result.
    '''
    if not podofo:
        raise Unavailable(podofo_err)

    # The worker gets a path, not the stream, so spill the data to disk.
    pt = PersistentTemporaryFile('_podofo.pdf')
    try:
        try:
            pt.write(stream.read())
        finally:
            pt.close()

        server = Server(pool_size=1)
        try:
            job = ParallelJob('read_pdf_metadata', 'Read pdf metadata',
                    lambda x,y:x, args=[pt.name])
            server.add_job(job)
            while not job.is_finished:
                time.sleep(0.1)
                job.update()

            # One further update after the loop is kept from the original;
            # presumably it collects the final result -- confirm against
            # ParallelJob.
            job.update()
        finally:
            # Shut the worker down even if polling raised.
            server.close()
    finally:
        # Always remove the temporary copy, including on the failure path
        # below (previously it was leaked whenever the job failed).
        if os.path.exists(pt.name):
            os.remove(pt.name)

    if job.result is None:
        raise ValueError('Failed to read metadata: PoDoFo crashed')
    title, authors, creator = job.result

    if not title:
        # The fallback needs the caller's stream, which only exists on this
        # side of the process boundary, so it cannot live in get_metadata_().
        title = getattr(stream, 'name', _('Unknown'))
        title = os.path.splitext(os.path.basename(title))[0]

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    return mi


def get_metadata_(path):
    '''
    Worker-side half of :func:`get_metadata`: open the PDF at ``path`` with
    PoDoFo and return its raw metadata.

    :param path: filesystem path of the PDF to open.
    :return: tuple ``(title, authors, creator)``. ``title`` and ``creator``
        are returned exactly as PoDoFo reports them and may be empty;
        ``authors`` is a list and falls back to ``[_('Unknown')]`` when the
        PDF has no author entry.
    '''
    p = podofo.PDFDoc()
    p.open(path)
    # No title fallback here: the original stream is not available in the
    # worker (referencing it raised NameError); get_metadata() handles it.
    title = p.title
    author = p.author
    authors = string_to_authors(author) if author else [_('Unknown')]
    creator = p.creator
    return (title, authors, creator)
|
|
||||||
|
|
||||||
def prep(val):
|
def prep(val):
|
||||||
if not val:
|
if not val:
|
||||||
@ -45,21 +67,43 @@ def prep(val):
|
|||||||
def set_metadata(stream, mi):
    '''
    Write the title, authors and book producer from ``mi`` into the PDF held
    in ``stream``, replacing the stream's contents with the updated PDF.

    The PDF data is copied to a temporary file and rewritten by the
    ``write_pdf_metadata`` job in a separate worker process, so that a crash
    inside PoDoFo cannot take the calling interpreter down with it.

    :param stream: read/write file-like object; everything from its current
        position is read as the PDF data.
    :param mi: object providing ``title``, ``authors`` and ``book_producer``.
    :raises Unavailable: if the podofo plugin could not be loaded.

    If the job yields no result the stream is left untouched.
    '''
    if not podofo:
        raise Unavailable(podofo_err)

    # The worker gets a path, not the stream, so spill the data to disk.
    pt = PersistentTemporaryFile('_podofo.pdf')
    try:
        try:
            pt.write(stream.read())
        finally:
            pt.close()

        server = Server(pool_size=1)
        try:
            job = ParallelJob('write_pdf_metadata', 'Write pdf metadata',
                    lambda x,y:x, args=[pt.name, mi.title, mi.authors,
                        mi.book_producer])
            server.add_job(job)
            while not job.is_finished:
                time.sleep(0.1)
                job.update()

            # One further update after the loop is kept from the original;
            # presumably it collects the final result -- confirm against
            # ParallelJob.
            job.update()
        finally:
            # Shut the worker down even if polling raised.
            server.close()
    finally:
        # The temporary copy was previously never deleted; remove it on
        # every path.
        if os.path.exists(pt.name):
            os.remove(pt.name)

    if job.result is not None:
        # Overwrite the caller's stream in place and rewind it.
        stream.seek(0)
        stream.truncate()
        stream.write(job.result)
        stream.flush()
        stream.seek(0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def set_metadata_(path, title, authors, bkp):
|
||||||
p = podofo.PDFDoc()
|
p = podofo.PDFDoc()
|
||||||
p.load(raw)
|
p.open(path)
|
||||||
title = prep(mi.title)
|
title = prep(title)
|
||||||
touched = False
|
touched = False
|
||||||
if title:
|
if title:
|
||||||
p.title = title
|
p.title = title
|
||||||
touched = True
|
touched = True
|
||||||
|
|
||||||
author = prep(authors_to_string(mi.authors))
|
author = prep(authors_to_string(authors))
|
||||||
if author:
|
if author:
|
||||||
p.author = author
|
p.author = author
|
||||||
touched = True
|
touched = True
|
||||||
|
|
||||||
bkp = prep(mi.book_producer)
|
bkp = prep(bkp)
|
||||||
if bkp:
|
if bkp:
|
||||||
p.creator = bkp
|
p.creator = bkp
|
||||||
touched = True
|
touched = True
|
||||||
@ -68,12 +112,7 @@ def set_metadata(stream, mi):
|
|||||||
from calibre.ptempfile import TemporaryFile
|
from calibre.ptempfile import TemporaryFile
|
||||||
with TemporaryFile('_pdf_set_metadata.pdf') as f:
|
with TemporaryFile('_pdf_set_metadata.pdf') as f:
|
||||||
p.save(f)
|
p.save(f)
|
||||||
raw = open(f, 'rb').read()
|
return open(f, 'rb').read()
|
||||||
stream.seek(0)
|
|
||||||
stream.truncate()
|
|
||||||
stream.write(raw)
|
|
||||||
stream.flush()
|
|
||||||
stream.seek(0)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
f = '/tmp/t.pdf'
|
f = '/tmp/t.pdf'
|
||||||
|
@ -64,6 +64,24 @@ podofo_PDFDoc_load(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
|
|||||||
return Py_None;
|
return Py_None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PyObject *
podofo_PDFDoc_open(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    /* PDFDoc.open(path): load a PDF document from a file on disk.
     *
     * Expects one string argument, the path. Returns None on success. On
     * failure returns NULL: either argument parsing failed, or PoDoFo threw
     * a PdfError that was handed to podofo_set_exception(). */
    char *path;

    if (!PyArg_ParseTuple(args, "s", &path)) return NULL;

    try {
        self->doc->Load(path);
    } catch(const PdfError & err) {
        podofo_set_exception(err);
        return NULL;
    }

    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
podofo_PDFDoc_save(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
|
podofo_PDFDoc_save(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
|
||||||
char *buffer;
|
char *buffer;
|
||||||
@ -232,6 +250,9 @@ static PyMethodDef podofo_PDFDoc_methods[] = {
|
|||||||
{"load", (PyCFunction)podofo_PDFDoc_load, METH_VARARGS,
|
{"load", (PyCFunction)podofo_PDFDoc_load, METH_VARARGS,
|
||||||
"Load a PDF document from a byte buffer (string)"
|
"Load a PDF document from a byte buffer (string)"
|
||||||
},
|
},
|
||||||
|
{"open", (PyCFunction)podofo_PDFDoc_open, METH_VARARGS,
|
||||||
|
"Load a PDF document from a file path (string)"
|
||||||
|
},
|
||||||
{"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS,
|
{"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS,
|
||||||
"Save the PDF document to a path on disk"
|
"Save the PDF document to a path on disk"
|
||||||
},
|
},
|
||||||
|
Loading…
x
Reference in New Issue
Block a user