Sync to pluginize

This commit is contained in:
John Schember 2009-05-27 18:20:14 -04:00
commit 814763fc59
3 changed files with 89 additions and 39 deletions

View File

@ -18,7 +18,7 @@ except:
from calibre.ebooks.metadata import MetaInformation, authors_to_string from calibre.ebooks.metadata import MetaInformation, authors_to_string
from calibre.utils.pdftk import set_metadata as pdftk_set_metadata from calibre.utils.pdftk import set_metadata as pdftk_set_metadata
from calibre.utils.podofo import get_metadata as podofo_get_metadata, \ from calibre.utils.podofo import get_metadata as podofo_get_metadata, \
set_metadata as podofo_set_metadata, Unavailable set_metadata as podofo_set_metadata, Unavailable, write_first_page
def get_metadata(stream, extract_cover=True): def get_metadata(stream, extract_cover=True):
@ -119,29 +119,16 @@ def set_metadata_pypdf(stream, mi):
stream.seek(0) stream.seek(0)
def get_cover(stream): def get_cover(stream):
from pyPdf import PdfFileReader, PdfFileWriter stream.seek(0)
with TemporaryDirectory('_pdfmeta') as tdir:
cover_path = os.path.join(tdir, 'cover.pdf')
write_first_page(stream, cover_path)
with ImageMagick():
wand = NewMagickWand()
MagickReadImage(wand, cover_path)
MagickSetImageFormat(wand, 'JPEG')
MagickWriteImage(wand, '%s.jpg' % cover_path)
return open('%s.jpg' % cover_path, 'rb').read()
try:
with StreamReadWrapper(stream) as stream:
pdf = PdfFileReader(stream)
output = PdfFileWriter()
if len(pdf.pages) >= 1:
output.addPage(pdf.getPage(0))
with TemporaryDirectory('_pdfmeta') as tdir:
cover_path = os.path.join(tdir, 'cover.pdf')
with open(cover_path, "wb") as outputStream:
output.write(outputStream)
with ImageMagick():
wand = NewMagickWand()
MagickReadImage(wand, cover_path)
MagickSetImageFormat(wand, 'JPEG')
MagickWriteImage(wand, '%s.jpg' % cover_path)
return open('%s.jpg' % cover_path, 'rb').read()
except:
import traceback
traceback.print_exc()
return ''

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, time import os, time, shutil
from calibre.constants import plugins, preferred_encoding from calibre.constants import plugins, preferred_encoding
from calibre.ebooks.metadata import MetaInformation, string_to_authors, \ from calibre.ebooks.metadata import MetaInformation, string_to_authors, \
@ -19,6 +19,38 @@ podofo, podofo_err = plugins['podofo']
class Unavailable(Exception): pass class Unavailable(Exception): pass
def write_first_page(stream, opath):
    '''
    Extract the first page of the PDF read from `stream` into `opath`.

    The stream contents are copied to a temporary file and the extraction
    is dispatched as a 'write_pdf_first_page' job on a Server created with
    pool_size=1; this function blocks, polling the job until it finishes.

    :param stream: File-like object positioned at the start of the PDF data.
    :param opath: Path the single-page PDF is written to.
    :raises Unavailable: If the podofo plugin could not be loaded.
    :raises ValueError: If the job finishes without a truthy result.
    '''
    if not podofo:
        raise Unavailable(podofo_err)
    pt = PersistentTemporaryFile('_podofo.pdf')
    try:
        try:
            pt.write(stream.read())
        finally:
            pt.close()
        server = Server(pool_size=1)
        try:
            # NOTE(review): the lambda is presumably a notification
            # callback required by ParallelJob -- confirm against its API.
            job = ParallelJob('write_pdf_first_page',
                    'Extract first page of pdf',
                    lambda x, y: x, args=[pt.name, opath])
            server.add_job(job)
            while not job.is_finished:
                time.sleep(0.1)
                job.update()
            # One more update after completion, as in the original code, so
            # that the final result/details are picked up.
            job.update()
        finally:
            # Always shut the server down, even if polling raised.
            server.close()
    finally:
        # The temporary input copy is only needed while the job runs;
        # remove it (best effort) so repeated calls do not litter the
        # temp directory.
        try:
            os.remove(pt.name)
        except OSError:
            pass
    if not job.result:
        raise ValueError('Failed to extract first page: ' + job.details)
def write_first_page_(inpath, outpath):
    '''
    Write a PDF containing only the first page of `inpath` to `outpath`.

    A single-page input is simply copied. For longer documents every page
    after the first is deleted via podofo before saving.

    :param inpath: Path of the PDF to read.
    :param outpath: Path the one-page PDF is written to.
    :return: True on success.
    :raises ValueError: If the document reports fewer than one page.
    '''
    doc = podofo.PDFDoc()
    doc.open(inpath)
    page_count = doc.pages
    if page_count < 1:
        raise ValueError('PDF has no pages')
    if page_count > 1:
        # Drop (page_count - 1) pages starting at index 1, leaving page one.
        doc.delete_pages(1, page_count - 1)
        doc.save(outpath)
    else:
        # Exactly one page: nothing to strip, a plain copy is enough.
        shutil.copyfile(inpath, outpath)
    return True
def get_metadata(stream): def get_metadata(stream):
if not podofo: if not podofo:
raise Unavailable(podofo_err) raise Unavailable(podofo_err)

View File

@ -100,6 +100,29 @@ podofo_PDFDoc_save(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
return Py_None; return Py_None;
} }
// Getter for the read-only `pages` attribute: returns the document's page
// count as a Python int, or NULL with an exception set on failure.
static PyObject *
podofo_PDFDoc_pages_getter(podofo_PDFDoc *self, void *closure) {
    int pages;
    try {
        pages = self->doc->GetPageCount();
    } catch (const PdfError & err) {
        // Do not let a C++ exception propagate into the interpreter;
        // report it the same way the other PDFDoc methods do.
        podofo_set_exception(err);
        return NULL;
    }
    // PyInt_FromLong already returns a new reference owned by the caller.
    // The previous extra Py_INCREF leaked one reference on every access.
    return PyInt_FromLong(static_cast<long>(pages));
}
// delete_pages(first_page, num_pages): remove `num_pages` pages starting at
// `first_page` from the document. Returns None, or NULL with a Python
// exception set if the arguments are not two ints or PoDoFo reports an error.
static PyObject *
podofo_PDFDoc_delete_pages(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    int start, count;

    // Bad arguments: PyArg_ParseTuple has already set the exception.
    if (!PyArg_ParseTuple(args, "ii", &start, &count)) return NULL;

    try {
        self->doc->DeletePages(start, count);
    } catch (const PdfError & err) {
        // Translate the PoDoFo error into a Python exception.
        podofo_set_exception(err);
        return NULL;
    }

    Py_RETURN_NONE;
}
static PyObject * static PyObject *
podofo_convert_pdfstring(const PdfString &s) { podofo_convert_pdfstring(const PdfString &s) {
std::string raw = s.GetStringUtf8(); std::string raw = s.GetStringUtf8();
@ -256,33 +279,41 @@ static PyMethodDef podofo_PDFDoc_methods[] = {
{"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS, {"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS,
"Save the PDF document to a path on disk" "Save the PDF document to a path on disk"
}, },
{"delete_pages", (PyCFunction)podofo_PDFDoc_delete_pages, METH_VARARGS,
"delete_pages(start_page, num_pages) -> int, int\nDelete pages from the PDF document."
},
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };
static PyGetSetDef podofo_PDFDoc_getseters[] = { static PyGetSetDef podofo_PDFDoc_getsetters[] = {
{"title", {(char *)"title",
(getter)podofo_PDFDoc_title_getter, (setter)podofo_PDFDoc_title_setter, (getter)podofo_PDFDoc_title_getter, (setter)podofo_PDFDoc_title_setter,
"Document title", (char *)"Document title",
NULL}, NULL},
{"author", {(char *)"author",
(getter)podofo_PDFDoc_author_getter, (setter)podofo_PDFDoc_author_setter, (getter)podofo_PDFDoc_author_getter, (setter)podofo_PDFDoc_author_setter,
"Document author", (char *)"Document author",
NULL}, NULL},
{"subject", {(char *)"subject",
(getter)podofo_PDFDoc_subject_getter, (setter)podofo_PDFDoc_subject_setter, (getter)podofo_PDFDoc_subject_getter, (setter)podofo_PDFDoc_subject_setter,
"Document subject", (char *)"Document subject",
NULL}, NULL},
{"keywords", {(char *)"keywords",
(getter)podofo_PDFDoc_keywords_getter, (setter)podofo_PDFDoc_keywords_setter, (getter)podofo_PDFDoc_keywords_getter, (setter)podofo_PDFDoc_keywords_setter,
"Document keywords", (char *)"Document keywords",
NULL}, NULL},
{"creator", {(char *)"creator",
(getter)podofo_PDFDoc_creator_getter, (setter)podofo_PDFDoc_creator_setter, (getter)podofo_PDFDoc_creator_getter, (setter)podofo_PDFDoc_creator_setter,
"Document creator", (char *)"Document creator",
NULL}, NULL},
{"producer", {(char *)"producer",
(getter)podofo_PDFDoc_producer_getter, (setter)podofo_PDFDoc_producer_setter, (getter)podofo_PDFDoc_producer_getter, (setter)podofo_PDFDoc_producer_setter,
"Document producer", (char *)"Document producer",
NULL},
{(char *)"pages",
(getter)podofo_PDFDoc_pages_getter, NULL,
(char *)"Number of pages in document (read only)",
NULL}, NULL},
{NULL} /* Sentinel */ {NULL} /* Sentinel */
@ -319,7 +350,7 @@ static PyTypeObject podofo_PDFDocType = {
0, /* tp_iternext */ 0, /* tp_iternext */
podofo_PDFDoc_methods, /* tp_methods */ podofo_PDFDoc_methods, /* tp_methods */
0, /* tp_members */ 0, /* tp_members */
podofo_PDFDoc_getseters, /* tp_getset */ podofo_PDFDoc_getsetters, /* tp_getset */
0, /* tp_base */ 0, /* tp_base */
0, /* tp_dict */ 0, /* tp_dict */
0, /* tp_descr_get */ 0, /* tp_descr_get */