Sync to pluginize

This commit is contained in:
John Schember 2009-05-27 18:20:14 -04:00
commit 814763fc59
3 changed files with 89 additions and 39 deletions

View File

@ -18,7 +18,7 @@ except:
from calibre.ebooks.metadata import MetaInformation, authors_to_string
from calibre.utils.pdftk import set_metadata as pdftk_set_metadata
from calibre.utils.podofo import get_metadata as podofo_get_metadata, \
set_metadata as podofo_set_metadata, Unavailable
set_metadata as podofo_set_metadata, Unavailable, write_first_page
def get_metadata(stream, extract_cover=True):
@ -119,29 +119,16 @@ def set_metadata_pypdf(stream, mi):
stream.seek(0)
# Render the first page of the PDF in `stream` as a JPEG and return the JPEG
# bytes; returns '' when anything in the process raises.
# NOTE(review): this listing is a flattened diff hunk -- the pyPdf based
# extraction (PdfFileReader/PdfFileWriter) and the write_first_page() call both
# appear below and indentation is lost; confirm which lines survive in the
# committed file before relying on the exact control flow.
def get_cover(stream):
from pyPdf import PdfFileReader, PdfFileWriter
try:
with StreamReadWrapper(stream) as stream:
pdf = PdfFileReader(stream)
output = PdfFileWriter()
if len(pdf.pages) >= 1:
output.addPage(pdf.getPage(0))
# Rewind so the whole stream is available to the extraction step.
stream.seek(0)
# The intermediate single page PDF and the JPEG live in a temporary directory.
with TemporaryDirectory('_pdfmeta') as tdir:
cover_path = os.path.join(tdir, 'cover.pdf')
with open(cover_path, "wb") as outputStream:
output.write(outputStream)
write_first_page(stream, cover_path)
# Convert cover.pdf to cover.pdf.jpg with the MagickWand API.
with ImageMagick():
wand = NewMagickWand()
MagickReadImage(wand, cover_path)
MagickSetImageFormat(wand, 'JPEG')
MagickWriteImage(wand, '%s.jpg' % cover_path)
return open('%s.jpg' % cover_path, 'rb').read()
# Best effort: any failure is printed to stderr and an empty string is returned.
except:
import traceback
traceback.print_exc()
return ''

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, time
import os, time, shutil
from calibre.constants import plugins, preferred_encoding
from calibre.ebooks.metadata import MetaInformation, string_to_authors, \
@ -19,6 +19,38 @@ podofo, podofo_err = plugins['podofo']
class Unavailable(Exception): pass
def write_first_page(stream, opath):
    '''
    Extract the first page of the PDF read from `stream` into the file `opath`.

    The stream contents are spooled to a temporary file, then a
    'write_pdf_first_page' job is submitted to a :class:`Server` created with
    ``pool_size=1``. This call blocks, polling the job every 0.1 seconds,
    until the job reports that it is finished.

    :param stream: File-like object; everything ``stream.read()`` returns is used.
    :param opath: Path of the output PDF file.
    :raises Unavailable: If the podofo plugin could not be loaded.
    :raises ValueError: If the job finishes without a truthy result.
    '''
    if not podofo:
        raise Unavailable(podofo_err)
    pt = PersistentTemporaryFile('_podofo.pdf')
    try:
        try:
            pt.write(stream.read())
        finally:
            pt.close()
        server = Server(pool_size=1)
        try:
            job = ParallelJob('write_pdf_first_page', 'Extract first page of pdf',
                    lambda x,y:x, args=[pt.name, opath])
            server.add_job(job)
            while not job.is_finished:
                time.sleep(0.1)
                job.update()
            job.update()
        finally:
            # Always shut the server down, even if polling the job raised.
            server.close()
    finally:
        # The spooled copy is only needed while the job runs; removal is best
        # effort because the file may already be gone or still be locked.
        try:
            os.remove(pt.name)
        except OSError:
            pass
    if not job.result:
        raise ValueError('Failed to extract first page: ' + job.details)
def write_first_page_(inpath, outpath):
    '''
    Write a PDF containing only the first page of `inpath` to `outpath`.

    A single page document is copied unchanged; otherwise every page after
    the first is deleted and the result is saved to `outpath`.

    :param inpath: Path of the PDF file to read.
    :param outpath: Path of the PDF file to create.
    :return: ``True`` on success.
    :raises ValueError: If the document reports fewer than one page.
    '''
    doc = podofo.PDFDoc()
    doc.open(inpath)
    page_count = doc.pages
    if page_count < 1:
        raise ValueError('PDF has no pages')
    if page_count > 1:
        # Drop everything from the second page (index 1) to the end.
        doc.delete_pages(1, page_count - 1)
        doc.save(outpath)
    else:
        shutil.copyfile(inpath, outpath)
    return True
def get_metadata(stream):
if not podofo:
raise Unavailable(podofo_err)

View File

@ -100,6 +100,29 @@ podofo_PDFDoc_save(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
return Py_None;
}
/*
 * Getter for the `pages` attribute: returns the document's page count as a
 * Python int. Returns NULL if PyInt_FromLong fails to create the object.
 */
static PyObject *
podofo_PDFDoc_pages_getter(podofo_PDFDoc *self, void *closure) {
    int pages = self->doc->GetPageCount();
    // PyInt_FromLong already hands back a new reference that the caller owns;
    // an extra Py_INCREF here would leak one reference on every access.
    return PyInt_FromLong(static_cast<long>(pages));
}
/*
 * delete_pages(first_page, num_pages): forwards both integers to the
 * underlying document's DeletePages(). Returns None on success, or NULL with
 * a Python exception set when argument parsing fails or a PdfError is thrown.
 */
static PyObject *
podofo_PDFDoc_delete_pages(podofo_PDFDoc *self, PyObject *args, PyObject *kwargs) {
    int start, count;

    // Bail out early: PyArg_ParseTuple has already set the exception.
    if (!PyArg_ParseTuple(args, "ii", &start, &count))
        return NULL;

    try {
        self->doc->DeletePages(start, count);
    } catch (const PdfError &err) {
        podofo_set_exception(err);
        return NULL;
    }

    Py_RETURN_NONE;
}
static PyObject *
podofo_convert_pdfstring(const PdfString &s) {
std::string raw = s.GetStringUtf8();
@ -256,33 +279,41 @@ static PyMethodDef podofo_PDFDoc_methods[] = {
{"save", (PyCFunction)podofo_PDFDoc_save, METH_VARARGS,
"Save the PDF document to a path on disk"
},
{"delete_pages", (PyCFunction)podofo_PDFDoc_delete_pages, METH_VARARGS,
"delete_pages(start_page, num_pages) -> int, int\nDelete pages from the PDF document."
},
{NULL} /* Sentinel */
};
static PyGetSetDef podofo_PDFDoc_getseters[] = {
{"title",
static PyGetSetDef podofo_PDFDoc_getsetters[] = {
{(char *)"title",
(getter)podofo_PDFDoc_title_getter, (setter)podofo_PDFDoc_title_setter,
"Document title",
(char *)"Document title",
NULL},
{"author",
{(char *)"author",
(getter)podofo_PDFDoc_author_getter, (setter)podofo_PDFDoc_author_setter,
"Document author",
(char *)"Document author",
NULL},
{"subject",
{(char *)"subject",
(getter)podofo_PDFDoc_subject_getter, (setter)podofo_PDFDoc_subject_setter,
"Document subject",
(char *)"Document subject",
NULL},
{"keywords",
{(char *)"keywords",
(getter)podofo_PDFDoc_keywords_getter, (setter)podofo_PDFDoc_keywords_setter,
"Document keywords",
(char *)"Document keywords",
NULL},
{"creator",
{(char *)"creator",
(getter)podofo_PDFDoc_creator_getter, (setter)podofo_PDFDoc_creator_setter,
"Document creator",
(char *)"Document creator",
NULL},
{"producer",
{(char *)"producer",
(getter)podofo_PDFDoc_producer_getter, (setter)podofo_PDFDoc_producer_setter,
"Document producer",
(char *)"Document producer",
NULL},
{(char *)"pages",
(getter)podofo_PDFDoc_pages_getter, NULL,
(char *)"Number of pages in document (read only)",
NULL},
{NULL} /* Sentinel */
@ -319,7 +350,7 @@ static PyTypeObject podofo_PDFDocType = {
0, /* tp_iternext */
podofo_PDFDoc_methods, /* tp_methods */
0, /* tp_members */
podofo_PDFDoc_getseters, /* tp_getset */
podofo_PDFDoc_getsetters, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */