mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
IGN:Restore PoDoFo based set pdf metadata functionality
This commit is contained in:
parent
4be28fb1fa
commit
c24f507cc2
@ -5,8 +5,9 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from functools import partial
|
||||
|
||||
from calibre import plugins, prints
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors#, authors_to_string
|
||||
from calibre import prints
|
||||
from calibre.constants import plugins
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors, authors_to_string
|
||||
|
||||
pdfreflow, pdfreflow_error = plugins['pdfreflow']
|
||||
|
||||
@ -44,64 +45,13 @@ def get_metadata(stream, cover=True):
|
||||
|
||||
return mi
|
||||
|
||||
|
||||
|
||||
get_quick_metadata = partial(get_metadata, cover=False)
|
||||
|
||||
'''
|
||||
import sys, os, cStringIO
|
||||
import cStringIO
|
||||
from threading import Thread
|
||||
|
||||
from calibre import StreamReadWrapper
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
try:
|
||||
from calibre.utils.PythonMagickWand import \
|
||||
NewMagickWand, MagickReadImage, MagickSetImageFormat, \
|
||||
MagickWriteImage, ImageMagick
|
||||
_imagemagick_loaded = True
|
||||
except:
|
||||
_imagemagick_loaded = False
|
||||
from calibre.ebooks.metadata import MetaInformation, string_to_authors, authors_to_string
|
||||
from calibre.utils.pdftk import set_metadata as pdftk_set_metadata
|
||||
from calibre.utils.podofo import get_metadata as podofo_get_metadata, \
|
||||
set_metadata as podofo_set_metadata, Unavailable, get_metadata_quick
|
||||
from calibre.utils.poppler import get_metadata as get_metadata_poppler, NotAvailable
|
||||
|
||||
def get_quick_metadata(stream):
|
||||
try:
|
||||
return get_metadata_poppler(stream, False)
|
||||
except NotAvailable:
|
||||
pass
|
||||
|
||||
return get_metadata_pypdf(stream)
|
||||
raw = stream.read()
|
||||
mi = get_metadata_quick(raw)
|
||||
if mi.title == '_':
|
||||
mi.title = getattr(stream, 'name', _('Unknown'))
|
||||
mi.title = mi.title.rpartition('.')[0]
|
||||
return mi
|
||||
|
||||
|
||||
def get_metadata(stream, extract_cover=True):
|
||||
try:
|
||||
return get_metadata_poppler(stream, extract_cover)
|
||||
except NotAvailable:
|
||||
pass
|
||||
try:
|
||||
with TemporaryDirectory('_pdfmeta') as tdir:
|
||||
cpath = os.path.join(tdir, 'cover.pdf')
|
||||
if not extract_cover:
|
||||
cpath = None
|
||||
mi = podofo_get_metadata(stream, cpath=cpath)
|
||||
if mi.cover is not None:
|
||||
cdata = get_cover(mi.cover)
|
||||
mi.cover = None
|
||||
if cdata is not None:
|
||||
mi.cover_data = ('jpg', cdata)
|
||||
except Unavailable:
|
||||
mi = get_metadata_pypdf(stream)
|
||||
return mi
|
||||
|
||||
from calibre.utils.podofo import set_metadata as podofo_set_metadata, Unavailable
|
||||
|
||||
def set_metadata(stream, mi):
|
||||
stream.seek(0)
|
||||
@ -116,25 +66,6 @@ def set_metadata(stream, mi):
|
||||
set_metadata_pypdf(stream, mi)
|
||||
|
||||
|
||||
def get_metadata_pypdf(stream):
|
||||
""" Return metadata as a L{MetaInfo} object """
|
||||
from pyPdf import PdfFileReader
|
||||
mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
||||
try:
|
||||
with StreamReadWrapper(stream) as stream:
|
||||
info = PdfFileReader(stream).getDocumentInfo()
|
||||
if info.title:
|
||||
mi.title = info.title
|
||||
if info.author:
|
||||
mi.author = info.author
|
||||
mi.authors = string_to_authors(info.author)
|
||||
if info.subject:
|
||||
mi.category = info.subject
|
||||
except Exception, err:
|
||||
msg = u'Couldn\'t read metadata from pdf: %s with error %s'%(mi.title, unicode(err))
|
||||
print >>sys.stderr, msg.encode('utf8')
|
||||
return mi
|
||||
|
||||
class MetadataWriter(Thread):
|
||||
|
||||
def __init__(self, out_pdf, buf):
|
||||
@ -178,13 +109,4 @@ def set_metadata_pypdf(stream, mi):
|
||||
stream.write(out_str.read())
|
||||
stream.seek(0)
|
||||
|
||||
def get_cover(cover_path):
|
||||
with ImageMagick():
|
||||
wand = NewMagickWand()
|
||||
MagickReadImage(wand, cover_path)
|
||||
MagickSetImageFormat(wand, 'JPEG')
|
||||
MagickWriteImage(wand, '%s.jpg' % cover_path)
|
||||
return open('%s.jpg' % cover_path, 'rb').read()
|
||||
'''
|
||||
|
||||
|
||||
|
@ -79,6 +79,50 @@ extern "C" {
|
||||
return ans;
|
||||
}
|
||||
|
||||
/*
 * Python binding: set_metadata(pdf_data, info_dict)
 *
 * pdf_data  - the raw bytes of the PDF (parsed with the "s#" format).
 * info_dict - a dict. Only unicode values stored under the keys "Title",
 *             "Author" and "Keywords" are used; missing keys, non-unicode
 *             values and values that cannot be encoded as UTF-8 are skipped.
 *
 * Returns the string produced by Reflow::set_info() as a Python byte string.
 * Raises ValueError if info_dict is not a dict or if Reflow::is_locked()
 * reports true, and RuntimeError if a C++ exception escapes from Reflow.
 */
static PyObject *
pdfreflow_set_metadata(PyObject *self, PyObject *args) {
    char *pdfdata;
    Py_ssize_t size;
    PyObject *info;

    if (!PyArg_ParseTuple(args, "s#O", &pdfdata, &size, &info))
        return NULL;

    if (!PyDict_Check(info)) {
        PyErr_SetString(PyExc_ValueError, "Info object must be a dictionary.");
        return NULL;
    }

    char Title[10] = "Title", Author[10] = "Author", Keywords[10] = "Keywords";
    char *keys[3] = { Title, Author, Keywords };
    /* Owned references to the UTF-8 encoded values. pinfo only stores
     * pointers into these objects' buffers, so they must stay alive until
     * set_info() has returned and must be released afterwards. */
    PyObject *encoded[3] = { NULL, NULL, NULL };
    map<char *, char *> pinfo;

    for (int i = 0; i < 3; i++) {
        PyObject *val = PyDict_GetItemString(info, keys[i]); /* borrowed */
        if (!val || !PyUnicode_Check(val)) continue;
        encoded[i] = PyUnicode_AsUTF8String(val);
        if (!encoded[i]) {
            /* Skipping the key is deliberate best effort, but the pending
             * exception must not leak into a successful return. */
            PyErr_Clear();
            continue;
        }
        pinfo[keys[i]] = PyString_AS_STRING(encoded[i]);
    }

    PyObject *ans = NULL;
    try {
        Reflow reflow(pdfdata, static_cast<std::ifstream::pos_type>(size));
        if (reflow.is_locked()) {
            PyErr_SetString(PyExc_ValueError, "Setting metadata not possible in encrypted PDFs");
        } else {
            string result = reflow.set_info(pinfo);
            ans = PyString_FromStringAndSize(result.c_str(), result.size());
        }
    } catch (std::exception &e) {
        PyErr_SetString(PyExc_RuntimeError, e.what());
    } catch (...) {
        PyErr_SetString(PyExc_RuntimeError,
                "Unknown exception raised while setting metadata in PDF");
    }

    /* Single exit point: every path above falls through to here, so the
     * encoded strings are released on success and on every error. ans is
     * NULL (with a Python exception set) on all failure paths. */
    for (int i = 0; i < 3; i++) Py_XDECREF(encoded[i]);

    return ans;
}
|
||||
|
||||
static
|
||||
PyMethodDef pdfreflow_methods[] = {
|
||||
@ -90,6 +134,10 @@ extern "C" {
|
||||
"get_metadata(pdf_data, cover)\n\n"
|
||||
"Get metadata and (optionally) cover from the specified PDF."
|
||||
},
|
||||
{"set_metadata", pdfreflow_set_metadata, METH_VARARGS,
|
||||
"get_metadata(info_dict)\n\n"
|
||||
"Set metadata in the specified PDF. Currently broken."
|
||||
},
|
||||
|
||||
{NULL, NULL, 0, NULL}
|
||||
};
|
||||
|
@ -680,6 +680,16 @@ void XMLOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
|
||||
colorMap, interpolate, maskColors, inlineImg);
|
||||
}
|
||||
|
||||
static char stream_pdf[15] = "stream.pdf";
|
||||
|
||||
class MemInStream : public MemStream {
|
||||
public:
|
||||
MemInStream(char *buf, size_t st, size_t sz, Object *obj) :
|
||||
MemStream(buf, st, sz, obj) {}
|
||||
~MemInStream() {}
|
||||
GooString *getFileName() { return new GooString(stream_pdf); }
|
||||
};
|
||||
|
||||
Reflow::Reflow(char *pdfdata, size_t sz) :
|
||||
pdfdata(pdfdata), current_font_size(-1), doc(NULL)
|
||||
{
|
||||
@ -690,7 +700,7 @@ Reflow::Reflow(char *pdfdata, size_t sz) :
|
||||
if (!globalParams)
|
||||
throw ReflowException("Failed to allocate Globalparams");
|
||||
}
|
||||
MemStream *str = new MemStream(pdfdata, 0, sz, &obj);
|
||||
MemInStream *str = new MemInStream(pdfdata, 0, sz, &obj);
|
||||
this->doc = new PDFDoc(str, NULL, NULL);
|
||||
|
||||
if (!this->doc->isOk()) {
|
||||
@ -909,3 +919,56 @@ char* Reflow::render_first_page(size_t *data_size,
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
class MemOutStream : public OutStream {
|
||||
private:
|
||||
ostringstream out;
|
||||
|
||||
public:
|
||||
MemOutStream() :OutStream() {}
|
||||
~MemOutStream() {}
|
||||
void close() {}
|
||||
int getPos() { return out.tellp(); }
|
||||
void put(char c) { out.put(c); }
|
||||
void printf (const char *format, ...) {
|
||||
vector<char> buf;
|
||||
size_t written = strlen(format)*5;
|
||||
va_list ap;
|
||||
do {
|
||||
buf.reserve(written + 20);
|
||||
va_start(ap, format);
|
||||
written = vsnprintf(&buf[0], buf.capacity(), format, ap);
|
||||
va_end(ap);
|
||||
} while (written >= buf.capacity());
|
||||
out.write(&buf[0], written);
|
||||
}
|
||||
};
|
||||
|
||||
string Reflow::set_info(map<char *, char *> sinfo) {
|
||||
XRef *xref = this->doc->getXRef();
|
||||
if (!xref) throw ReflowException("No XRef table");
|
||||
Object *trailer_dict = xref->getTrailerDict();
|
||||
if (!trailer_dict || !trailer_dict->isDict()) throw ReflowException("No trailer dictionary");
|
||||
Object tmp;
|
||||
char INFO[5] = "Info";
|
||||
Object *info = trailer_dict->dictLookup(INFO, &tmp);
|
||||
if (!info) {
|
||||
info = new Object();
|
||||
info->initDict(xref);
|
||||
}
|
||||
if (!info->isDict()) throw ReflowException("Invalid info object");
|
||||
|
||||
for (map<char *, char *>::iterator it = sinfo.begin(); it != sinfo.end(); it++) {
|
||||
Object *tmp = new Object();
|
||||
tmp->initString(new GooString((*it).second));
|
||||
info->dictSet((*it).first, tmp);
|
||||
}
|
||||
|
||||
trailer_dict->dictSet(INFO, info);
|
||||
char out[20] = "/t/out.pdf";
|
||||
this->doc->saveAs(new GooString(out), writeForceRewrite);
|
||||
string ans;
|
||||
return ans;
|
||||
}
|
||||
|
||||
|
||||
|
@ -74,6 +74,9 @@ class Reflow {
|
||||
|
||||
/* Dump the PDF outline as the file outline.xml in the current directory */
|
||||
void dump_outline();
|
||||
|
||||
/* Set the info dictionary. Currently broken. */
|
||||
string set_info(map<char *, char *> info);
|
||||
};
|
||||
|
||||
class XMLString {
|
||||
|
Loading…
x
Reference in New Issue
Block a user