mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Move the iterate over elems loop in native code as well
This commit is contained in:
parent
e33c18459a
commit
7687c3dab6
@ -10,65 +10,20 @@
|
|||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <stack>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
typedef struct {
|
struct PyObjectDeleter {
|
||||||
PyObject_HEAD
|
void operator()(PyObject *obj) {
|
||||||
/* Type-specific fields go here. */
|
Py_XDECREF(obj);
|
||||||
PyObject *buf;
|
|
||||||
size_t used;
|
|
||||||
std::vector<std::string> *nsmap;
|
|
||||||
} Serializer;
|
|
||||||
|
|
||||||
|
|
||||||
static void
|
|
||||||
dealloc(Serializer* self)
|
|
||||||
{
|
|
||||||
Py_CLEAR(self->buf);
|
|
||||||
if (self->nsmap) delete self->nsmap;
|
|
||||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
|
||||||
}
|
|
||||||
|
|
||||||
static PyObject *
|
|
||||||
alloc(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|
||||||
{
|
|
||||||
Serializer *self;
|
|
||||||
|
|
||||||
self = (Serializer *)type->tp_alloc(type, 0);
|
|
||||||
if (self != NULL) {
|
|
||||||
self->used = 0;
|
|
||||||
self->buf = NULL;
|
|
||||||
self->nsmap = new (std::nothrow) std::vector<std::string>();
|
|
||||||
if (!self->nsmap) { PyErr_NoMemory(); dealloc(self); self = NULL; }
|
|
||||||
}
|
|
||||||
return (PyObject *)self;
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
// unique_ptr that uses Py_XDECREF as the destructor function.
|
||||||
|
typedef std::unique_ptr<PyObject, PyObjectDeleter> pyunique_ptr;
|
||||||
|
|
||||||
|
|
||||||
static inline bool
|
#define write_str_literal(x) this->write_data(x, sizeof(x)-1)
|
||||||
ensure_space(Serializer *self, size_t amt) {
|
|
||||||
size_t required = amt + self->used;
|
|
||||||
if (!self->buf) {
|
|
||||||
self->buf = PyBytes_FromStringAndSize(NULL, std::max(required, static_cast<size_t>(128u * 1024u)));
|
|
||||||
if (!self->buf) return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (required > static_cast<size_t>(PyBytes_GET_SIZE(self->buf))) {
|
|
||||||
if (_PyBytes_Resize(&(self->buf), std::max(required, static_cast<size_t>(2 * PyBytes_GET_SIZE(self->buf)))) != 0) return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
|
||||||
write_data(Serializer *self, const char *data, size_t sz) {
|
|
||||||
if (!ensure_space(self, sz)) return false;
|
|
||||||
memcpy(PyBytes_AS_STRING(self->buf) + self->used, data, sz);
|
|
||||||
self->used += sz;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define write_str_literal(self, x) write_data(self, x, sizeof(x)-1)
|
|
||||||
|
|
||||||
#define UTF8_ACCEPT 0
|
#define UTF8_ACCEPT 0
|
||||||
#define UTF8_REJECT 1
|
#define UTF8_REJECT 1
|
||||||
@ -137,12 +92,74 @@ write_hex16(char *out, uint16_t val) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool
|
static inline bool
|
||||||
write_string_as_json(Serializer *self, const char *str)
|
namespaces_are_equal(const char *a, const char *b, size_t len) {
|
||||||
{
|
for (size_t i = 0; i < len; i++) {
|
||||||
|
if (a[i] != b[i]) return false;
|
||||||
|
if (!b[i]) return true;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
class StringOrNone {
|
||||||
|
PyObject *temp, *orig;
|
||||||
|
const char *data;
|
||||||
|
public:
|
||||||
|
StringOrNone(PyObject *x) : temp(0), orig(x), data(0) {
|
||||||
|
if (x && x != Py_None) {
|
||||||
|
if (PyUnicode_Check(x)) {
|
||||||
|
#if PY_MAJOR_VERSION > 2
|
||||||
|
this->data = PyUnicode_AsUTF8(x);
|
||||||
|
#else
|
||||||
|
this->temp = PyUnicode_AsUTF8String(x);
|
||||||
|
if (this->temp) this->data = PyBytes_AS_STRING(this->temp);
|
||||||
|
#endif
|
||||||
|
} else if (PyBytes_Check(x)) { this->data = PyBytes_AS_STRING(x); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
~StringOrNone() {
|
||||||
|
Py_CLEAR(this->temp);
|
||||||
|
Py_CLEAR(this->orig);
|
||||||
|
}
|
||||||
|
PyObject* get() { return this->orig; }
|
||||||
|
const char *c_str() { return this->data; }
|
||||||
|
explicit operator bool() { return this->orig ? true : false; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class Serializer {
|
||||||
|
PyObject *buf = NULL;
|
||||||
|
size_t used = 0;
|
||||||
|
std::vector<std::string> nsmap;
|
||||||
|
|
||||||
|
bool
|
||||||
|
ensure_space(size_t amt) {
|
||||||
|
size_t required = amt + this->used;
|
||||||
|
if (!this->buf) {
|
||||||
|
this->buf = PyBytes_FromStringAndSize(NULL, std::max(required, static_cast<size_t>(128u * 1024u)));
|
||||||
|
if (!this->buf) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (required > static_cast<size_t>(PyBytes_GET_SIZE(this->buf))) {
|
||||||
|
if (_PyBytes_Resize(&(this->buf), std::max(
|
||||||
|
required, static_cast<size_t>(2 * PyBytes_GET_SIZE(this->buf)))) != 0) return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
write_data(const char *data, size_t sz) {
|
||||||
|
if (!this->ensure_space(sz)) return false;
|
||||||
|
memcpy(PyBytes_AS_STRING(this->buf) + this->used, data, sz);
|
||||||
|
this->used += sz;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
write_string_as_json(const char *str) {
|
||||||
const char *s = str;
|
const char *s = str;
|
||||||
if (!ensure_space(self, 32)) return false;
|
if (!this->ensure_space(32)) return false;
|
||||||
char *b = PyBytes_AS_STRING(self->buf) + self->used;
|
char *b = PyBytes_AS_STRING(this->buf) + this->used;
|
||||||
|
|
||||||
*b++ = '"';
|
*b++ = '"';
|
||||||
while (*s != 0) {
|
while (*s != 0) {
|
||||||
@ -214,66 +231,42 @@ write_string_as_json(Serializer *self, const char *str)
|
|||||||
* Update self to know about the new bytes,
|
* Update self to know about the new bytes,
|
||||||
* and set up b to write another encoded character.
|
* and set up b to write another encoded character.
|
||||||
*/
|
*/
|
||||||
self->used = b - PyBytes_AS_STRING(self->buf);
|
this->used = b - PyBytes_AS_STRING(this->buf);
|
||||||
if (!ensure_space(self, 32)) return false;
|
if (!this->ensure_space(32)) return false;
|
||||||
b = PyBytes_AS_STRING(self->buf) + self->used;
|
b = PyBytes_AS_STRING(this->buf) + this->used;
|
||||||
}
|
}
|
||||||
*b++ = '"';
|
*b++ = '"';
|
||||||
self->used = b - PyBytes_AS_STRING(self->buf);
|
this->used = b - PyBytes_AS_STRING(this->buf);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline int
|
||||||
static PyObject*
|
namespace_index(const char *ns, size_t nslen) {
|
||||||
pywrite(Serializer *self, PyObject *arg) {
|
for (size_t i = 0; i < this->nsmap.size(); i++) {
|
||||||
const char *data;
|
if (namespaces_are_equal(this->nsmap[i].c_str(), ns, nslen)) return i;
|
||||||
size_t sz;
|
|
||||||
PyObject *temp = NULL;
|
|
||||||
if (PyBytes_Check(arg)) {
|
|
||||||
data = PyBytes_AS_STRING(arg);
|
|
||||||
sz = PyBytes_GET_SIZE(arg);
|
|
||||||
} else if (PyUnicode_Check(arg)) {
|
|
||||||
#if PY_MAJOR_VERSION > 2
|
|
||||||
Py_ssize_t ssz;
|
|
||||||
data = PyUnicode_AsUTF8AndSize(arg, &ssz);
|
|
||||||
sz = ssz;
|
|
||||||
if (data == NULL) return NULL;
|
|
||||||
#else
|
|
||||||
temp = PyUnicode_AsUTF8String(arg);
|
|
||||||
if (temp == NULL) return NULL;
|
|
||||||
data = PyBytes_AS_STRING(temp);
|
|
||||||
sz = PyBytes_GET_SIZE(temp);
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
PyErr_SetString(PyExc_TypeError, "A unicode or bytes object expected");
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
bool ok = write_data(self, data, sz);
|
this->nsmap.push_back(std::string(ns, nslen));
|
||||||
Py_CLEAR(temp);
|
return this->nsmap.size() - 1;
|
||||||
if (!ok) return NULL;
|
|
||||||
Py_RETURN_NONE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
bool
|
||||||
namespaces_are_equal(const char *a, const char *b, size_t len) {
|
add_comment(const char *text, const char *tail, const char *type) {
|
||||||
for (size_t i = 0; i < len; i++) {
|
if (!write_str_literal("{\"s\":")) return false;
|
||||||
if (a[i] != b[i]) return false;
|
if (!this->write_string_as_json(type)) return false;
|
||||||
if (!b[i]) return true;
|
if (text) {
|
||||||
|
if (!write_str_literal(",\"x\":")) return false;
|
||||||
|
if (!this->write_string_as_json(text)) return false;
|
||||||
}
|
}
|
||||||
|
if (tail) {
|
||||||
|
if (!write_str_literal(",\"l\":")) return false;
|
||||||
|
if (!this->write_string_as_json(tail)) return false;
|
||||||
|
}
|
||||||
|
if (!write_str_literal("}")) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int
|
bool
|
||||||
namespace_index(Serializer *self, const char *ns, size_t nslen) {
|
write_attr(PyObject *args) {
|
||||||
for (size_t i = 0; i < self->nsmap->size(); i++) {
|
|
||||||
if (namespaces_are_equal((*self->nsmap)[i].c_str(), ns, nslen)) return i;
|
|
||||||
}
|
|
||||||
self->nsmap->push_back(std::string(ns, nslen));
|
|
||||||
return self->nsmap->size() - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
|
||||||
write_attr(Serializer *self, PyObject *args) {
|
|
||||||
const char *attr, *val;
|
const char *attr, *val;
|
||||||
#if PY_MAJOR_VERSION > 2
|
#if PY_MAJOR_VERSION > 2
|
||||||
if (!PyArg_ParseTuple(args, "ss", &attr, &val)) return false;
|
if (!PyArg_ParseTuple(args, "ss", &attr, &val)) return false;
|
||||||
@ -284,18 +277,18 @@ write_attr(Serializer *self, PyObject *args) {
|
|||||||
const char *attr_name = attr;
|
const char *attr_name = attr;
|
||||||
int nsindex = -1;
|
int nsindex = -1;
|
||||||
if (b) {
|
if (b) {
|
||||||
nsindex = namespace_index(self, attr + 1, b - attr - 1);
|
nsindex = this->namespace_index(attr + 1, b - attr - 1);
|
||||||
attr_name = b + 1;
|
attr_name = b + 1;
|
||||||
}
|
}
|
||||||
if (!write_str_literal(self, "[")) goto end;
|
if (!write_str_literal("[")) goto end;
|
||||||
if (!write_string_as_json(self, attr_name)) goto end;
|
if (!this->write_string_as_json(attr_name)) goto end;
|
||||||
if (!write_str_literal(self, ",")) goto end;
|
if (!write_str_literal(",")) goto end;
|
||||||
if (!write_string_as_json(self, val)) goto end;
|
if (!this->write_string_as_json(val)) goto end;
|
||||||
if (nsindex > -1) {
|
if (nsindex > -1) {
|
||||||
char buf[32];
|
char buf[32];
|
||||||
write_data(self, buf, snprintf(buf, sizeof(buf), ",%d", nsindex));
|
this->write_data(buf, snprintf(buf, sizeof(buf), ",%d", nsindex));
|
||||||
}
|
}
|
||||||
if (!write_str_literal(self, "]")) goto end;
|
if (!write_str_literal("]")) goto end;
|
||||||
|
|
||||||
end:
|
end:
|
||||||
#if PY_MAJOR_VERSION < 3
|
#if PY_MAJOR_VERSION < 3
|
||||||
@ -304,170 +297,148 @@ end:
|
|||||||
return PyErr_Occurred() ? false : true;
|
return PyErr_Occurred() ? false : true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
|
||||||
start_tag(Serializer *self, PyObject *args) {
|
bool
|
||||||
const char *tag, *text, *tail;
|
start_tag(const char *tag, const char *text, const char *tail, PyObject *items) {
|
||||||
PyObject *items;
|
if (!PyList_Check(items)) { PyErr_SetString(PyExc_TypeError, "attrs of a tag must be a list"); return false; }
|
||||||
#if PY_MAJOR_VERSION > 2
|
|
||||||
if (!PyArg_ParseTuple(args, "zzzO!", &tag, &text, &tail, &PyList_Type, &items)) return NULL;
|
|
||||||
#else
|
|
||||||
if (!PyArg_ParseTuple(args, "etetetO!", "UTF-8", &tag, "UTF-8", &text, "UTF-8", &tail, &PyList_Type, &items)) return NULL;
|
|
||||||
#endif
|
|
||||||
Py_ssize_t num_attrs = PyList_Size(items);
|
Py_ssize_t num_attrs = PyList_Size(items);
|
||||||
const char *b = strrchr(tag, '}');
|
const char *b = strrchr(tag, '}');
|
||||||
const char *tag_name = tag;
|
const char *tag_name = tag;
|
||||||
int nsindex = -1;
|
int nsindex = -1;
|
||||||
if (b) {
|
if (b) {
|
||||||
nsindex = namespace_index(self, tag + 1, b - tag - 1);
|
nsindex = this->namespace_index(tag + 1, b - tag - 1);
|
||||||
tag_name = b + 1;
|
tag_name = b + 1;
|
||||||
}
|
}
|
||||||
if (!write_str_literal(self, "{\"n\":")) goto end;
|
if (!write_str_literal("{\"n\":")) return false;
|
||||||
if (!write_string_as_json(self, tag_name)) goto end;
|
if (!this->write_string_as_json(tag_name)) return false;
|
||||||
if (nsindex > -1) {
|
if (nsindex > -1) {
|
||||||
char buf[32];
|
char buf[32];
|
||||||
write_data(self, buf, snprintf(buf, sizeof(buf), ",\"s\":%d", nsindex));
|
this->write_data(buf, snprintf(buf, sizeof(buf), ",\"s\":%d", nsindex));
|
||||||
}
|
}
|
||||||
if (text) {
|
if (text) {
|
||||||
if (!write_str_literal(self, ",\"x\":")) goto end;
|
if (!write_str_literal(",\"x\":")) return false;
|
||||||
if (!write_string_as_json(self, text)) goto end;
|
if (!this->write_string_as_json(text)) return false;
|
||||||
}
|
}
|
||||||
if (tail) {
|
if (tail) {
|
||||||
if (!write_str_literal(self, ",\"l\":")) goto end;
|
if (!write_str_literal(",\"l\":")) return false;
|
||||||
if (!write_string_as_json(self, tail)) goto end;
|
if (!this->write_string_as_json(tail)) return false;
|
||||||
}
|
}
|
||||||
if (num_attrs > 0) {
|
if (num_attrs > 0) {
|
||||||
if (!write_str_literal(self, ",\"a\":[")) goto end;
|
if (!write_str_literal(",\"a\":[")) return false;
|
||||||
for (Py_ssize_t i = 0; i < num_attrs; i++) {
|
for (Py_ssize_t i = 0; i < num_attrs; i++) {
|
||||||
if (i) { if (!write_str_literal(self, ",")) goto end; }
|
if (i) { if (!write_str_literal(",")) return false; }
|
||||||
if (!write_attr(self, PyList_GET_ITEM(items, i))) goto end;
|
if (!this->write_attr(PyList_GET_ITEM(items, i))) return false;
|
||||||
}
|
}
|
||||||
if (!write_str_literal(self, "]")) goto end;
|
if (!write_str_literal("]")) return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
end:
|
return true;
|
||||||
#if PY_MAJOR_VERSION < 3
|
|
||||||
PyMem_Free(tag); PyMem_Free(text); PyMem_Free(tail);
|
|
||||||
#endif
|
|
||||||
if (PyErr_Occurred()) return NULL;
|
|
||||||
Py_RETURN_NONE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
bool
|
||||||
add_comment(Serializer *self, PyObject *args) {
|
add_nsmap() {
|
||||||
const char *text, *tail, *type;
|
if (!write_str_literal("[")) return false;
|
||||||
#if PY_MAJOR_VERSION > 2
|
|
||||||
if (!PyArg_ParseTuple(args, "zzs", &text, &tail, &type)) return NULL;
|
|
||||||
#else
|
|
||||||
if (!PyArg_ParseTuple(args, "etets", "UTF-8", &text, "UTF-8", &tail, &type)) return NULL;
|
|
||||||
#endif
|
|
||||||
if (!write_str_literal(self, "{\"s\":")) goto end;
|
|
||||||
if (!write_string_as_json(self, type)) goto end;
|
|
||||||
if (text) {
|
|
||||||
if (!write_str_literal(self, ",\"x\":")) goto end;
|
|
||||||
if (!write_string_as_json(self, text)) goto end;
|
|
||||||
}
|
|
||||||
if (tail) {
|
|
||||||
if (!write_str_literal(self, ",\"l\":")) goto end;
|
|
||||||
if (!write_string_as_json(self, tail)) goto end;
|
|
||||||
}
|
|
||||||
if (!write_str_literal(self, "}")) goto end;
|
|
||||||
end:
|
|
||||||
#if PY_MAJOR_VERSION < 3
|
|
||||||
PyMem_Free(text); PyMem_Free(tail);
|
|
||||||
#endif
|
|
||||||
if (PyErr_Occurred()) return NULL;
|
|
||||||
Py_RETURN_NONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
static PyObject*
|
|
||||||
add_nsmap(Serializer *self, PyObject *args) {
|
|
||||||
(void)args;
|
|
||||||
if (!write_str_literal(self, "[")) return NULL;
|
|
||||||
bool is_first = true;
|
bool is_first = true;
|
||||||
for (auto x : *self->nsmap) {
|
for (auto x : this->nsmap) {
|
||||||
if (is_first) is_first = false;
|
if (is_first) is_first = false;
|
||||||
else if (!write_str_literal(self, ",")) return NULL;
|
else if (!write_str_literal(",")) return false;
|
||||||
if (!write_string_as_json(self, x.c_str())) return NULL;
|
if (!this->write_string_as_json(x.c_str())) return false;
|
||||||
}
|
}
|
||||||
if (!write_str_literal(self, "]")) return NULL;
|
if (!write_str_literal("]")) return false;
|
||||||
Py_RETURN_NONE;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
public:
|
||||||
done(Serializer *self, PyObject *arg) {
|
Serializer() = default;
|
||||||
(void)arg;
|
~Serializer() {
|
||||||
if (!self->buf) return PyBytes_FromString("");
|
Py_CLEAR(this->buf);
|
||||||
if (_PyBytes_Resize(&self->buf, self->used) != 0) return NULL;
|
}
|
||||||
PyObject *ans = self->buf;
|
|
||||||
self->buf = NULL;
|
PyObject*
|
||||||
self->used = 0;
|
serialize(PyObject *args) {
|
||||||
self->nsmap->clear();
|
PyObject *root, *Comment;
|
||||||
|
if (!PyArg_ParseTuple(args, "OO", &root, &Comment)) return NULL;
|
||||||
|
std::stack<pyunique_ptr> stack;
|
||||||
|
std::vector<pyunique_ptr> children;
|
||||||
|
Py_INCREF(root);
|
||||||
|
stack.push(pyunique_ptr(root));
|
||||||
|
write_str_literal("{\"version\":1,\"tree\":");
|
||||||
|
|
||||||
|
while(!stack.empty()) {
|
||||||
|
pyunique_ptr e(std::move(stack.top()));
|
||||||
|
stack.pop();
|
||||||
|
PyObject *elem = e.get();
|
||||||
|
if (PyBytes_CheckExact(elem)) {
|
||||||
|
if (!this->write_data(PyBytes_AS_STRING(elem), PyBytes_GET_SIZE(elem))) return NULL;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
StringOrNone tag(PyObject_GetAttrString(elem, "tag"));
|
||||||
|
StringOrNone text(PyObject_GetAttrString(elem, "text")), tail(PyObject_GetAttrString(elem, "tail"));
|
||||||
|
if (!tag || PyCallable_Check(tag.get())) {
|
||||||
|
const char *type = (tag && tag.get() == Comment) ? "c" : "o";
|
||||||
|
if (!this->add_comment(text.c_str(), tail.c_str(), type)) return NULL;
|
||||||
|
} else {
|
||||||
|
pyunique_ptr attrs(PyObject_CallMethod(elem, "items", NULL));
|
||||||
|
if (!attrs) return NULL;
|
||||||
|
if (!this->start_tag(tag.c_str(), text.c_str(), tail.c_str(), attrs.get())) return NULL;
|
||||||
|
pyunique_ptr iterator(PyObject_GetIter(elem));
|
||||||
|
if (!iterator) return NULL;
|
||||||
|
children.clear();
|
||||||
|
while(true) {
|
||||||
|
PyObject *child = PyIter_Next(iterator.get());
|
||||||
|
if (!child) { if (PyErr_Occurred()) return NULL; break; }
|
||||||
|
children.push_back(pyunique_ptr(child));
|
||||||
|
}
|
||||||
|
if (children.size() > 0) {
|
||||||
|
#define push_literal(x) { \
|
||||||
|
PyObject *lt = PyBytes_FromStringAndSize(x, sizeof(x) - 1); \
|
||||||
|
if (!lt) return NULL; \
|
||||||
|
stack.push(pyunique_ptr(lt));}
|
||||||
|
if (!write_str_literal(",\"c\":[")) return NULL;
|
||||||
|
push_literal("]}");
|
||||||
|
for (size_t i = children.size(); i-- > 0;) {
|
||||||
|
stack.push(std::move(children[i]));
|
||||||
|
if (i != 0) push_literal(",");
|
||||||
|
}
|
||||||
|
#undef push_literal
|
||||||
|
} else if (!write_str_literal("}")) return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!write_str_literal(",\"nsmap\":")) return NULL;
|
||||||
|
if (!this->add_nsmap()) return NULL;
|
||||||
|
if (!write_str_literal("}")) return NULL;
|
||||||
|
|
||||||
|
if (_PyBytes_Resize(&this->buf, this->used) != 0) return NULL;
|
||||||
|
PyObject *ans = this->buf;
|
||||||
|
this->buf = NULL;
|
||||||
|
this->used = 0;
|
||||||
|
this->nsmap.clear();
|
||||||
return ans;
|
return ans;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
serialize(PyObject *self, PyObject *args) {
|
||||||
|
(void)self;
|
||||||
|
try {
|
||||||
|
Serializer s;
|
||||||
|
return s.serialize(args);
|
||||||
|
} catch(const std::exception & err) {
|
||||||
|
PyErr_Format(PyExc_ValueError, "An error occurred while trying to serialize to JSON: %s", err.what());
|
||||||
|
return NULL;
|
||||||
|
} catch (...) {
|
||||||
|
PyErr_SetString(PyExc_ValueError, "An unknown error occurred while trying to serialize to JSON");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Boilerplate {{{
|
// Boilerplate {{{
|
||||||
static PyMethodDef Serializer_methods[] = {
|
|
||||||
{"start_tag", (PyCFunction)start_tag, METH_VARARGS,
|
|
||||||
"Start serializing a tag"
|
|
||||||
},
|
|
||||||
{"add_comment", (PyCFunction)add_comment, METH_VARARGS,
|
|
||||||
"Add a comment"
|
|
||||||
},
|
|
||||||
{"write", (PyCFunction)pywrite, METH_O,
|
|
||||||
"Write the specified unicode or bytes object"
|
|
||||||
},
|
|
||||||
{"add_nsmap", (PyCFunction)add_nsmap, METH_NOARGS,
|
|
||||||
"Add the namespace map"
|
|
||||||
},
|
|
||||||
{"done", (PyCFunction)done, METH_NOARGS,
|
|
||||||
"Get the serialized output"
|
|
||||||
},
|
|
||||||
{NULL} /* Sentinel */
|
|
||||||
};
|
|
||||||
|
|
||||||
PyTypeObject SerializerType = {
|
|
||||||
PyVarObject_HEAD_INIT(NULL, 0)
|
|
||||||
/* tp_name */ "html_as_json.Serializer",
|
|
||||||
/* tp_basicsize */ sizeof(Serializer),
|
|
||||||
/* tp_itemsize */ 0,
|
|
||||||
/* tp_dealloc */ (destructor)dealloc,
|
|
||||||
/* tp_print */ 0,
|
|
||||||
/* tp_getattr */ 0,
|
|
||||||
/* tp_setattr */ 0,
|
|
||||||
/* tp_compare */ 0,
|
|
||||||
/* tp_repr */ 0,
|
|
||||||
/* tp_as_number */ 0,
|
|
||||||
/* tp_as_sequence */ 0,
|
|
||||||
/* tp_as_mapping */ 0,
|
|
||||||
/* tp_hash */ 0,
|
|
||||||
/* tp_call */ 0,
|
|
||||||
/* tp_str */ 0,
|
|
||||||
/* tp_getattro */ 0,
|
|
||||||
/* tp_setattro */ 0,
|
|
||||||
/* tp_as_buffer */ 0,
|
|
||||||
/* tp_flags */ Py_TPFLAGS_DEFAULT,
|
|
||||||
/* tp_doc */ "Serializer",
|
|
||||||
/* tp_traverse */ 0,
|
|
||||||
/* tp_clear */ 0,
|
|
||||||
/* tp_richcompare */ 0,
|
|
||||||
/* tp_weaklistoffset */ 0,
|
|
||||||
/* tp_iter */ 0,
|
|
||||||
/* tp_iternext */ 0,
|
|
||||||
/* tp_methods */ Serializer_methods,
|
|
||||||
/* tp_members */ 0,
|
|
||||||
/* tp_getset */ 0,
|
|
||||||
/* tp_base */ 0,
|
|
||||||
/* tp_dict */ 0,
|
|
||||||
/* tp_descr_get */ 0,
|
|
||||||
/* tp_descr_set */ 0,
|
|
||||||
/* tp_dictoffset */ 0,
|
|
||||||
/* tp_init */ 0,
|
|
||||||
/* tp_alloc */ 0,
|
|
||||||
/* tp_new */ alloc,
|
|
||||||
};
|
|
||||||
|
|
||||||
static char doc[] = "Serialize HTML as JSON efficiently";
|
static char doc[] = "Serialize HTML as JSON efficiently";
|
||||||
static PyMethodDef methods[] = {
|
static PyMethodDef methods[] = {
|
||||||
|
{"serialize", (PyCFunction)serialize, METH_VARARGS,
|
||||||
|
"Serialize the provided lxml tree to JSON"
|
||||||
|
},
|
||||||
{NULL} /* Sentinel */
|
{NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -494,19 +465,11 @@ CALIBRE_MODINIT_FUNC inithtml_as_json(void) {
|
|||||||
|
|
||||||
PyObject* m;
|
PyObject* m;
|
||||||
|
|
||||||
if (PyType_Ready(&SerializerType) < 0) {
|
|
||||||
INITERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
m = INITMODULE;
|
m = INITMODULE;
|
||||||
if (m == NULL) {
|
if (m == NULL) {
|
||||||
INITERROR;
|
INITERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyModule_AddObject(m, "Serializer", (PyObject *)&SerializerType);
|
|
||||||
|
|
||||||
|
|
||||||
#if PY_MAJOR_VERSION >= 3
|
#if PY_MAJOR_VERSION >= 3
|
||||||
return m;
|
return m;
|
||||||
#endif
|
#endif
|
||||||
|
@ -756,43 +756,15 @@ def ensure_body(root):
|
|||||||
|
|
||||||
def html_as_json(root):
|
def html_as_json(root):
|
||||||
try:
|
try:
|
||||||
Serializer = plugins['html_as_json'][0].Serializer
|
serialize = plugins['html_as_json'][0].serialize
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return as_bytes(json.dumps(html_as_dict(root), ensure_ascii=False, separators=(',', ':')))
|
return as_bytes(json.dumps(html_as_dict(root), ensure_ascii=False, separators=(',', ':')))
|
||||||
s = Serializer()
|
ensure_body(root)
|
||||||
s.write(b'{"version":1,"tree":')
|
for child in tuple(root.iterchildren('*')):
|
||||||
stack = [root]
|
if child.tag.partition('}')[-1] not in ('head', 'body'):
|
||||||
|
root.remove(child)
|
||||||
while stack:
|
root.text = root.tail = None
|
||||||
elem = stack.pop()
|
return serialize(root, Comment)
|
||||||
if isinstance(elem, bytes):
|
|
||||||
s.write(elem)
|
|
||||||
continue
|
|
||||||
tag = getattr(elem, 'tag', html_as_json)
|
|
||||||
if callable(tag):
|
|
||||||
if tag is Comment:
|
|
||||||
s.add_comment(elem.text, elem.tail, 'c')
|
|
||||||
else:
|
|
||||||
tail = getattr(elem, 'tail', None)
|
|
||||||
if tail:
|
|
||||||
s.add_comment(None, tail, 'o')
|
|
||||||
continue
|
|
||||||
s.start_tag(elem.tag, elem.text, elem.tail, elem.items())
|
|
||||||
children = tuple(elem.iterchildren())
|
|
||||||
if children:
|
|
||||||
s.write(b',"c":[')
|
|
||||||
stack.append(b']}')
|
|
||||||
first_child = children[0]
|
|
||||||
for c in reversed(children):
|
|
||||||
stack.append(c)
|
|
||||||
if c is not first_child:
|
|
||||||
stack.append(b',')
|
|
||||||
else:
|
|
||||||
s.write(b'}')
|
|
||||||
s.write(b',"nsmap":')
|
|
||||||
s.add_nsmap()
|
|
||||||
s.write(b'}')
|
|
||||||
return s.done()
|
|
||||||
|
|
||||||
|
|
||||||
def html_as_dict(root):
|
def html_as_dict(root):
|
||||||
|
@ -239,16 +239,8 @@ class ContentTest(LibraryBaseTest):
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def test_html_as_json(self): # {{{
|
def test_html_as_json(self): # {{{
|
||||||
from calibre.constants import plugins
|
|
||||||
from calibre.srv.render_book import html_as_json
|
from calibre.srv.render_book import html_as_json
|
||||||
from calibre.ebooks.oeb.parse_utils import html5_parse
|
from calibre.ebooks.oeb.parse_utils import html5_parse
|
||||||
Serializer = plugins['html_as_json'][0].Serializer
|
|
||||||
s = Serializer()
|
|
||||||
d = 'a' * (127 * 1024)
|
|
||||||
s.write(d)
|
|
||||||
d = d.encode('ascii')
|
|
||||||
s.write(d)
|
|
||||||
self.ae(s.done(), (d + d))
|
|
||||||
|
|
||||||
def t(html, body_children, nsmap=('http://www.w3.org/1999/xhtml',)):
|
def t(html, body_children, nsmap=('http://www.w3.org/1999/xhtml',)):
|
||||||
root = html5_parse(html)
|
root = html5_parse(html)
|
||||||
@ -268,4 +260,6 @@ class ContentTest(LibraryBaseTest):
|
|||||||
)
|
)
|
||||||
text = '🐈\n\t\\mūs"'
|
text = '🐈\n\t\\mūs"'
|
||||||
t("<p id='{}'>Peña".format(text), [{"n":"p","s":0,"x":"Peña","a":[['id',text]]}])
|
t("<p id='{}'>Peña".format(text), [{"n":"p","s":0,"x":"Peña","a":[['id',text]]}])
|
||||||
|
text = 'a' * (127 * 1024)
|
||||||
|
t('<p>{0}<p>{0}'.format(text), [{"n":"p","s":0,"x":text}, {'n':'p','s':0,'x':text}])
|
||||||
# }}}
|
# }}}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user