From c3a4cb2796c18b9bc89af0789c167a9d237dcea5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 17 Sep 2024 10:19:59 +0530 Subject: [PATCH] A faster deepcopy that specializes to C code for builtin types --- src/calibre/utils/speedup.c | 125 +++++++++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 2 deletions(-) diff --git a/src/calibre/utils/speedup.c b/src/calibre/utils/speedup.c index fc3a209f6e..f89f4cb9b6 100644 --- a/src/calibre/utils/speedup.c +++ b/src/calibre/utils/speedup.c @@ -520,8 +520,9 @@ get_element_char_length(PyObject *self, PyObject *args) { int is_ignored_tag = 0; size_t ans = 0; #define EQ(x) memcmp(ltagname, #x, sizeof(#x) - 1) == 0 - if (EQ(script) || EQ(noscript) || EQ(style) || EQ(title)) is_ignored_tag = 1; - if (EQ(img) || EQ(svg)) ans += 1000; + if (EQ(script) || EQ(noscript) || EQ(style) || EQ(title)) { + is_ignored_tag = 1; + } else if (EQ(img) || EQ(svg)) ans += 1000; #undef EQ if (tail != Py_None) ans += count_chars_in(tail); if (text != Py_None && !is_ignored_tag) ans += count_chars_in(text); @@ -529,7 +530,127 @@ get_element_char_length(PyObject *self, PyObject *args) { } +typedef struct deepcopy_data { + PyObject *python_deepcopy, *memo; +} deepcopy_data; + +static PyObject* deepcopy_object(PyObject *, deepcopy_data *); + +static PyObject* +insert_into_cache(PyObject *o, deepcopy_data *d) { + if (!o) return NULL; + PyObject *key = PyLong_FromVoidPtr(o); + if (!key) { Py_DECREF(o); return NULL; } + if (PyDict_SetItem(d->memo, key, o) != 0) { Py_DECREF(key); Py_DECREF(o); return NULL; } + return o; +} + +static PyObject* +deepcopy_list(PyObject *o, deepcopy_data *d) { + PyObject *ans = insert_into_cache(PyList_New(PyList_GET_SIZE(o)), d); + if (!ans) return NULL; + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(o); i++) { + PyObject *t = deepcopy_object(PyList_GET_ITEM(o, i), d); + if (!t) { Py_CLEAR(t); return NULL; } + PyList_SET_ITEM(ans, i, t); + } + return ans; +} + +static PyObject* +deepcopy_tuple(PyObject *o, deepcopy_data *d) { + PyObject *ans = insert_into_cache(PyTuple_New(PyTuple_GET_SIZE(o)), d); + if (!ans) return NULL; + for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(o); i++) { + PyObject *t = deepcopy_object(PyTuple_GET_ITEM(o, i), d); + if (!t) { Py_CLEAR(ans); return NULL; } + PyTuple_SET_ITEM(ans, i, t); + } + return ans; +} + + +static PyObject* +deepcopy_dict(PyObject *o, deepcopy_data *d) { + PyObject *key, *value; + Py_ssize_t pos = 0; + PyObject *ans = insert_into_cache(PyDict_New(), d); + if (!ans) return NULL; + + while (PyDict_Next(o, &pos, &key, &value)) { + PyObject *tk = deepcopy_object(key, d); + if (!tk) break; + PyObject *tv = deepcopy_object(value, d); + if (!tv) { Py_DECREF(tk); break; } + int ok = PyDict_SetItem(ans, tk, tv) == 0; + Py_DECREF(tk); Py_DECREF(tv); + if (!ok) break; + } + if (PyErr_Occurred()) { Py_DECREF(ans); return NULL; } + return ans; +} + +static PyObject* +deepcopy_set(PyObject *o, deepcopy_data *d, int is_frozen) { + PyObject *ans = insert_into_cache(is_frozen ? PyFrozenSet_New(NULL) : PySet_New(NULL), d); + if (!ans) return NULL; + PyObject *iter = PyObject_GetIter(o); + if (!iter) { Py_DECREF(ans); return NULL; } + PyObject *key; + while ((key = PyIter_Next(iter)) != NULL) { + PyObject *t = deepcopy_object(key, d); + if (!t) break; + if (PySet_Add(ans, t) != 0) { Py_DECREF(t); break; } + } + Py_DECREF(iter); + if (PyErr_Occurred()) { Py_DECREF(ans); return NULL; } + return ans; +} + +static PyObject* +deepcopy_object(PyObject *o, deepcopy_data *d) { + PyObject *key = PyLong_FromVoidPtr(o); + if (!key) return NULL; + PyObject *cached = PyDict_GetItem(d->memo, key); + Py_DECREF(key); + if (cached) return Py_NewRef(cached); + PyTypeObject *type; + // Fast case for builtin immutable types + if ( + o == Py_None || o == Py_True || o == Py_False || (type = Py_TYPE(o)) == &PyUnicode_Type || + type == &PyBytes_Type || type == &PyLong_Type || type == &PyFloat_Type || type == &PyMemoryView_Type || + type == &PyComplex_Type || type == &PyType_Type || type == &PyFunction_Type || type == &PyCode_Type || + o == Py_Ellipsis || o == Py_NotImplemented + ) return Py_NewRef(o); + if (type == &PyList_Type) return deepcopy_list(o, d); + if (type == &PyTuple_Type) return deepcopy_tuple(o, d); + if (type == &PyDict_Type) return deepcopy_dict(o, d); + if (type == &PySet_Type) return deepcopy_set(o, d, 0); + if (type == &PyFrozenSet_Type) return deepcopy_set(o, d, 1); + if (PyWeakref_Check(o)) return Py_NewRef(o); + return PyObject_CallFunctionObjArgs(d->python_deepcopy, o, d->memo); +} + +static PyObject* +deepcopy(PyObject *self, PyObject *o) { + PyObject *memo = PyDict_New(); + if (!memo) return NULL; + PyObject *copy_module = PyImport_ImportModule("copy"); + if (!copy_module) { Py_DECREF(memo); return NULL; } + PyObject *python_deepcopy = PyObject_GetAttrString(copy_module, "deepcopy"); + Py_DECREF(copy_module); + if (!python_deepcopy) { Py_DECREF(memo); return NULL; } + deepcopy_data d = {.python_deepcopy = python_deepcopy, .memo = memo}; + PyObject *ans = deepcopy_object(o, &d); + Py_DECREF(python_deepcopy); Py_DECREF(memo); + return ans; +} + static PyMethodDef speedup_methods[] = { + {"deepcopy", deepcopy, METH_O, + "deepcopy(object)\n\nFast implementation of deepcopy()" + }, + {"parse_date", speedup_parse_date, METH_VARARGS, "parse_date()\n\nParse ISO dates faster (specialized for dates stored in the calibre db)." },