A faster deepcopy that specializes to C code for builtin types

This commit is contained in:
Kovid Goyal 2024-09-17 10:19:59 +05:30
parent 02e7bc75a6
commit c3a4cb2796
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -520,8 +520,9 @@ get_element_char_length(PyObject *self, PyObject *args) {
int is_ignored_tag = 0;
size_t ans = 0;
#define EQ(x) memcmp(ltagname, #x, sizeof(#x) - 1) == 0
if (EQ(script) || EQ(noscript) || EQ(style) || EQ(title)) is_ignored_tag = 1;
if (EQ(img) || EQ(svg)) ans += 1000;
if (EQ(script) || EQ(noscript) || EQ(style) || EQ(title)) {
is_ignored_tag = 1;
} else if (EQ(img) || EQ(svg)) ans += 1000;
#undef EQ
if (tail != Py_None) ans += count_chars_in(tail);
if (text != Py_None && !is_ignored_tag) ans += count_chars_in(text);
@ -529,7 +530,127 @@ get_element_char_length(PyObject *self, PyObject *args) {
}
typedef struct deepcopy_data {
PyObject *python_deepcopy, *memo;
} deepcopy_data;
static PyObject* deepcopy_object(PyObject *, deepcopy_data *);
static PyObject*
insert_into_cache(PyObject *o, deepcopy_data *d) {
if (!o) return NULL;
PyObject *key = PyLong_FromVoidPtr(o);
if (!key) { Py_DECREF(o); return NULL; }
if (PyDict_SetItem(d->memo, key, o) != 0) { Py_DECREF(key); Py_DECREF(o); return NULL; }
return o;
}
static PyObject*
deepcopy_list(PyObject *o, deepcopy_data *d) {
PyObject *ans = insert_into_cache(PyList_New(PyList_GET_SIZE(o)), d);
if (!ans) return NULL;
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(o); i++) {
PyObject *t = deepcopy_object(PyList_GET_ITEM(o, i), d);
if (!t) { Py_CLEAR(t); return NULL; }
PyList_SET_ITEM(ans, i, t);
}
return ans;
}
static PyObject*
deepcopy_tuple(PyObject *o, deepcopy_data *d) {
PyObject *ans = insert_into_cache(PyTuple_New(PyTuple_GET_SIZE(o)), d);
if (!ans) return NULL;
for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(o); i++) {
PyObject *t = deepcopy_object(PyTuple_GET_ITEM(o, i), d);
if (!t) { Py_CLEAR(ans); return NULL; }
PyTuple_SET_ITEM(ans, i, t);
}
return ans;
}
static PyObject*
deepcopy_dict(PyObject *o, deepcopy_data *d) {
PyObject *key, *value;
Py_ssize_t pos = 0;
PyObject *ans = insert_into_cache(PyDict_New(), d);
if (!ans) return NULL;
while (PyDict_Next(o, &pos, &key, &value)) {
PyObject *tk = deepcopy_object(key, d);
if (!tk) break;
PyObject *tv = deepcopy_object(value, d);
if (!tv) { Py_DECREF(tk); break; }
int ok = PyDict_SetItem(ans, tk, tv) == 0;
Py_DECREF(tk); Py_DECREF(tv);
if (!ok) break;
}
if (PyErr_Occurred()) { Py_DECREF(ans); return NULL; }
return ans;
}
static PyObject*
deepcopy_set(PyObject *o, deepcopy_data *d, int is_frozen) {
PyObject *ans = insert_into_cache(is_frozen ? PyFrozenSet_New(NULL) : PySet_New(NULL), d);
if (!ans) return NULL;
PyObject *iter = PyObject_GetIter(o);
if (!iter) { Py_DECREF(ans); return NULL; }
PyObject *key;
while ((key = PyIter_Next(iter)) != NULL) {
PyObject *t = deepcopy_object(key, d);
if (!t) break;
if (PySet_Add(ans, t) != 0) { Py_DECREF(t); break; }
}
Py_DECREF(iter);
if (PyErr_Occurred()) { Py_DECREF(ans); return NULL; }
return ans;
}
static PyObject*
deepcopy_object(PyObject *o, deepcopy_data *d) {
PyObject *key = PyLong_FromVoidPtr(o);
if (!key) return NULL;
PyObject *cached = PyDict_GetItem(d->memo, key);
Py_DECREF(key);
if (cached) return Py_NewRef(cached);
PyTypeObject *type;
// Fast case for builtin immutable types
if (
o == Py_None || o == Py_True || o == Py_False || (type = Py_TYPE(o)) == &PyUnicode_Type ||
type == &PyBytes_Type || type == &PyLong_Type || type == &PyFloat_Type || type == &PyMemoryView_Type ||
type == &PyComplex_Type || type == &PyType_Type || type == &PyFunction_Type || type == &PyCode_Type ||
o == Py_Ellipsis || o == Py_NotImplemented
) return Py_NewRef(o);
if (type == &PyList_Type) return deepcopy_list(o, d);
if (type == &PyTuple_Type) return deepcopy_tuple(o, d);
if (type == &PyDict_Type) return deepcopy_dict(o, d);
if (type == &PySet_Type) return deepcopy_set(o, d, 0);
if (type == &PyFrozenSet_Type) return deepcopy_set(o, d, 1);
if (PyWeakref_Check(o)) return Py_NewRef(o);
return PyObject_CallFunctionObjArgs(d->python_deepcopy, o, d->memo);
}
static PyObject*
deepcopy(PyObject *self, PyObject *o) {
PyObject *memo = PyDict_New();
if (!memo) return NULL;
PyObject *copy_module = PyImport_ImportModule("copy");
if (!copy_module) { Py_DECREF(memo); return NULL; }
PyObject *python_deepcopy = PyObject_GetAttrString(copy_module, "deepcopy");
Py_DECREF(copy_module);
if (!python_deepcopy) { Py_DECREF(memo); return NULL; }
deepcopy_data d = {.python_deepcopy = python_deepcopy, .memo = memo};
PyObject *ans = deepcopy_object(o, &d);
Py_DECREF(python_deepcopy); Py_DECREF(memo);
return ans;
}
static PyMethodDef speedup_methods[] = {
{"deepcopy", deepcopy, METH_O,
"deepcopy(object)\n\nFast implementation of deepcopy()"
},
{"parse_date", speedup_parse_date, METH_VARARGS,
"parse_date()\n\nParse ISO dates faster (specialized for dates stored in the calibre db)."
},