From a00d61d9217c7d7944169a5d4f443c4327e33ddc Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 8 Apr 2025 20:07:04 +0530
Subject: [PATCH] Add a python wrapper for pread

---
NOTE(review): this copy was re-flowed from a version whose line breaks,
indentation and <...> header names had been lost. The #include targets
(<stdint.h>, <windows.h>, <unistd.h>) and the whitespace of context lines
are reconstructed - confirm against the tree before applying. The commit id
and index blob ids are those of the original revision.

Review changes relative to the original revision:
 * Windows: stop on a successful zero byte read (previously an endless loop)
 * Windows: never pass more than fits in a DWORD to ReadFile()
 * Windows: convert the OS handle with PyLong_AsVoidPtr() and check for failure
 * Reject n larger than PY_SSIZE_T_MAX with OverflowError
 * Use pread() on macOS/BSD where pread64() is presumed unavailable
 * Test: do not shadow the path variable, cover short reads and reads at EOF

 src/calibre/utils/copy_files_test.py |  15 ++++
 src/calibre/utils/speedup.c          | 100 +++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)

diff --git a/src/calibre/utils/copy_files_test.py b/src/calibre/utils/copy_files_test.py
index 0100d2d8c1..15900c6556 100644
--- a/src/calibre/utils/copy_files_test.py
+++ b/src/calibre/utils/copy_files_test.py
@@ -65,6 +65,21 @@ class TestCopyFiles(unittest.TestCase):
         contents = set(os.listdir(self.tdir)) - {'base', 'src'}
         self.ae(contents, {'One', 'three'})
 
+    def test_pread_all(self):
+        from calibre_extensions.speedup import pread_all
+        path = os.path.join(self.tdir, 'base')
+        data = os.urandom(1137*1024)
+        with open(path, 'wb') as f:
+            f.write(data)
+        with open(path, 'rb') as f:
+            fd = f.fileno()
+            for size, offset in ((0, 0), (3, 0), (13, 13)):
+                self.assertEqual(data[offset:offset+size], pread_all(fd, size, offset))
+            self.assertEqual(data, pread_all(fd, len(data)))
+            # reads that run into, or start at, EOF return only the bytes available
+            self.assertEqual(data[-3:], pread_all(fd, 10, len(data) - 3))
+            self.assertEqual(b'', pread_all(fd, 10, len(data)))
+
     def test_copying_of_trees(self):
         src, dest = self.s(), self.d()
         copy_tree(src, dest)
diff --git a/src/calibre/utils/speedup.c b/src/calibre/utils/speedup.c
index f6eab24b9a..7249b29953 100644
--- a/src/calibre/utils/speedup.c
+++ b/src/calibre/utils/speedup.c
@@ -28,6 +28,11 @@ typedef unsigned __int8 uint8_t;
 #else
 #include <stdint.h>
 #endif
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
 
 static PyObject*
 barename(PyObject *self, PyObject *tag) {
@@ -705,6 +710,94 @@ deepcopy(PyObject *self, PyObject *o) {
     return ans;
 }
 
+
+// Upper bound on the bytes requested from the OS per call. Keeps the request
+// representable as a DWORD for ReadFile() and the result as a ssize_t for pread().
+#define PREAD_ALL_MAX_CHUNK ((size_t)1 << 30)
+
+// pread_all(fd, n, offset=0) -> bytes
+// Read up to n bytes from fd starting at offset, looping until n bytes have
+// been read, EOF is reached or an error occurs. The returned bytes object is
+// shorter than n only when EOF was hit first. Raises OSError when a read fails
+// and OverflowError when n cannot be the size of a bytes object.
+static PyObject*
+pread_all(PyObject *self, PyObject *args) {
+    int fd; unsigned long long n, offset = 0;
+    if (!PyArg_ParseTuple(args, "iK|K", &fd, &n, &offset)) return NULL;
+    // "K" converts without overflow checking, so a negative or oversized n shows up here as a huge value
+    if (n > (unsigned long long)PY_SSIZE_T_MAX) {
+        PyErr_SetString(PyExc_OverflowError, "n is too large for a bytes object");
+        return NULL;
+    }
+    const size_t total = (size_t)n;
+#ifdef _WIN32
+    PyObject *msvcrt = PyImport_ImportModule("msvcrt");
+    if (!msvcrt) return NULL;
+    PyObject *get_osfhandle = PyObject_GetAttrString(msvcrt, "get_osfhandle");
+    Py_CLEAR(msvcrt);
+    if (!get_osfhandle) return NULL;
+    PyObject *ret = PyObject_CallFunctionObjArgs(get_osfhandle, PyTuple_GET_ITEM(args, 0), NULL);
+    Py_CLEAR(get_osfhandle);
+    if (!ret) return NULL;
+    // PyLong_AsVoidPtr() copes with the handle being reported as either a signed or an unsigned integer
+    HANDLE file = (HANDLE)PyLong_AsVoidPtr(ret);
+    Py_CLEAR(ret);
+    if (!file && PyErr_Occurred()) return NULL;
+#endif
+    PyObject *output = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)total);
+    if (!output || !total) return output;
+    size_t pos = 0;
+    char *buf = PyBytes_AS_STRING(output);
+    int saved_errno = 0;
+    Py_BEGIN_ALLOW_THREADS;
+    while (pos < total) {
+        const size_t chunk = (total - pos > PREAD_ALL_MAX_CHUNK) ? PREAD_ALL_MAX_CHUNK : total - pos;
+#ifdef _WIN32
+        DWORD nr = 0;
+        OVERLAPPED overlapped;
+        memset(&overlapped, 0, sizeof(OVERLAPPED));
+        overlapped.OffsetHigh = (DWORD)(offset >> 32);
+        overlapped.Offset = (DWORD)(offset & 0xFFFFFFFFULL);
+        SetLastError(0);
+        BOOL ok = ReadFile(file, buf + pos, (DWORD)chunk, &nr, &overlapped);
+        if (!ok) {
+            DWORD err = GetLastError();
+            if (err != ERROR_HANDLE_EOF) saved_errno = (int)err;
+            break;
+        }
+        // A successful read of zero bytes makes no progress; treat it as EOF so the loop cannot spin forever
+        if (nr == 0) break;
+#else
+#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
+        // NOTE(review): pread64() is presumed missing on these platforms, whose off_t is 64-bit - confirm on the build bots
+        ssize_t nr = pread(fd, buf + pos, chunk, (off_t)offset);
+#else
+        ssize_t nr = pread64(fd, buf + pos, chunk, offset);
+#endif
+        if (nr < 0) {
+            if (errno == EINTR || errno == EAGAIN || errno == EBUSY) continue;
+            saved_errno = errno;
+            break;
+        } else if (nr == 0) break;
+#endif
+        pos += (size_t)nr;
+        offset += (unsigned long long)nr;
+    }
+    Py_END_ALLOW_THREADS
+    if (saved_errno != 0) {
+        Py_CLEAR(output);
+#ifdef _WIN32
+        PyErr_SetFromWindowsErr(saved_errno);
+#else
+        errno = saved_errno;
+        PyErr_SetFromErrno(PyExc_OSError);
+#endif
+        return NULL;
+    }
+    if (pos < total && _PyBytes_Resize(&output, pos) != 0) return NULL;
+    return output;
+}
+
 static PyMethodDef speedup_methods[] = {
     {"deepcopy", deepcopy, METH_O,
         "deepcopy(object)\n\nFast implementation of deepcopy()"
@@ -752,6 +845,13 @@ static PyMethodDef speedup_methods[] = {
         "set_thread_name(name)\n\nWrapper for pthread_setname_np"
     },
 
+    {"pread_all", pread_all, METH_VARARGS,
+        "pread_all(fd, n, offset=0)\n\nRead up to n bytes from the specified fd at offset in a thread safe manner."
+        " If fewer than n bytes are returned it means there were fewer than n bytes in the file at offset."
+        " Only works with seekable regular files, not sockets/ttys/etc. Note that on Windows it moves the file pointer"
+        " so cannot be mixed with calls to tell() or ordinary reads."
+    },
+
     {"get_num_of_significant_chars", get_num_of_significant_chars, METH_O,
         "get_num_of_significant_chars(elem)\n\nGet the number of chars in specified tag"
     },