Much nicer implementation of create_recording using winrt facilities

2025-07-09 03:04:10 -04:00 · 2023-01-13 16:40:20 +05:30 · 2023-01-13 16:40:20 +05:30 · a7f713f68f
commit a7f713f68f
parent 27f206f116
1 changed files with 65 additions and 134 deletions
--- a/src/calibre/utils/windows/winspeech.cpp
+++ b/src/calibre/utils/windows/winspeech.cpp
@ -19,6 +19,16 @@ using namespace winrt::Windows::Foundation::Collections;
 using namespace winrt::Windows::Media::SpeechSynthesis;
 using namespace winrt::Windows::Storage::Streams;

+static PyObject*
+runtime_error_as_python_error(PyObject *exc_type, winrt::hresult_error const &ex, const char *file, const int line, const char *prefix="", PyObject *name=NULL) {  // Raise exc_type describing the WinRT error ex, tagged with file:line, prefix, the HRESULT and (if given) name; always returns NULL
+    pyobject_raii msg(PyUnicode_FromWideChar(ex.message().c_str(), -1));  // error text of ex as a Python str; consumed by %V below
+    const HRESULT hr = ex.to_abi();  // numeric code, printed in hex in the message
+    if (name) PyErr_Format(exc_type, "%s:%d:%s:[hr=0x%x] %V: %S", file, line, prefix, hr, msg.ptr(), "Out of memory", name);  // %V takes the object plus a C-string fallback used when the object is NULL
+    else PyErr_Format(exc_type, "%s:%d:%s:[hr=0x%x] %V", file, line, prefix, hr, msg.ptr(), "Out of memory");  // same message without the trailing name
+    return NULL;  // lets callers write: return set_python_error_from_runtime(...)
+}
+#define set_python_error_from_runtime(ex, ...) runtime_error_as_python_error(PyExc_OSError, ex, __FILE__, __LINE__, __VA_ARGS__)  // raises OSError stamped with the call site's file and line; NOTE(review): a call with only `ex` leaves a trailing comma and presumably relies on the compiler eliding it - confirm
+

 struct Synthesizer {
    PyObject_HEAD
@ -52,130 +62,50 @@ ensure_current_thread_has_message_queue(void) {
    PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE);
 }

-/*
-class CreateRecording {
-private:
-    DWORD main_thread_id;
-    std::wstring error_msg;
-    winrt::Windows::Storage::Streams::DataReader reader{nullptr};
-    unsigned long long stream_size, bytes_read;
-
-public:
-    CreateRecording() : main_thread_id(0), error_msg(), reader(nullptr), stream_size(0), bytes_read(0) {
-        main_thread_id = GetCurrentThreadId();
-        ensure_current_thread_has_message_queue();
-    }
-    CreateRecording& operator = (const CreateRecording &) = delete;
-    CreateRecording(const CreateRecording&) = delete;
-
-    void record_plain_text(SpeechSynthesizer ^synth, const wchar_t* text, PyObject *callback, std::shared_ptr<CreateRecording> self) {
-        StringReference rtext(text);
-        create_task(synth->SynthesizeTextToStreamAsync(rtext.GetString()), task_continuation_context::use_current()).then(
-                [self](task<SpeechSynthesisStream^> s) { self->threaded_save_stream(s, self); });
-        this->run_loop(callback);
-        reader = winrt::Windows::Storage::Streams::DataReader{nullptr};
-    }
-
-    void record_ssml(SpeechSynthesizer ^synth, const wchar_t* text, PyObject *callback, std::shared_ptr<CreateRecording> self) {
-        StringReference rtext(text);
-        create_task(synth->SynthesizeSsmlToStreamAsync(rtext.GetString()), task_continuation_context::use_current()).then(
-                [self](task<SpeechSynthesisStream^> s) { self->threaded_save_stream(s, self); });
-        this->run_loop(callback);
-        reader = winrt::Windows::Storage::Streams::DataReader{nullptr};
-    }
-
-private:
-
-    void send_message_to_main_thread(bool done = false) const {
-        PostThreadMessageA(main_thread_id, WM_USER, 0, done ? 1 : 0);
-    }
-
-    void threaded_save_stream(task<SpeechSynthesisStream^> stream_task, std::shared_ptr<CreateRecording> self) {
-        try {
-            SpeechSynthesisStream^ stream = stream_task.get();
-            stream_size = stream->Size;
-            reader = winrt::Windows::Storage::Streams::DataReader(stream);
-            this->chunked_read(self);
-            return;
-        } catch(winrt::hresult_error const& ex) {
-            error_msg += L"Could not synthesize speech from text: ";
-            error_msg += ex.message().c_str();
-        }
-        this->send_message_to_main_thread(true);
-    }
-
-    void chunked_read(std::shared_ptr<CreateRecording> self) {
-        create_task(reader.LoadAsync(16 * 1024), task_continuation_context::use_current()).then(
-                [self](task<unsigned int> s) { self->threaded_dispatch_chunk(s, self); });
-    }
-
-    void threaded_dispatch_chunk(task<unsigned int> bytes_loaded, std::shared_ptr<CreateRecording> self) {
-        try {
-            unsigned int n = bytes_loaded.get();
-            bytes_read += n;
-            fprintf(stderr, "11111111 %u\n", n);
-            if (n > 0) {
-                this->send_message_to_main_thread();
-            }
-            if (bytes_read < stream_size) {
-                this->chunked_read(self);
-                return;
-            }
-        } catch(winrt::hresult_error const& ex) {
-            error_msg += L"Could not read data from synthesized speech stream: ";
-            error_msg += ex.message().c_str();
-        }
-        this->send_message_to_main_thread(true);
-    }
-
-    void run_loop(PyObject *callback) {
-        MSG msg;
-        while (true) {
-            BOOL ret = GetMessage(&msg, NULL, 0, 0);
-            if (ret == 0) { PyErr_SetString(PyExc_OSError, "WM_QUIT received"); return;  }
-            if (ret == -1) { PyErr_SetFromWindowsErr(0); return; }
-            if (msg.message == WM_USER) {
-                if (!this->commit_chunks(callback)) { break; }
-                if (msg.lParam == 1) break;
-            } else {
-                DispatchMessage(&msg);
-            }
-        }
-
-        if (error_msg.size() > 0) {
-            pyobject_raii err(PyUnicode_FromWideChar(error_msg.data(), -1));
-            PyErr_Format(PyExc_OSError, "%V", err.ptr(), "Could not create error message unicode object");
-            return;
-        }
-        this->commit_chunks(callback);
-    }
-
-    bool commit_chunks(PyObject *callback) {
-        // Platform::Array<byte> ^a;
-        // while ((a = queue.pop()) != nullptr) {
-        //     pyobject_raii ret(PyObject_CallFunction(callback, "y#", (const char*)a->Data, static_cast<Py_ssize_t>(a->Length)));
-        //     if (!ret) return false;
-        // }
-        return true;
-    }
-};
-
-
 static PyObject*
 Synthesizer_create_recording(Synthesizer *self, PyObject *args) {
    wchar_raii pytext;
    PyObject *callback;
-	if (!PyArg_ParseTuple(args, "O&O", py_to_wchar_no_none, &pytext, &callback)) return NULL;
+    int is_ssml = 0;  // optional third argument: when true the text is synthesized as SSML
+	if (!PyArg_ParseTuple(args, "O&O|p", py_to_wchar_no_none, &pytext, &callback, &is_ssml)) return NULL;
    if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be callable"); return NULL; }
-    auto cr = std::make_shared<CreateRecording>();
-    cr->record_plain_text(self->synth, pytext.ptr(), callback, cr);
+    // Python args: (text, callback, is_ssml=False). Synthesizes text to a stream and calls callback(bytes) once per loaded chunk; returns None, or NULL with an exception set.
+    ensure_current_thread_has_message_queue();
+    SpeechSynthesisStream stream{nullptr};
+    try {
+        if (is_ssml) stream = self->synth.SynthesizeSsmlToStreamAsync(pytext.as_view()).get();
+        else stream = self->synth.SynthesizeTextToStreamAsync(pytext.as_view()).get();
+    } catch(winrt::hresult_error const& ex) {
+        return set_python_error_from_runtime(ex, "Failed to get SpeechSynthesisStream from text");
+    }
+    unsigned long long stream_size = stream.Size(), bytes_read = 0;
+    DataReader reader(stream);
+    unsigned int n;
+    const static unsigned int chunk_size = 16 * 1024;
+    while (bytes_read < stream_size) {
+        try {
+            n = reader.LoadAsync(chunk_size).get();
+        } catch(winrt::hresult_error const& ex) {
+            return set_python_error_from_runtime(ex, "Failed to load data from DataReader");
+        }
+        if (n == 0) break;  // nothing more could be loaded: stop instead of looping forever on a short stream
+        bytes_read += n;
+        pyobject_raii b(PyBytes_FromStringAndSize(NULL, n));
+        if (!b) return NULL;
+        unsigned char *p = reinterpret_cast<unsigned char*>(PyBytes_AS_STRING(b.ptr()));
+        reader.ReadBytes(winrt::array_view(p, p + n));  // copy the n loaded bytes straight into the bytes object
+        pyobject_raii ret(PyObject_CallFunctionObjArgs(callback, b.ptr(), NULL));
+        if (!ret) return NULL;  // callback raised: propagate now rather than calling it again with an exception set
+    }
+    // Failures return early above; the check below is kept as a final safety net.
    if (PyErr_Occurred()) return NULL;
    Py_RETURN_NONE;
 }
-*/
+

 static PyObject*
 voice_as_dict(VoiceInformation const& voice) {
+    try {
        const char *gender = "";
        switch (voice.Gender()) {
            case VoiceGender::Male: gender = "male"; break;
@ -188,16 +118,19 @@ voice_as_dict(VoiceInformation const& voice) {
            "language", voice.Language().c_str(),
            "gender", gender
        );
+    } catch(winrt::hresult_error const& ex) {
+        return set_python_error_from_runtime(ex);
+    }
 }


 static PyObject*
 all_voices(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTION
+    try {
        auto voices = SpeechSynthesizer::AllVoices();
        pyobject_raii ans(PyTuple_New(voices.Size()));
        if (!ans) return NULL;
        Py_ssize_t i = 0;
-    try {
        for(auto const& voice : voices) {
            PyObject *v = voice_as_dict(voice);
            if (v) {
@ -206,11 +139,10 @@ all_voices(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTION
                return NULL;
            }
        }
-    } catch(winrt::hresult_error const& ex) {
-        error_from_hresult(ex.to_abi(), "Failed to list all voices");
-        return NULL;
-    }
        return ans.detach();
+    } catch(winrt::hresult_error const& ex) {
+        return set_python_error_from_runtime(ex);
+    }
 }

 static PyObject*
@ -218,14 +150,13 @@ default_voice(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTI
    try {
        return voice_as_dict(SpeechSynthesizer::DefaultVoice());
    } catch(winrt::hresult_error const& ex) {
-        error_from_hresult(ex.to_abi(), "Failed to list all voices");
-        return NULL;
+        return set_python_error_from_runtime(ex);
    }
 }

 #define M(name, args) { #name, (PyCFunction)Synthesizer_##name, args, ""}
 static PyMethodDef Synthesizer_methods[] = {
-    // M(create_recording, METH_VARARGS),
+    M(create_recording, METH_VARARGS),  // registers Synthesizer_create_recording as the Python method "create_recording", taking positional args
    {NULL, NULL, 0, NULL}
 };
 #undef M