Switch to C++/WinRT from C++/CX

This commit is contained in:
Kovid Goyal 2023-01-13 14:52:24 +05:30
parent 4c89a7e697
commit c7468a5f9a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 155 additions and 94 deletions

View File

@ -190,7 +190,7 @@
"headers": "calibre/utils/cpp_binding.h calibre/utils/windows/common.h",
"sources": "calibre/utils/windows/winspeech.cpp",
"libraries": "WindowsApp",
"cflags": "/X /std:c++17 /ZW /bigobj /await /permissive- /WX /Zc:twoPhase-"
"cflags": "/X /std:c++17 /bigobj /await /permissive- /WX /Zc:twoPhase-"
},
{
"name": "wpd",

View File

@ -7,32 +7,23 @@
#include "common.h"
#include <array>
#include <collection.h>
#include <deque>
#include <memory>
#include <winrt/base.h>
#include <ppltasks.h>
#include <winrt/Windows.Foundation.Collections.h>
#include <windows.foundation.h>
#include <windows.media.speechsynthesis.h>
#include <windows.storage.streams.h>
#include <winrt/Windows.Storage.Streams.h>
#include <winrt/Windows.Media.SpeechSynthesis.h>
using namespace Windows::Foundation;
using namespace Windows::Foundation::Collections;
using namespace Windows::Media::SpeechSynthesis;
using namespace Windows::Storage::Streams;
using namespace Platform;
using namespace Concurrency;
using namespace winrt::Windows::Foundation;
using namespace winrt::Windows::Foundation::Collections;
using namespace winrt::Windows::Media::SpeechSynthesis;
using namespace winrt::Windows::Storage::Streams;
// static void
// wait_for_async( Windows::Foundation::IAsyncInfo ^op ) {
// while(op->Status == Windows::Foundation::AsyncStatus::Started) {
// CoreWindow::GetForCurrentThread()->Dispatcher->ProcessEvents(CoreProcessEventsOption::ProcessAllIfPresent);
// }
// }
// Python object wrapping a WinRT SpeechSynthesizer instance.
// (The diff rendering interleaved the old C++/CX typedef with the new
// C++/WinRT struct; only the C++/WinRT version is kept here.)
struct Synthesizer {
    PyObject_HEAD
    // Brace-initialized with nullptr so the member starts as a null WinRT
    // proxy; the real synthesizer is created in Synthesizer_new.
    SpeechSynthesizer synth{nullptr};
};
static PyTypeObject SynthesizerType = {
@ -43,7 +34,7 @@ static PyObject *
Synthesizer_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { INITIALIZE_COM_IN_FUNCTION
Synthesizer *self = (Synthesizer *) type->tp_alloc(type, 0);
if (self) {
self->synth = ref new SpeechSynthesizer();
self->synth = SpeechSynthesizer();
}
if (self && !PyErr_Occurred()) com.detach();
return (PyObject*)self;
@ -51,102 +42,163 @@ Synthesizer_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { INITIALIZE
static void
Synthesizer_dealloc(Synthesizer *self) {
    // Release the WinRT synthesizer (assigning a null proxy drops our
    // reference) before tearing down COM for this object.
    self->synth = SpeechSynthesizer{nullptr};
    // Balances the INITIALIZE_COM_IN_FUNCTION done in Synthesizer_new.
    CoUninitialize();
    // A tp_dealloc must free the object itself; as originally written the
    // struct's memory was never released, leaking every instance.
    Py_TYPE(self)->tp_free((PyObject*)self);
}
// Private sentinel message posted back to the waiting thread (see
// send_done_message_to_thread below) when background work finishes.
#define WM_DONE (WM_USER + 0)
// Force creation of a message queue for the calling thread. Per the Win32
// docs, a thread has no message queue until it first calls a message
// function; PeekMessage with PM_NOREMOVE creates the queue without
// consuming any message, so a later PostThreadMessage to this thread
// cannot fail with "thread has no message queue".
static void
ensure_current_thread_has_message_queue(void) {
MSG msg;
PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE);
}
static bool
send_done_message_to_thread(DWORD thread_id) {
return PostThreadMessageA(thread_id, WM_DONE, 0, 0);
/*
class CreateRecording {
private:
DWORD main_thread_id;
std::wstring error_msg;
winrt::Windows::Storage::Streams::DataReader reader{nullptr};
unsigned long long stream_size, bytes_read;
public:
CreateRecording() : main_thread_id(0), error_msg(), reader(nullptr), stream_size(0), bytes_read(0) {
main_thread_id = GetCurrentThreadId();
ensure_current_thread_has_message_queue();
}
CreateRecording& operator = (const CreateRecording &) = delete;
CreateRecording(const CreateRecording&) = delete;
void record_plain_text(SpeechSynthesizer ^synth, const wchar_t* text, PyObject *callback, std::shared_ptr<CreateRecording> self) {
StringReference rtext(text);
create_task(synth->SynthesizeTextToStreamAsync(rtext.GetString()), task_continuation_context::use_current()).then(
[self](task<SpeechSynthesisStream^> s) { self->threaded_save_stream(s, self); });
this->run_loop(callback);
reader = winrt::Windows::Storage::Streams::DataReader{nullptr};
}
static bool
pump_till_done(void) {
void record_ssml(SpeechSynthesizer ^synth, const wchar_t* text, PyObject *callback, std::shared_ptr<CreateRecording> self) {
StringReference rtext(text);
create_task(synth->SynthesizeSsmlToStreamAsync(rtext.GetString()), task_continuation_context::use_current()).then(
[self](task<SpeechSynthesisStream^> s) { self->threaded_save_stream(s, self); });
this->run_loop(callback);
reader = winrt::Windows::Storage::Streams::DataReader{nullptr};
}
private:
void send_message_to_main_thread(bool done = false) const {
PostThreadMessageA(main_thread_id, WM_USER, 0, done ? 1 : 0);
}
void threaded_save_stream(task<SpeechSynthesisStream^> stream_task, std::shared_ptr<CreateRecording> self) {
try {
SpeechSynthesisStream^ stream = stream_task.get();
stream_size = stream->Size;
reader = winrt::Windows::Storage::Streams::DataReader(stream);
this->chunked_read(self);
return;
} catch(winrt::hresult_error const& ex) {
error_msg += L"Could not synthesize speech from text: ";
error_msg += ex.message().c_str();
}
this->send_message_to_main_thread(true);
}
void chunked_read(std::shared_ptr<CreateRecording> self) {
create_task(reader.LoadAsync(16 * 1024), task_continuation_context::use_current()).then(
[self](task<unsigned int> s) { self->threaded_dispatch_chunk(s, self); });
}
void threaded_dispatch_chunk(task<unsigned int> bytes_loaded, std::shared_ptr<CreateRecording> self) {
try {
unsigned int n = bytes_loaded.get();
bytes_read += n;
fprintf(stderr, "11111111 %u\n", n);
if (n > 0) {
this->send_message_to_main_thread();
}
if (bytes_read < stream_size) {
this->chunked_read(self);
return;
}
} catch(winrt::hresult_error const& ex) {
error_msg += L"Could not read data from synthesized speech stream: ";
error_msg += ex.message().c_str();
}
this->send_message_to_main_thread(true);
}
void run_loop(PyObject *callback) {
MSG msg;
while (true) {
BOOL ret = GetMessage(&msg, NULL, 0, 0);
if (ret == 0) { PyErr_SetString(PyExc_OSError, "WM_QUIT received"); return false; } // WM_QUIT
if (ret == -1) { PyErr_SetFromWindowsErr(0); return false; }
if (msg.message == WM_DONE) {
break;
}
if (ret == 0) { PyErr_SetString(PyExc_OSError, "WM_QUIT received"); return; }
if (ret == -1) { PyErr_SetFromWindowsErr(0); return; }
if (msg.message == WM_USER) {
if (!this->commit_chunks(callback)) { break; }
if (msg.lParam == 1) break;
} else {
DispatchMessage(&msg);
}
}
if (error_msg.size() > 0) {
pyobject_raii err(PyUnicode_FromWideChar(error_msg.data(), -1));
PyErr_Format(PyExc_OSError, "%V", err.ptr(), "Could not create error message unicode object");
return;
}
this->commit_chunks(callback);
}
bool commit_chunks(PyObject *callback) {
// Platform::Array<byte> ^a;
// while ((a = queue.pop()) != nullptr) {
// pyobject_raii ret(PyObject_CallFunction(callback, "y#", (const char*)a->Data, static_cast<Py_ssize_t>(a->Length)));
// if (!ret) return false;
// }
return true;
}
};
static PyObject*
Synthesizer_create_recording(Synthesizer *self, PyObject *args) {
wchar_raii pytext;
if (!PyArg_ParseTuple(args, "O&", py_to_wchar_no_none, &pytext)) return NULL;
StringReference text(pytext.ptr());
bool error_ocurred = false;
HRESULT hr = S_OK;
std::array<wchar_t, 2048> error_msg;
DataReader ^reader = nullptr;
DWORD main_thread_id = GetCurrentThreadId();
unsigned long long stream_size;
unsigned int bytes_read;
create_task(self->synth->SynthesizeTextToStreamAsync(text.GetString()), task_continuation_context::use_current()
).then([&reader, &stream_size](task<SpeechSynthesisStream^> stream_task) {
SpeechSynthesisStream^ stream = stream_task.get();
stream_size = stream->Size;
reader = ref new DataReader(stream);
return reader->LoadAsync((unsigned int)stream_size);
}).then([main_thread_id, &bytes_read, &error_msg, &error_ocurred, &reader](task<unsigned int> bytes_read_task) {
try {
bytes_read = bytes_read_task.get();
} catch (Exception ^ex) {
std::swprintf(error_msg.data(), error_msg.size(), L"Could not synthesize speech from text: %ls", ex->Message->Data());
error_ocurred = true;
}
send_done_message_to_thread(main_thread_id);
});
if (!pump_till_done()) return NULL;
if (error_ocurred) {
pyobject_raii err(PyUnicode_FromWideChar(error_msg.data(), -1));
PyErr_Format(PyExc_OSError, "%V", err.ptr(), "Could not create error message unicode object");
return NULL;
}
auto data = ref new Platform::Array<byte>(bytes_read);
reader->ReadBytes(data);
return PyBytes_FromStringAndSize((const char*)data->Data, bytes_read);
PyObject *callback;
if (!PyArg_ParseTuple(args, "O&O", py_to_wchar_no_none, &pytext, &callback)) return NULL;
if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be callable"); return NULL; }
auto cr = std::make_shared<CreateRecording>();
cr->record_plain_text(self->synth, pytext.ptr(), callback, cr);
if (PyErr_Occurred()) return NULL;
Py_RETURN_NONE;
}
*/
// Convert a WinRT VoiceInformation into a Python dict with unicode values
// for display_name/description/id/language and a str value for gender.
// Returns a new reference, or NULL with a Python exception set.
static PyObject*
voice_as_dict(VoiceInformation const& voice) {
    const char *gender = "";
    // VoiceGender only defines Male and Female; anything else maps to "".
    switch (voice.Gender()) {
        case VoiceGender::Male: gender = "male"; break;
        case VoiceGender::Female: gender = "female"; break;
    }
    // winrt::hstring::c_str() never returns NULL (an empty string yields
    // L""), so the "u" conversions below are always safe — unlike the old
    // C++/CX code, which had to guard against null Platform::String^.
    return Py_BuildValue("{su su su su ss}",
        "display_name", voice.DisplayName().c_str(),
        "description", voice.Description().c_str(),
        "id", voice.Id().c_str(),
        "language", voice.Language().c_str(),
        "gender", gender
    );
}
// Return a tuple of dicts describing every installed synthesis voice.
// Returns NULL with a Python exception set on failure.
static PyObject*
all_voices(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTION
    try {
        // AllVoices() is a WinRT call and can throw winrt::hresult_error,
        // so it must live INSIDE the try block — in the diffed version it
        // sat outside, letting a COM failure escape as an unhandled C++
        // exception instead of a Python OSError.
        auto voices = SpeechSynthesizer::AllVoices();
        pyobject_raii ans(PyTuple_New(voices.Size()));
        if (!ans) return NULL;
        Py_ssize_t i = 0;
        for (auto const& voice : voices) {
            PyObject *v = voice_as_dict(voice);
            if (!v) return NULL;  // exception already set by voice_as_dict
            PyTuple_SET_ITEM(ans.ptr(), i++, v);  // steals the reference
        }
        return ans.detach();
    } catch (winrt::hresult_error const& ex) {
        error_from_hresult(ex.to_abi(), "Failed to list all voices");
        return NULL;
    }
}
// Return a dict describing the system's default synthesis voice.
// Returns NULL with a Python exception set on failure.
static PyObject*
default_voice(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTION
    try {
        return voice_as_dict(SpeechSynthesizer::DefaultVoice());
    } catch (winrt::hresult_error const& ex) {
        // The original message ("Failed to list all voices") was
        // copy-pasted from all_voices() and was wrong for this function.
        error_from_hresult(ex.to_abi(), "Failed to get the default voice");
        return NULL;
    }
}
// Method table for the Synthesizer type; M() expands a name into a
// PyMethodDef entry bound to Synthesizer_<name>.
#define M(name, args) { #name, (PyCFunction)Synthesizer_##name, args, ""}
static PyMethodDef Synthesizer_methods[] = {
    // Disabled while the recording code is ported to C++/WinRT.
    // M(create_recording, METH_VARARGS),
    {NULL, NULL, 0, NULL}
};
#undef M