From 882d56549157bb3bd95589d0242a2e3bbcb72648 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 12 Jan 2023 17:30:40 +0530 Subject: [PATCH] Method to save speech as WAV data --- src/calibre/utils/windows/winspeech.cpp | 81 +++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/calibre/utils/windows/winspeech.cpp b/src/calibre/utils/windows/winspeech.cpp index 9e811e9c4a..b468742092 100644 --- a/src/calibre/utils/windows/winspeech.cpp +++ b/src/calibre/utils/windows/winspeech.cpp @@ -6,8 +6,10 @@ */ #include "common.h" +#include #include #include +#include #include #include #include @@ -17,6 +19,15 @@ using namespace Windows::Foundation; using namespace Windows::Foundation::Collections; using namespace Windows::Media::SpeechSynthesis; using namespace Windows::Storage::Streams; +using namespace Platform; +using namespace Concurrency; + +// static void +// wait_for_async( Windows::Foundation::IAsyncInfo ^op ) { +// while(op->Status == Windows::Foundation::AsyncStatus::Started) { +// CoreWindow::GetForCurrentThread()->Dispatcher->ProcessEvents(CoreProcessEventsOption::ProcessAllIfPresent); +// } +// } typedef struct { PyObject_HEAD @@ -44,6 +55,75 @@ Synthesizer_dealloc(Synthesizer *self) { CoUninitialize(); } +#define WM_DONE (WM_USER + 0) + +static void +ensure_current_thread_has_message_queue(void) { + MSG msg; + PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE); +} + +static bool +send_done_message_to_thread(DWORD thread_id) { + return PostThreadMessageA(thread_id, WM_DONE, 0, 0); +} + +static bool +pump_till_done(void) { + MSG msg; + while (true) { + BOOL ret = GetMessage(&msg, NULL, 0, 0); + if (ret == 0) { PyErr_SetString(PyExc_OSError, "WM_QUIT received"); return false; } // WM_QUIT + if (ret == -1) { PyErr_SetFromWindowsErr(0); return false; } + if (msg.message == WM_DONE) { + break; + } + DispatchMessage(&msg); + } + return true; +} + +static PyObject* +Synthesizer_create_recording(Synthesizer *self, PyObject *args) { + wchar_raii pytext; + if (!PyArg_ParseTuple(args, "O&", py_to_wchar_no_none, &pytext)) return NULL; + StringReference text(pytext.ptr()); + bool error_ocurred = false; + HRESULT hr = S_OK; + std::array error_msg; + DataReader ^reader = nullptr; + DWORD main_thread_id = GetCurrentThreadId(); + unsigned long long stream_size; + unsigned int bytes_read; + + create_task(self->synth->SynthesizeTextToStreamAsync(text.GetString()), task_continuation_context::use_current() + ).then([&reader, &stream_size](task stream_task) { + SpeechSynthesisStream^ stream = stream_task.get(); + stream_size = stream->Size; + reader = ref new DataReader(stream); + return reader->LoadAsync((unsigned int)stream_size); + }).then([main_thread_id, &bytes_read, &error_msg, &error_ocurred, &reader](task bytes_read_task) { + try { + bytes_read = bytes_read_task.get(); + } catch (Exception ^ex) { + std::swprintf(error_msg.data(), error_msg.size(), L"Could not synthesize speech from text: %ls", ex->Message->Data()); + error_ocurred = true; + } + send_done_message_to_thread(main_thread_id); + }); + + if (!pump_till_done()) return NULL; + + if (error_ocurred) { + pyobject_raii err(PyUnicode_FromWideChar(error_msg.data(), -1)); + PyErr_Format(PyExc_OSError, "%V", err.ptr(), "Could not create error message unicode object"); + return NULL; + } + auto data = ref new Platform::Array(bytes_read); + reader->ReadBytes(data); + return PyBytes_FromStringAndSize((const char*)data->Data, bytes_read); +} + static PyObject* voice_as_dict(VoiceInformation ^voice) { const char *gender = ""; @@ -84,6 +164,7 @@ default_voice(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTI #define M(name, args) { #name, (PyCFunction)Synthesizer_##name, args, ""} static PyMethodDef Synthesizer_methods[] = { + M(create_recording, METH_VARARGS), {NULL, NULL, 0, NULL} }; #undef M