diff --git a/setup/extensions.json b/setup/extensions.json
index 6161b71d3f..b27071bf5b 100644
--- a/setup/extensions.json
+++ b/setup/extensions.json
@@ -190,7 +190,7 @@
         "headers": "calibre/utils/cpp_binding.h calibre/utils/windows/common.h",
         "sources": "calibre/utils/windows/winspeech.cpp",
         "libraries": "WindowsApp",
-        "cflags": "/X /std:c++17 /ZW /bigobj /await /permissive- /WX /Zc:twoPhase-"
+        "cflags": "/X /std:c++17 /bigobj /await /permissive- /WX /Zc:twoPhase-"
     },
     {
         "name": "wpd",
diff --git a/src/calibre/utils/windows/winspeech.cpp b/src/calibre/utils/windows/winspeech.cpp
index b468742092..3ffe5a15d8 100644
--- a/src/calibre/utils/windows/winspeech.cpp
+++ b/src/calibre/utils/windows/winspeech.cpp
@@ -7,32 +7,23 @@
 #include "common.h"
 
 #include <...>
-#include <...>
+#include <winrt/Windows.Foundation.h>
+#include <winrt/Windows.Foundation.Collections.h>
 #include <...>
-#include <...>
 #include <...>
-#include <...>
-#include <...>
-#include <...>
+#include <winrt/Windows.Media.SpeechSynthesis.h>
+#include <winrt/Windows.Storage.Streams.h>
 
-using namespace Windows::Foundation;
-using namespace Windows::Foundation::Collections;
-using namespace Windows::Media::SpeechSynthesis;
-using namespace Windows::Storage::Streams;
-using namespace Platform;
-using namespace Concurrency;
+using namespace winrt::Windows::Foundation;
+using namespace winrt::Windows::Foundation::Collections;
+using namespace winrt::Windows::Media::SpeechSynthesis;
+using namespace winrt::Windows::Storage::Streams;
 
-// static void
-// wait_for_async( Windows::Foundation::IAsyncInfo ^op ) {
-//     while(op->Status == Windows::Foundation::AsyncStatus::Started) {
-//         CoreWindow::GetForCurrentThread()->Dispatcher->ProcessEvents(CoreProcessEventsOption::ProcessAllIfPresent);
-//     }
-// }
 
-typedef struct {
+struct Synthesizer {
     PyObject_HEAD
-    SpeechSynthesizer ^synth;
-} Synthesizer;
+    SpeechSynthesizer synth{nullptr};
+};
 
 
 static PyTypeObject SynthesizerType = {
@@ -43,7 +34,7 @@ static PyObject *
 Synthesizer_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { INITIALIZE_COM_IN_FUNCTION
     Synthesizer *self = (Synthesizer *) type->tp_alloc(type, 0);
     if (self) {
-        self->synth = ref new SpeechSynthesizer();
+        self->synth = SpeechSynthesizer();
     }
     if (self && !PyErr_Occurred()) com.detach();
     return (PyObject*)self;
@@ -51,120 +42,190 @@ Synthesizer_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { INITIALIZE
 
 static void
 Synthesizer_dealloc(Synthesizer *self) {
-    self->synth = nullptr;
+    self->synth = SpeechSynthesizer{nullptr};
     CoUninitialize();
 }
 
-#define WM_DONE (WM_USER + 0)
-
 static void
 ensure_current_thread_has_message_queue(void) {
     MSG msg;
     PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE);
 }
 
-static bool
-send_done_message_to_thread(DWORD thread_id) {
-    return PostThreadMessageA(thread_id, WM_DONE, 0, 0);
-}
+/*
+class CreateRecording {
+private:
+    DWORD main_thread_id;
+    std::wstring error_msg;
+    winrt::Windows::Storage::Streams::DataReader reader{nullptr};
+    unsigned long long stream_size, bytes_read;
 
-static bool
-pump_till_done(void) {
-    MSG msg;
-    while (true) {
-        BOOL ret = GetMessage(&msg, NULL, 0, 0);
-        if (ret == 0) { PyErr_SetString(PyExc_OSError, "WM_QUIT received"); return false; } // WM_QUIT
-        if (ret == -1) { PyErr_SetFromWindowsErr(0); return false; }
-        if (msg.message == WM_DONE) {
-            break;
-        }
-        DispatchMessage(&msg);
+public:
+    CreateRecording() : main_thread_id(0), error_msg(), reader(nullptr), stream_size(0), bytes_read(0) {
+        main_thread_id = GetCurrentThreadId();
+        ensure_current_thread_has_message_queue();
     }
-    return true;
-}
+    CreateRecording& operator = (const CreateRecording &) = delete;
+    CreateRecording(const CreateRecording&) = delete;
+
+    void record_plain_text(SpeechSynthesizer ^synth, const wchar_t* text, PyObject *callback, std::shared_ptr<CreateRecording> self) {
+        StringReference rtext(text);
+        create_task(synth->SynthesizeTextToStreamAsync(rtext.GetString()), task_continuation_context::use_current()).then(
+                [self](task<SpeechSynthesisStream^> s) { self->threaded_save_stream(s, self); });
+        this->run_loop(callback);
+        reader = winrt::Windows::Storage::Streams::DataReader{nullptr};
+    }
+
+    void record_ssml(SpeechSynthesizer ^synth, const wchar_t* text, PyObject *callback, std::shared_ptr<CreateRecording> self) {
+        StringReference rtext(text);
+        create_task(synth->SynthesizeSsmlToStreamAsync(rtext.GetString()), task_continuation_context::use_current()).then(
+                [self](task<SpeechSynthesisStream^> s) { self->threaded_save_stream(s, self); });
+        this->run_loop(callback);
+        reader = winrt::Windows::Storage::Streams::DataReader{nullptr};
+    }
+
+private:
+
+    void send_message_to_main_thread(bool done = false) const {
+        PostThreadMessageA(main_thread_id, WM_USER, 0, done ? 1 : 0);
+    }
+
+    void threaded_save_stream(task<SpeechSynthesisStream^> stream_task, std::shared_ptr<CreateRecording> self) {
+        try {
+            SpeechSynthesisStream^ stream = stream_task.get();
+            stream_size = stream->Size;
+            reader = winrt::Windows::Storage::Streams::DataReader(stream);
+            this->chunked_read(self);
+            return;
+        } catch(winrt::hresult_error const& ex) {
+            error_msg += L"Could not synthesize speech from text: ";
+            error_msg += ex.message().c_str();
+        }
+        this->send_message_to_main_thread(true);
+    }
+
+    void chunked_read(std::shared_ptr<CreateRecording> self) {
+        create_task(reader.LoadAsync(16 * 1024), task_continuation_context::use_current()).then(
+                [self](task<unsigned int> s) { self->threaded_dispatch_chunk(s, self); });
+    }
+
+    void threaded_dispatch_chunk(task<unsigned int> bytes_loaded, std::shared_ptr<CreateRecording> self) {
+        try {
+            unsigned int n = bytes_loaded.get();
+            bytes_read += n;
+            fprintf(stderr, "11111111 %u\n", n);
+            if (n > 0) {
+                this->send_message_to_main_thread();
+            }
+            if (bytes_read < stream_size) {
+                this->chunked_read(self);
+                return;
+            }
+        } catch(winrt::hresult_error const& ex) {
+            error_msg += L"Could not read data from synthesized speech stream: ";
+            error_msg += ex.message().c_str();
+        }
+        this->send_message_to_main_thread(true);
+    }
+
+    void run_loop(PyObject *callback) {
+        MSG msg;
+        while (true) {
+            BOOL ret = GetMessage(&msg, NULL, 0, 0);
+            if (ret == 0) { PyErr_SetString(PyExc_OSError, "WM_QUIT received"); return; }
+            if (ret == -1) { PyErr_SetFromWindowsErr(0); return; }
+            if (msg.message == WM_USER) {
+                if (!this->commit_chunks(callback)) { break; }
+                if (msg.lParam == 1) break;
+            } else {
+                DispatchMessage(&msg);
+            }
+        }
+
+        if (error_msg.size() > 0) {
+            pyobject_raii err(PyUnicode_FromWideChar(error_msg.data(), -1));
+            PyErr_Format(PyExc_OSError, "%V", err.ptr(), "Could not create error message unicode object");
+            return;
+        }
+        this->commit_chunks(callback);
+    }
+
+    bool commit_chunks(PyObject *callback) {
+        // Platform::Array<byte> ^a;
+        // while ((a = queue.pop()) != nullptr) {
+        //     pyobject_raii ret(PyObject_CallFunction(callback, "y#", (const char*)a->Data, static_cast<Py_ssize_t>(a->Length)));
+        //     if (!ret) return false;
+        // }
+        return true;
+    }
+};
+
 static PyObject*
 Synthesizer_create_recording(Synthesizer *self, PyObject *args) {
     wchar_raii pytext;
-    if (!PyArg_ParseTuple(args, "O&", py_to_wchar_no_none, &pytext)) return NULL;
-    StringReference text(pytext.ptr());
-    bool error_ocurred = false;
-    HRESULT hr = S_OK;
-    std::array<wchar_t, 2048> error_msg;
-    DataReader ^reader = nullptr;
-    DWORD main_thread_id = GetCurrentThreadId();
-    unsigned long long stream_size;
-    unsigned int bytes_read;
-    create_task(self->synth->SynthesizeTextToStreamAsync(text.GetString()), task_continuation_context::use_current()
-    ).then([&reader, &stream_size](task<SpeechSynthesisStream^> stream_task) {
-        SpeechSynthesisStream^ stream = stream_task.get();
-        stream_size = stream->Size;
-        reader = ref new DataReader(stream);
-        return reader->LoadAsync((unsigned int)stream_size);
-    }).then([main_thread_id, &bytes_read, &error_msg, &error_ocurred, &reader](task<unsigned int> bytes_read_task) {
-        try {
-            bytes_read = bytes_read_task.get();
-        } catch (Exception ^ex) {
-            std::swprintf(error_msg.data(), error_msg.size(), L"Could not synthesize speech from text: %ls", ex->Message->Data());
-            error_ocurred = true;
-        }
-        send_done_message_to_thread(main_thread_id);
-    });
-
-    if (!pump_till_done()) return NULL;
-
-    if (error_ocurred) {
-        pyobject_raii err(PyUnicode_FromWideChar(error_msg.data(), -1));
-        PyErr_Format(PyExc_OSError, "%V", err.ptr(), "Could not create error message unicode object");
-        return NULL;
-    }
-    auto data = ref new Platform::Array<byte>(bytes_read);
-    reader->ReadBytes(data);
-    return PyBytes_FromStringAndSize((const char*)data->Data, bytes_read);
+    PyObject *callback;
+    if (!PyArg_ParseTuple(args, "O&O", py_to_wchar_no_none, &pytext, &callback)) return NULL;
+    if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be callable"); return NULL; }
+    auto cr = std::make_shared<CreateRecording>();
+    cr->record_plain_text(self->synth, pytext.ptr(), callback, cr);
+    if (PyErr_Occurred()) return NULL;
+    Py_RETURN_NONE;
 }
+*/
 
 static PyObject*
-voice_as_dict(VoiceInformation ^voice) {
+voice_as_dict(VoiceInformation const& voice) {
     const char *gender = "";
-    switch (voice->Gender) {
+    switch (voice.Gender()) {
         case VoiceGender::Male: gender = "male"; break;
         case VoiceGender::Female: gender = "female"; break;
     }
     return Py_BuildValue("{su su su su ss}",
-        "display_name", voice->DisplayName? voice->DisplayName->Data() : NULL,
-        "description", voice->Description ? voice->Description->Data() : NULL,
-        "id", voice->Id ? voice->Id->Data(): NULL,
-        "language", voice->Language ? voice->Language->Data() : NULL,
+        "display_name", voice.DisplayName().c_str(),
+        "description", voice.Description().c_str(),
+        "id", voice.Id().c_str(),
+        "language", voice.Language().c_str(),
         "gender", gender
     );
 }
 
+
 static PyObject*
 all_voices(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTION
-    IVectorView<VoiceInformation^>^ voices = SpeechSynthesizer::AllVoices;
-    pyobject_raii ans(PyTuple_New(voices->Size));
+    auto voices = SpeechSynthesizer::AllVoices();
+    pyobject_raii ans(PyTuple_New(voices.Size()));
     if (!ans) return NULL;
     Py_ssize_t i = 0;
-    for(auto voice : voices) {
-        PyObject *v = voice_as_dict(voice);
-        if (v) {
-            PyTuple_SET_ITEM(ans.ptr(), i++, v);
-        } else {
-            return NULL;
+    try {
+        for(auto const& voice : voices) {
+            PyObject *v = voice_as_dict(voice);
+            if (v) {
+                PyTuple_SET_ITEM(ans.ptr(), i++, v);
+            } else {
+                return NULL;
+            }
         }
+    } catch(winrt::hresult_error const& ex) {
+        error_from_hresult(ex.to_abi(), "Failed to list all voices");
+        return NULL;
     }
     return ans.detach();
 }
 
 static PyObject*
 default_voice(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTION
-    return voice_as_dict(SpeechSynthesizer::DefaultVoice);
+    try {
+        return voice_as_dict(SpeechSynthesizer::DefaultVoice());
+    } catch(winrt::hresult_error const& ex) {
+        error_from_hresult(ex.to_abi(), "Failed to list all voices");
+        return NULL;
+    }
 }
 
 #define M(name, args) { #name, (PyCFunction)Synthesizer_##name, args, ""}
 static PyMethodDef Synthesizer_methods[] = {
-    M(create_recording, METH_VARARGS),
+    // M(create_recording, METH_VARARGS),
     {NULL, NULL, 0, NULL}
 };
 #undef M