diff --git a/src/calibre/utils/windows/winspeech.cpp b/src/calibre/utils/windows/winspeech.cpp
index 291eee17d6..edd8b0c2e8 100644
--- a/src/calibre/utils/windows/winspeech.cpp
+++ b/src/calibre/utils/windows/winspeech.cpp
@@ -8,7 +8,10 @@
 #include
 #include
+#include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -44,15 +47,26 @@ typedef uint64_t id_type;
 static std::mutex output_lock;
 static DWORD main_thread_id;
 
-template static void
-__debug_multiple(T x, Args... args) {
-    std::cerr << x << " ";
-    __debug_multiple(args...);
+template static void
+__debug_multiple_impl(T x) {
+    if constexpr (std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v) {
+        std::cerr << winrt::to_string(x);
+    } else {
+        std::cerr << x;
+    }
 }
 
 template static void
 __debug_multiple(T x) {
-    std::cerr << x << std::endl;
+    __debug_multiple_impl(x);
+    std::cerr << std::endl;
+}
+
+template static void
+__debug_multiple(T x, Args... args) {
+    __debug_multiple_impl(x);
+    std::cerr << " ";
+    __debug_multiple(args...);
 }
 
 template static void
@@ -71,14 +85,6 @@ enum {
     EXIT_REQUESTED
 };
 
-// trim from end (in place)
-static inline void
-rtrim(std::string &s) {
-    s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) {
-        return !std::isspace(ch);
-    }).base(), s.end());
-}
-
 static std::vector
 split(std::wstring_view const &src, std::wstring const &delim = L" ") {
     size_t pos;
@@ -350,6 +356,8 @@ output_error(id_type cmd_id, std::string_view const &msg, std::string_view const
 #define CATCH_ALL_EXCEPTIONS(msg, cmd_id) \
   catch(winrt::hresult_error const& ex) { \
     output_error(cmd_id, msg, winrt::to_string(ex.message()), __LINE__, ex.to_abi()); \
+} catch(const std::system_error& ex) { \
+    output_error(cmd_id, msg, "system_error with code: " + std::to_string(ex.code().value()) + " and meaning: " + ex.what(), __LINE__); \
 } catch (std::exception const &ex) { \
     output_error(cmd_id, msg, ex.what(), __LINE__); \
 } catch (std::string const &ex) { \
@@ -360,369 +368,6 @@ output_error(id_type cmd_id, std::string_view const &msg, std::string_view const
     output_error(cmd_id, msg, "Unknown exception type was raised", __LINE__); \
 }
 
-/* Legacy code {{{
-
-template
-class WeakRefs {
-    private:
-        std::mutex weak_ref_lock;
-        std::unordered_map refs;
-        id_type counter;
-    public:
-        id_type register_ref(T *self) {
-            std::scoped_lock lock(weak_ref_lock);
-            auto id = ++counter;
-            refs[id] = self;
-            return id;
-        }
-        void unregister_ref(T* self) {
-            std::scoped_lock lock(weak_ref_lock);
-            auto id = self->clear_id();
-            refs.erase(id);
-            self->~T();
-        }
-        void use_ref(id_type id, std::function callback) {
-            std::scoped_lock lock(weak_ref_lock);
-            try {
-                callback(refs.at(id));
-            } catch (std::out_of_range) {
-                callback(NULL);
-            }
-        }
-};
-
-enum class EventType {
-    playback_state_changed = 1, media_opened, media_failed, media_ended, source_changed, cue_entered, cue_exited, track_failed
-};
-
-class Event {
-    private:
-        EventType type;
-    public:
-        Event(EventType type) : type(type) {}
-        Event(const Event &source) : type(source.type) {}
-};
-
-class SynthesizerImplementation {
-    private:
-        id_type id;
-        DWORD creation_thread_id;
-        SpeechSynthesizer synth{nullptr};
-        MediaPlayer player{nullptr};
-        MediaSource current_source{nullptr};
-        SpeechSynthesisStream current_stream{nullptr};
-        MediaPlaybackItem currently_playing{nullptr};
-
-        struct {
-            MediaPlaybackSession::PlaybackStateChanged_revoker playback_state_changed;
-            MediaPlayer::MediaEnded_revoker media_ended; MediaPlayer::MediaOpened_revoker media_opened;
-            MediaPlayer::MediaFailed_revoker media_failed; MediaPlayer::SourceChanged_revoker source_changed;
-
-            MediaPlaybackItem::TimedMetadataTracksChanged_revoker timed_metadata_tracks_changed;
-            std::vector cue_entered;
-            std::vector cue_exited;
-            std::vector track_failed;
-        } revoker;
-
-        std::vector events;
-        std::mutex events_lock;
-
-    public:
-        SynthesizerImplementation();
-
-        void add_simple_event(EventType type) {
-            try {
-                std::scoped_lock lock(events_lock);
-                events.emplace_back(type);
-            } catch(...) {}
-        }
-
-        SpeechSynthesisStream synthesize(const std::wstring_view &text, bool is_ssml = false) {
-            if (is_ssml) return synth.SynthesizeSsmlToStreamAsync(text).get();
-            return synth.SynthesizeTextToStreamAsync(text).get();
-        }
-
-        void speak(const std::wstring_view &text, bool is_ssml = false) {
-            revoker.cue_entered.clear();
-            revoker.cue_exited.clear();
-            revoker.track_failed.clear();
-            current_stream = synthesize(text, is_ssml);
-            current_source = MediaSource::CreateFromStream(current_stream, current_stream.ContentType());
-            currently_playing = MediaPlaybackItem(current_source);
-            auto self_id = id;
-            revoker.timed_metadata_tracks_changed = currently_playing.TimedMetadataTracksChanged(winrt::auto_revoke, [self_id](auto, auto const &args) {
-                auto change_type = args.CollectionChange();
-                auto index = args.Index();
-                synthesizer_weakrefs.use_ref(self_id, [change_type, index](auto s) {
-                    if (!s) return;
-                    switch (change_type) {
-                        case CollectionChange::ItemInserted: {
-                            s->register_metadata_handler_for_speech(s->currently_playing.TimedMetadataTracks().GetAt(index));
-                        } break;
-                        case CollectionChange::Reset:
-                            for (auto const& track : s->currently_playing.TimedMetadataTracks()) {
-                                s->register_metadata_handler_for_speech(track);
-                            }
-                            break;
-                }});
-            });
-            player.Source(currently_playing);
-            for (auto const &track : currently_playing.TimedMetadataTracks()) {
-                register_metadata_handler_for_speech(track);
-            }
-        }
-
-        bool is_creation_thread() const noexcept {
-            return creation_thread_id == GetCurrentThreadId();
-        }
-
-        id_type clear_id() noexcept {
-            auto ans = id;
-            id = 0;
-            return ans;
-        }
-
-        void register_metadata_handler_for_speech(TimedMetadataTrack const& track) {
-            fprintf(stderr, "99999999999 registering metadata handler\n");
-            auto self_id = id;
-#define simple_event_listener(method, event_type) \
-    revoker.event_type.push_back(method(winrt::auto_revoke, [self_id](auto, const auto&) { \
-        fprintf(stderr, "111111111 %s %u\n", #event_type, GetCurrentThreadId()); fflush(stderr); \
-        synthesizer_weakrefs.use_ref(self_id, [](auto s) { \
-            if (!s) return; \
-            s->add_simple_event(EventType::event_type); \
-            fprintf(stderr, "2222222222 %d\n", s->player.PlaybackSession().PlaybackState()); \
-        }); \
-    }));
-            simple_event_listener(track.CueEntered, cue_entered);
-            simple_event_listener(track.CueExited, cue_exited);
-            simple_event_listener(track.TrackFailed, track_failed);
-#undef simple_event_listener
-            track.CueEntered([](auto, const auto&) {
-                fprintf(stderr, "cue entered\n"); fflush(stderr);
-            });
-}
-
-
-};
-
-struct Synthesizer {
-    PyObject_HEAD
-    SynthesizerImplementation impl;
-};
-
-static PyTypeObject SynthesizerType = {
-    PyVarObject_HEAD_INIT(NULL, 0)
-};
-
-static WeakRefs synthesizer_weakrefs;
-
-SynthesizerImplementation::SynthesizerImplementation() {
-    events.reserve(128);
-    synth = SpeechSynthesizer();
-    synth.Options().IncludeSentenceBoundaryMetadata(true);
-    synth.Options().IncludeWordBoundaryMetadata(true);
-    player = MediaPlayer();
-    player.AudioCategory(MediaPlayerAudioCategory::Speech);
-    player.AutoPlay(true);
-    creation_thread_id = GetCurrentThreadId();
-    id = synthesizer_weakrefs.register_ref(this);
-    auto self_id = id;
-#define simple_event_listener(method, event_type) \
-    revoker.event_type = method(winrt::auto_revoke, [self_id](auto, const auto&) { \
-        fprintf(stderr, "111111111 %s %u\n", #event_type, GetCurrentThreadId()); fflush(stderr); \
-        synthesizer_weakrefs.use_ref(self_id, [](auto s) { \
-            if (!s) return; \
-            s->add_simple_event(EventType::event_type); \
-            fprintf(stderr, "2222222222 %d\n", s->player.PlaybackSession().PlaybackState()); \
-        }); \
-    });
-    simple_event_listener(player.PlaybackSession().PlaybackStateChanged, playback_state_changed);
-    simple_event_listener(player.MediaOpened, media_opened);
-    simple_event_listener(player.MediaFailed, media_failed);
-    simple_event_listener(player.MediaEnded, media_ended);
-    simple_event_listener(player.SourceChanged, source_changed);
-#undef simple_event_listener
-    player.PlaybackSession().PlaybackStateChanged([](auto, const auto&) {
-        fprintf(stderr, "111111111 %s %u\n", "playback state changed", GetCurrentThreadId()); fflush(stderr); \
-    });
-    player.MediaOpened([](auto, const auto&) {
-        fprintf(stderr, "111111111 %s %u\n", "media opened", GetCurrentThreadId()); fflush(stderr); \
-    });
-    player.MediaFailed([](auto, const auto&) {
-        fprintf(stderr, "111111111 %s %u\n", "media failed", GetCurrentThreadId()); fflush(stderr); \
-    });
-    player.MediaEnded([](auto, const auto&) {
-        fprintf(stderr, "111111111 %s %u\n", "media ended", GetCurrentThreadId()); fflush(stderr); \
-    });
-}
-
-static PyObject*
-Synthesizer_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { INITIALIZE_COM_IN_FUNCTION
-    Synthesizer *self = (Synthesizer *) type->tp_alloc(type, 0);
-    if (self) {
-        auto i = &self->impl;
-        try {
-            new (i) SynthesizerImplementation();
-        } CATCH_ALL_EXCEPTIONS("Failed to create SynthesizerImplementation object");
-        if (PyErr_Occurred()) { Py_CLEAR(self); }
-    }
-    if (self) com.detach();
-    return (PyObject*)self;
-}
-
-static void
-Synthesizer_dealloc(Synthesizer *self) {
-    auto *i = &self->impl;
-    try {
-        synthesizer_weakrefs.unregister_ref(i);
-    } CATCH_ALL_EXCEPTIONS("Failed to destruct SynthesizerImplementation");
-    if (PyErr_Occurred()) { PyErr_Print(); }
-    Py_TYPE(self)->tp_free((PyObject*)self);
-    CoUninitialize();
-}
-
-static void
-ensure_current_thread_has_message_queue(void) {
-    MSG msg;
-    PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE);
-}
-
-#define PREPARE_METHOD_CALL ensure_current_thread_has_message_queue(); if (!self->impl.is_creation_thread()) { PyErr_SetString(PyExc_RuntimeError, "Cannot use a Synthesizer object from a thread other than the thread it was created in"); return NULL; }
-
-static PyObject*
-Synthesizer_speak(Synthesizer *self, PyObject *args) {
-    PREPARE_METHOD_CALL;
-    wchar_raii pytext;
-    int is_ssml = 0;
-    if (!PyArg_ParseTuple(args, "O&|p", py_to_wchar_no_none, &pytext, &is_ssml)) return NULL;
-    try {
-        self->impl.speak(pytext.as_view(), (bool)is_ssml);
-    } CATCH_ALL_EXCEPTIONS("Failed to start speaking text");
-    if (PyErr_Occurred()) return NULL;
-    Py_RETURN_NONE;
-}
-
-
-static PyObject*
-Synthesizer_create_recording(Synthesizer *self, PyObject *args) {
-    PREPARE_METHOD_CALL;
-    wchar_raii pytext;
-    PyObject *callback;
-    int is_ssml = 0;
-    if (!PyArg_ParseTuple(args, "O&O|p", py_to_wchar_no_none, &pytext, &callback, &is_ssml)) return NULL;
"callback must be callable"); return NULL; } - - SpeechSynthesisStream stream{nullptr}; - try { - stream = self->impl.synthesize(pytext.as_view(), (bool)is_ssml); - } CATCH_ALL_EXCEPTIONS( "Failed to get SpeechSynthesisStream from text"); - if (PyErr_Occurred()) return NULL; - unsigned long long stream_size = stream.Size(), bytes_read = 0; - DataReader reader(stream); - unsigned int n; - const static unsigned int chunk_size = 16 * 1024; - while (bytes_read < stream_size) { - try { - n = reader.LoadAsync(chunk_size).get(); - } CATCH_ALL_EXCEPTIONS("Failed to load data from DataReader"); - if (PyErr_Occurred()) return NULL; - if (n > 0) { - bytes_read += n; - pyobject_raii b(PyBytes_FromStringAndSize(NULL, n)); - if (!b) return NULL; - unsigned char *p = reinterpret_cast(PyBytes_AS_STRING(b.ptr())); - reader.ReadBytes(winrt::array_view(p, p + n)); - pyobject_raii ret(PyObject_CallFunctionObjArgs(callback, b.ptr(), NULL)); - } - } - - if (PyErr_Occurred()) return NULL; - Py_RETURN_NONE; -} - - -static PyObject* -voice_as_dict(VoiceInformation const& voice) { - try { - const char *gender = ""; - switch (voice.Gender()) { - case VoiceGender::Male: gender = "male"; break; - case VoiceGender::Female: gender = "female"; break; - } - return Py_BuildValue("{su su su su ss}", - "display_name", voice.DisplayName().c_str(), - "description", voice.Description().c_str(), - "id", voice.Id().c_str(), - "language", voice.Language().c_str(), - "gender", gender - ); - } CATCH_ALL_EXCEPTIONS("Could not convert Voice to dict"); - return NULL; -} - - -static PyObject* -all_voices(PyObject*, PyObject*) { INITIALIZE_COM_IN_FUNCTION - try { - auto voices = SpeechSynthesizer::AllVoices(); - pyobject_raii ans(PyTuple_New(voices.Size())); - if (!ans) return NULL; - Py_ssize_t i = 0; - for(auto const& voice : voices) { - PyObject *v = voice_as_dict(voice); - if (v) { - PyTuple_SET_ITEM(ans.ptr(), i++, v); - } else { - return NULL; - } - } - return ans.detach(); - } CATCH_ALL_EXCEPTIONS("Could not get all voices"); - return NULL; -} - -static PyObject* -default_voice(PyObject*, PyObject*) { INITIALIZE_COM_IN_FUNCTION - try { - return voice_as_dict(SpeechSynthesizer::DefaultVoice()); - } CATCH_ALL_EXCEPTIONS("Could not get default voice"); - return NULL; -} - -#define M(name, args) { #name, (PyCFunction)Synthesizer_##name, args, ""} -static PyMethodDef Synthesizer_methods[] = { - M(create_recording, METH_VARARGS), - M(speak, METH_VARARGS), - {NULL, NULL, 0, NULL} -}; -#undef M - -static PyObject* -pump_waiting_messages(PyObject*, PyObject*) { - UINT firstMsg = 0, lastMsg = 0; - MSG msg; - bool found = false; - // Read all of the messages in this next loop, - // removing each message as we read it. - while (PeekMessage(&msg, NULL, firstMsg, lastMsg, PM_REMOVE)) { - // If it's a quit message, we're out of here. - if (msg.message == WM_QUIT) { - Py_RETURN_NONE; - } - found = true; - // Otherwise, dispatch the message. 
-        DispatchMessage(&msg);
-    } // End of PeekMessage while loop
-
-    if (found) Py_RETURN_TRUE;
-    Py_RETURN_FALSE;
-}
-
-
-}}} */
-
-
 struct Revokers {
     MediaPlaybackSession::PlaybackStateChanged_revoker playback_state_changed;
     MediaPlayer::MediaEnded_revoker media_ended; MediaPlayer::MediaOpened_revoker media_opened;
@@ -767,12 +412,10 @@ class Synthesizer {
     // }}}
 
     winrt::fire_and_forget save(id_type cmd_id, std::wstring_view const &text, bool is_ssml, std::vector &&buf, std::ofstream &&outfile);
-    void load_stream_for_save(SpeechSynthesisStream const &&stream, id_type cmd_id);
+    void start_save_stream(SpeechSynthesisStream const &&stream, id_type cmd_id);
 
     void initialize() {
         synth = SpeechSynthesizer();
-        synth.Options().IncludeSentenceBoundaryMetadata(true);
-        synth.Options().IncludeWordBoundaryMetadata(true);
        player = MediaPlayer();
        player.AudioCategory(MediaPlayerAudioCategory::Speech);
        player.AutoPlay(true);
@@ -809,6 +452,59 @@ class Synthesizer {
 
 static Synthesizer sx;
 
+static size_t
+decode_into(std::string_view src, std::wstring_view dest) {
+    int n = MultiByteToWideChar(CP_UTF8, 0, src.data(), (int)src.size(), (wchar_t*)dest.data(), (int)dest.size());
+    if (n == 0 && src.size() > 0) {
+        throw std::system_error(GetLastError(), std::system_category(), "Failed to decode cued text");
+    }
+    return n;
+}
+
+static std::wstring_view
+parse_cued_text(std::string_view src, Marks &marks, std::wstring_view dest) {
+    size_t dest_pos = 0;
+    if (dest.size() < src.size()) throw std::exception("Destination buffer for parse_cued_text() too small");
+    while (src.size()) {
+        auto pos = src.find('\0');
+        size_t limit = pos == std::string_view::npos ? src.size() : pos;
+        if (limit) {
+            dest_pos += decode_into(src.substr(0, limit), dest.substr(dest_pos, dest.size() - dest_pos));
+            src = src.substr(limit, src.size() - limit);
+        }
+        if (pos != std::string_view::npos) {
+            src = src.substr(1, src.size() - 1);
+            if (src.size() >= 4) {
+                uint32_t mark = *((uint32_t*)src.data());
+                marks.emplace_back(mark, (uint32_t)dest_pos);
+                src = src.substr(4, src.size() - 4);
+            }
+        }
+    }
+    return dest.substr(0, dest_pos);
+}
+
+static std::wstring_view
+read_from_shm(id_type cmd_id, const std::wstring_view size, const std::wstring &address, std::vector &buf, Marks &marks, bool is_cued=false) {
+    id_type shm_size = parse_id(size);
+    handle_raii_null handle(OpenFileMappingW(FILE_MAP_READ, false, address.data()));
+    if (!handle) {
+        output_error(cmd_id, "Could not open shared memory at: " + winrt::to_string(address), winrt::to_string(get_last_error()), __LINE__);
+        return {};
+    }
+    mapping_raii mapping(MapViewOfFile(handle.ptr(), FILE_MAP_READ, 0, 0, (SIZE_T)shm_size));
+    if (!mapping) {
+        output_error(cmd_id, "Could not map shared memory", winrt::to_string(get_last_error()), __LINE__);
+        return {};
+    }
+    buf.reserve(shm_size + 2);
+    std::string_view src((const char*)mapping.ptr(), shm_size);
+    std::wstring_view dest(buf.data(), buf.capacity());
+    if (is_cued) return parse_cued_text(src, marks, dest);
+    return std::wstring_view(buf.data(), decode_into(src, dest));
+}
+
+
 // Speak {{{
 void Synthesizer::on_cue_entered(id_type cmd_id, const winrt::hstring &label, const SpeechCue &cue) {
     std::scoped_lock sl(recursive_lock);
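A minimal sketch (not part of the patch) of the buffer layout the new parse_cued_text() above expects: UTF-8 text chunks separated by NUL bytes, with each NUL immediately followed by a raw 4-byte mark id. The helper names below are illustrative only.

```cpp
// Illustrative producer for the cued-text layout parse_cued_text() consumes:
// [utf8 text][\0][uint32 mark id][utf8 text][\0][uint32 mark id]...
// append_mark() and build_cued_buffer() are example names, not part of the patch.
#include <cstdint>
#include <cstring>
#include <string>

static void append_mark(std::string &buf, uint32_t mark_id) {
    buf.push_back('\0');                      // separator located by src.find('\0')
    char raw[sizeof(mark_id)];
    std::memcpy(raw, &mark_id, sizeof(raw));  // read back as *((uint32_t*)src.data())
    buf.append(raw, sizeof(raw));
}

static std::string build_cued_buffer() {
    std::string buf;
    buf.append("Hello");    // plain UTF-8 text, converted by decode_into()
    append_mark(buf, 1);    // mark recorded at the current decoded offset
    buf.append(" world");
    append_mark(buf, 2);
    return buf;             // placed in shared memory when the command is cued
}
```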
@@ -910,6 +606,8 @@ winrt::fire_and_forget Synthesizer::speak(id_type cmd_id, std::wstring_view cons
         current_cmd_id.store(cmd_id);
         current_text_storage = std::move(buf);
         current_marks = std::move(marks);
+        synth.Options().IncludeSentenceBoundaryMetadata(true);
+        synth.Options().IncludeWordBoundaryMetadata(true);
     }
     output(cmd_id, "synthesizing", {{"ssml", is_ssml}, {"num_marks", current_marks.size()}, {"text_length", text.size()}});
     bool ok = false;
@@ -927,69 +625,6 @@ winrt::fire_and_forget Synthesizer::speak(id_type cmd_id, std::wstring_view cons
     }
 }
 
-static size_t
-decode_into(std::string_view src, std::wstring_view dest) {
-    int n = MultiByteToWideChar(CP_UTF8, 0, src.data(), (int)src.size(), (wchar_t*)dest.data(), (int)dest.size());
-    if (n == 0 && src.size() > 0) {
-        switch (GetLastError()) {
-            case ERROR_INSUFFICIENT_BUFFER:
-                throw std::exception("Output buffer too small while decoding cued text");
-            case ERROR_INVALID_FLAGS:
-                throw std::exception("Invalid flags while decoding cued text");
-            case ERROR_INVALID_PARAMETER:
-                throw std::exception("Invalid parameters while decoding cued text");
-            case ERROR_NO_UNICODE_TRANSLATION:
-                throw std::exception("Invalid UTF-8 found while decoding cued text");
-            default:
-                throw std::exception("Unknown error while decoding cued text");
-        }
-    }
-    return n;
-}
-
-static std::wstring_view
-parse_cued_text(std::string_view src, Marks &marks, std::wstring_view dest) {
-    size_t dest_pos = 0;
-    if (dest.size() < src.size()) throw std::exception("Destination buffer for parse_cued_text() too small");
-    while (src.size()) {
-        auto pos = src.find('\0');
-        size_t limit = pos == std::string_view::npos ? src.size() : pos;
-        if (limit) {
-            dest_pos += decode_into(src.substr(0, limit), dest.substr(dest_pos, dest.size() - dest_pos));
-            src = src.substr(limit, src.size() - limit);
-        }
-        if (pos != std::string_view::npos) {
-            src = src.substr(1, src.size() - 1);
-            if (src.size() >= 4) {
-                uint32_t mark = *((uint32_t*)src.data());
-                marks.emplace_back(mark, (uint32_t)dest_pos);
-                src = src.substr(4, src.size() - 4);
-            }
-        }
-    }
-    return dest.substr(0, dest_pos);
-}
-
-static std::wstring_view
-read_from_shm(id_type cmd_id, const std::wstring_view size, const std::wstring_view address, std::vector &buf, Marks &marks, bool is_cued=false) {
-    id_type shm_size = parse_id(size);
-    handle_raii handle(OpenFileMappingW(FILE_MAP_READ, false, address.data()));
-    if (handle.ptr() == INVALID_HANDLE_VALUE) {
-        output_error(cmd_id, "Could not open shared memory at", winrt::to_string(address), __LINE__);
-        return {};
-    }
-    mapping_raii mapping(MapViewOfFile(handle.ptr(), FILE_MAP_READ, 0, 0, (SIZE_T)shm_size));
-    if (!mapping) {
-        output_error(cmd_id, "Could not map shared memory with error", std::to_string(GetLastError()), __LINE__);
-        return {};
-    }
-    buf.reserve(shm_size + 2);
-    std::string_view src((const char*)mapping.ptr(), shm_size);
-    std::wstring_view dest(buf.data(), buf.capacity());
-    if (is_cued) return parse_cued_text(src, marks, dest);
-    return std::wstring_view(buf.data(), decode_into(src, dest));
-}
-
 static void
 handle_speak(id_type cmd_id, std::vector &parts) {
     bool is_ssml = false, is_shm = false, is_cued = false;
@@ -1006,7 +641,7 @@ handle_speak(id_type cmd_id, std::vector &parts) {
     std::vector buf;
     std::wstring_view text;
     if (is_shm) {
-        text = read_from_shm(cmd_id, parts.at(0), parts.at(1), buf, marks, is_cued);
+        text = read_from_shm(cmd_id, parts.at(0), std::wstring(parts.at(1)), buf, marks, is_cued);
         if (text.size() == 0) return;
     } else {
         address = join(parts);
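For context, a hedged sketch of the producer side that read_from_shm() above pairs with: the caller creates a named file mapping, copies the UTF-8 (optionally cued) payload into it, and then passes the mapping name and size in the command. On the real Python side this is done with calibre.utils.shm.SharedMemory; the Win32 equivalent below uses an illustrative function name and deliberately minimal error handling.

```cpp
// Sketch only: a Win32 producer whose output read_from_shm() could consume.
// share_payload() is a hypothetical helper, not part of the patch.
#include <windows.h>
#include <cstring>
#include <string>
#include <string_view>

static HANDLE share_payload(const std::wstring &name, std::string_view payload) {
    HANDLE h = CreateFileMappingW(INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE,
                                  0, (DWORD)payload.size(), name.c_str());
    if (!h) return nullptr;
    void *p = MapViewOfFile(h, FILE_MAP_WRITE, 0, 0, payload.size());
    if (!p) { CloseHandle(h); return nullptr; }
    std::memcpy(p, payload.data(), payload.size());  // worker maps the same region read-only
    UnmapViewOfFile(p);
    return h;  // must stay open until the worker has mapped and read the data
}
```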
@@ -1021,11 +656,43 @@
 // }}}
 
 // Save {{{
+static winrt::fire_and_forget
+save_stream(SpeechSynthesisStream const &&stream, std::ofstream &&outfile, id_type cmd_id) {
+    unsigned long long stream_size = stream.Size(), bytes_read = 0;
+    DataReader reader(stream);
+    unsigned int n;
+    const static unsigned int chunk_size = 16 * 1024;
+    uint8_t buf[chunk_size];
+    while (bytes_read < stream_size) {
+        bool ok = false;
+        try {
+            n = co_await reader.LoadAsync(chunk_size);
+            ok = true;
+        } CATCH_ALL_EXCEPTIONS("Failed to load data from DataReader", cmd_id);
+        if (!ok) break;
+        if (n > 0) {
+            bytes_read += n;
+            ok = false;
+            try {
+                reader.ReadBytes(winrt::array_view(buf, buf + n));
+                outfile.write((const char*)buf, n);
+                if (!outfile.good()) throw "Failed to write to output file";
+                ok = true;
+            } CATCH_ALL_EXCEPTIONS("Failed to save bytes from DataReader to file", cmd_id);
+            if (!ok) break;
+        }
+    }
+    outfile.close();
+    output(cmd_id, "saved", {{"size", bytes_read}});
+}
+
 void
-Synthesizer::load_stream_for_save(SpeechSynthesisStream const &&stream, id_type cmd_id) {
+Synthesizer::start_save_stream(SpeechSynthesisStream const &&stream, id_type cmd_id) {
     std::scoped_lock sl(recursive_lock);
-    if (cmd_id != current_cmd_id.load()) return;
-    current_stream = stream;
+    try {
+        save_stream(std::move(stream), std::move(outfile), cmd_id);
+    } CATCH_ALL_EXCEPTIONS("Failed to save loaded stream", cmd_id);
+    stop_current_activity();
 }
 
 winrt::fire_and_forget Synthesizer::save(id_type cmd_id, std::wstring_view const &text, bool is_ssml, std::vector &&buf, std::ofstream &&out) {
@@ -1035,8 +702,9 @@ winrt::fire_and_forget Synthesizer::save(id_type cmd_id, std::wstring_view const
         current_cmd_id.store(cmd_id);
         current_text_storage = std::move(buf);
         outfile = std::move(out);
+        synth.Options().IncludeSentenceBoundaryMetadata(false);
+        synth.Options().IncludeWordBoundaryMetadata(false);
     }
-    output(cmd_id, "saving", {{"ssml", is_ssml}});
     bool ok = false;
     try {
         if (is_ssml) stream = co_await synth.SynthesizeSsmlToStreamAsync(text);
@@ -1046,7 +714,7 @@ winrt::fire_and_forget Synthesizer::save(id_type cmd_id, std::wstring_view const
     if (ok) {
         if (main_loop_is_running.load()) {
             try {
-                load_stream_for_save(std::move(stream), cmd_id);
+                sx.start_save_stream(std::move(stream), cmd_id);
             } CATCH_ALL_EXCEPTIONS("Failed to load synthesized stream for save", cmd_id);
         }
     }
@@ -1058,16 +726,20 @@ handle_save(id_type cmd_id, std::vector &parts) {
     try {
        is_ssml = parts.at(0) == L"ssml";
     } catch (std::exception const&) {
-        throw std::string("Not a well formed save command");
+        throw "Not a well formed save command"s;
     }
     std::vector buf;
-    std::wstring_view text;
     std::wstring address;
     Marks marks;
-    text = read_from_shm(cmd_id, parts.at(2), parts.at(3), buf, marks);
+    std::wstring_view text = read_from_shm(cmd_id, parts.at(1), std::wstring(parts.at(2)), buf, marks);
     if (text.size() == 0) return;
+    parts.erase(parts.begin(), parts.begin() + 3);
     *((wchar_t*)text.data() + text.size()) = 0;  // ensure NULL termination
-    std::ofstream outfile(parts.at(1), std::ios::out | std::ios::trunc);
+    auto filename = join(parts);
+    auto path = std::filesystem::absolute(filename);
+    std::ofstream outfile(path.string(), std::ios::out | std::ios::trunc);
+    if (!outfile.good()) throw "Failed to create: " + path.string();
+    output(cmd_id, "saving", {{"ssml", is_ssml}, {"output_path", path.string()}});
     sx.save(cmd_id, text, is_ssml, std::move(buf), std::move(outfile));
 }
 // }}}
@@ -1132,6 +804,9 @@ run_main_loop(PyObject*, PyObject*) {
         std::cout.imbue(std::locale("C"));
         std::cin.imbue(std::locale("C"));
         std::cerr.imbue(std::locale("C"));
+        std::wcin.imbue(std::locale("C"));
+        std::wcout.imbue(std::locale("C"));
+        std::wcerr.imbue(std::locale("C"));
     } CATCH_ALL_EXCEPTIONS("Failed to set stdio locales to C", 0);
     winrt::init_apartment(); // MTA (multi-threaded apartment)
     main_thread_id = GetCurrentThreadId();
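One note on the coroutine changes above: save_stream() and the other winrt::fire_and_forget coroutines wrap every step in CATCH_ALL_EXCEPTIONS because a fire-and-forget coroutine has no caller to propagate into, so an unhandled exception would take down the worker instead of producing an error message on stdout. A minimal sketch of that shape, reusing the file's own id_type and CATCH_ALL_EXCEPTIONS machinery (the body itself is illustrative, not part of the patch):

```cpp
// Minimal sketch of the error-handling pattern used by speak()/save()/save_stream():
// report failures via output_error() (through CATCH_ALL_EXCEPTIONS) rather than
// letting them escape the fire-and-forget coroutine.
static winrt::fire_and_forget example_background_task(id_type cmd_id) {
    bool ok = false;
    try {
        co_await winrt::resume_background();  // leave the caller's thread
        // ... do the actual WinRT work here ...
        ok = true;
    } CATCH_ALL_EXCEPTIONS("Background task failed", cmd_id);
    if (!ok) co_return;  // error already reported, nothing more to do
}
```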
diff --git a/src/calibre/utils/windows/winspeech.py b/src/calibre/utils/windows/winspeech.py
index ad2c762884..bdd4e4eddf 100644
--- a/src/calibre/utils/windows/winspeech.py
+++ b/src/calibre/utils/windows/winspeech.py
@@ -3,14 +3,15 @@
 
 import json
+import os
 import struct
 import sys
 from contextlib import closing
 from queue import Queue
 from threading import Thread
 
-from calibre.utils.shm import SharedMemory
 from calibre.utils.ipc.simple_worker import start_pipe_worker
+from calibre.utils.shm import SharedMemory
 
 SSML_SAMPLE = '''
@@ -61,9 +62,8 @@ def encode_to_file_object(text, output) -> int:
     return sz
 
 
-def develop_speech(text='Lucca Brazzi sleeps with the fishes.', mark_words=True):
+def develop_loop(wait_for, *commands):
     p = start_worker()
-    print('\x1b[32mSpeaking', text, '\x1b[39m]]'[:-2], flush=True)
     q = Queue()
 
     def echo_output(p):
@@ -79,6 +79,36 @@ def develop_speech(text='Lucca Brazzi sleeps with the fishes.', mark_words=True)
     Thread(name='Echo', target=echo_output, args=(p,), daemon=True).start()
     exit_code = 0
+    with closing(p.stdin), closing(p.stdout):
+        try:
+            send('1 echo Synthesizer started')
+            send('1 volume 0.1')
+            for command in commands:
+                send(command)
+            while True:
+                m = q.get()
+                if m['related_to'] != wait_for:
+                    continue
+                if m['payload_type'] == 'media_state_changed' and m['state'] == 'ended':
+                    break
+                if m['payload_type'] == 'saved':
+                    break
+                if m['payload_type'] == 'error':
+                    exit_code = 1
+                    break
+            send(f'333 echo Synthesizer exiting with exit code: {exit_code}')
+            send(f'334 exit {exit_code}')
+            ec = p.wait(1)
+            print(f'Worker exited with code: {os.waitstatus_to_exitcode(p.wait(1))}', file=sys.stderr, flush=True)
+            raise SystemExit(ec)
+        finally:
+            if p.poll() is None:
+                p.kill()
+                raise SystemExit(1)
+
+
+def develop_speech(text='Lucca Brazzi sleeps with the fishes.', mark_words=True):
+    print('\x1b[32mSpeaking', text, '\x1b[39m]]'[:-2], flush=True)
     st = 'ssml' if '