diff --git a/src/calibre/utils/windows/winspeech.cpp b/src/calibre/utils/windows/winspeech.cpp index 68169803b3..78f5f6639c 100644 --- a/src/calibre/utils/windows/winspeech.cpp +++ b/src/calibre/utils/windows/winspeech.cpp @@ -393,21 +393,29 @@ output_error(id_type cmd_id, std::string_view const &msg, std::string_view const output(cmd_id, "error", std::move(m)); } -#define CATCH_ALL_EXCEPTIONS(msg, cmd_id) \ - catch(winrt::hresult_error const& ex) { \ - output_error(cmd_id, msg, winrt::to_string(ex.message()), __LINE__, ex.to_abi()); \ -} catch(const std::system_error& ex) { \ - output_error(cmd_id, msg, "system_error with code: " + std::to_string(ex.code().value()) + " and meaning: " + ex.what(), __LINE__); \ -} catch (std::exception const &ex) { \ - output_error(cmd_id, msg, ex.what(), __LINE__); \ -} catch (std::string const &ex) { \ - output_error(cmd_id, msg, ex, __LINE__); \ -} catch (std::wstring const &ex) { \ - output_error(cmd_id, msg, winrt::to_string(ex), __LINE__); \ -} catch (...) { \ - output_error(cmd_id, msg, "Unknown exception type was raised", __LINE__); \ +static bool +run_catching_exceptions(std::function f, std::string_view const &msg, int64_t line, id_type cmd_id=0) { + bool ok = false; + try { + f(); + ok = true; + } catch(winrt::hresult_error const& ex) { + output_error(cmd_id, msg, winrt::to_string(ex.message()), line, ex.to_abi()); + } catch(const std::system_error& ex) { + output_error(cmd_id, msg, "system_error with code: " + std::to_string(ex.code().value()) + " and meaning: " + ex.what(), line); + } catch (std::exception const &ex) { + output_error(cmd_id, msg, ex.what(), line); + } catch (std::string const &ex) { + output_error(cmd_id, msg, ex, line); + } catch (std::wstring const &ex) { + output_error(cmd_id, msg, winrt::to_string(ex), line); + } catch (...) { + output_error(cmd_id, msg, "Unknown exception type was raised", line); + } + return ok; } + struct Revokers { MediaPlaybackSession::PlaybackStateChanged_revoker playback_state_changed; MediaPlayer::MediaEnded_revoker media_ended; MediaPlayer::MediaOpened_revoker media_opened; @@ -424,132 +432,14 @@ struct Mark { Mark(uint32_t id, uint32_t pos) : id(id), pos_in_text(pos) {} }; -typedef std::vector Marks; - -class Synthesizer { - private: - SpeechSynthesizer synth{nullptr}; - MediaPlayer player{nullptr}; - MediaSource current_source{nullptr}; - SpeechSynthesisStream current_stream{nullptr}; - MediaPlaybackItem current_item{nullptr}; - std::vector current_text_storage; - Marks current_marks; +struct Marks { + std::vector entries; int32_t last_reported_mark_index; - std::atomic current_cmd_id; - - Revokers revoker; - std::recursive_mutex recursive_lock; - - public: - // Speak {{{ - void register_metadata_handler_for_track(uint32_t index, id_type cmd_id); - void load_stream_for_playback(SpeechSynthesisStream const &&stream, id_type cmd_id, bool is_cued); - winrt::fire_and_forget speak(id_type cmd_id, std::wstring_view const &text, bool is_ssml, bool is_cued, std::vector &&buf, Marks const && marks); - void register_metadata_handler_for_speech(id_type cmd_id, long index); - bool cmd_id_is_current(id_type cmd_id) const noexcept { return current_cmd_id.load() == cmd_id; } - void on_cue_entered(id_type cmd_id, const winrt::hstring &label, const SpeechCue &cue); - // }}} - - winrt::fire_and_forget save(id_type cmd_id, std::wstring_view const &text, bool is_ssml, std::vector &&buf, std::filesystem::path path); - void start_save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id); - - void initialize() { - synth = SpeechSynthesizer(); - player = MediaPlayer(); - player.AudioCategory(MediaPlayerAudioCategory::Speech); - player.AutoPlay(true); - } - - void output(id_type cmd_id, std::string_view const& type, json_val const && x) { - std::scoped_lock sl(recursive_lock); - if (cmd_id_is_current(cmd_id)) ::output(cmd_id, type, std::move(x)); - } - - void stop_current_activity() { - std::scoped_lock sl(recursive_lock); - if (current_cmd_id.load()) { - current_cmd_id.store(0); - revoker = {}; - current_source = MediaSource{nullptr}; - current_stream = SpeechSynthesisStream{nullptr}; - current_item = MediaPlaybackItem{nullptr}; - player.Pause(); - current_text_storage = std::vector(); - current_marks = Marks(); - last_reported_mark_index = -1; - } - } - - double volume() const { - return synth.Options().AudioVolume(); - } - - void volume(double val) { - if (val < 0 || val > 1) throw std::out_of_range("Invalid volume value must be between 0 and 1"); - std::scoped_lock sl(recursive_lock); - synth.Options().AudioVolume(val); - } - - double rate() const { - return synth.Options().SpeakingRate(); - } - - void rate(double val) { - if (val < 0.5 || val > 6.0) throw std::out_of_range("Invalid rate value must be between 0.5 and 6"); - std::scoped_lock sl(recursive_lock); - synth.Options().SpeakingRate(val); - } - - double pitch() const { - return synth.Options().AudioPitch(); - } - - void pitch(double val) { - if (val < 0 || val > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2"); - std::scoped_lock sl(recursive_lock); - synth.Options().AudioPitch(val); - } - - void pause() const { - player.Pause(); - } - - void play() const { - player.Play(); - } - - bool toggle() const { - switch (player.PlaybackSession().PlaybackState()) { - case MediaPlaybackState::Playing: pause(); return true; - case MediaPlaybackState::Paused: play(); return true; - default: return false; - } - } - - MediaPlaybackState playback_state() const { - return player.PlaybackSession().PlaybackState(); - } - - DeviceInformation audio_device() const { - return player.AudioDevice(); - } - - void audio_device(DeviceInformation const &di) const { - player.AudioDevice(di); - } - - VoiceInformation voice() const { - return synth.Voice(); - } - - void voice(VoiceInformation const &v) const { - return synth.Voice(v); - } - + Marks() : entries(), last_reported_mark_index(-1) {} }; -static Synthesizer sx; +static SpeechSynthesizer speech_synthesizer{nullptr}; +static MediaPlayer media_player{nullptr}; static size_t decode_into(std::string_view src, std::wstring_view dest) { @@ -575,7 +465,7 @@ parse_cued_text(std::string_view src, Marks &marks, std::wstring_view dest) { src = src.substr(1, src.size() - 1); if (src.size() >= 4) { uint32_t mark = *((uint32_t*)src.data()); - marks.emplace_back(mark, (uint32_t)dest_pos); + marks.entries.emplace_back(mark, (uint32_t)dest_pos); src = src.substr(4, src.size() - 4); } } @@ -605,124 +495,46 @@ read_from_shm(id_type cmd_id, const std::wstring_view size, const std::wstring & // Speak {{{ -void Synthesizer::on_cue_entered(id_type cmd_id, const winrt::hstring &label, const SpeechCue &cue) { - std::scoped_lock sl(recursive_lock); - if (!cmd_id_is_current(cmd_id)) return; - output(cmd_id, "cue_entered", json_val(label, cue)); - if (label != L"SpeechWord") return; - uint32_t pos = cue.StartPositionInInput().Value(); - for (int32_t i = std::max(0, last_reported_mark_index); i < (int32_t)current_marks.size(); i++) { - int32_t idx = -1; - if (current_marks[i].pos_in_text > pos) { - idx = i-1; - if (idx == last_reported_mark_index && current_marks[i].pos_in_text - pos < 3) idx = i; - } else if (current_marks[i].pos_in_text == pos) idx = i; - if (idx > -1) { - output(cmd_id, "mark_reached", {{"id", current_marks[idx].id}}); - last_reported_mark_index = idx; - break; - } - } -} +static Revokers speak_revoker = {}; -void Synthesizer::register_metadata_handler_for_speech(id_type cmd_id, long index) { - std::scoped_lock sl(recursive_lock); - if (!cmd_id_is_current(cmd_id)) return; - if (index < 0) { - for (uint32_t i = 0; i < current_item.TimedMetadataTracks().Size(); i++) { - register_metadata_handler_for_track(i, cmd_id); - } - } else { - register_metadata_handler_for_track(index, cmd_id); - } -} +static void +register_metadata_handler_for_track(MediaPlaybackTimedMetadataTrackList const &tracks, uint32_t index, id_type cmd_id, std::shared_ptr marks) { + TimedMetadataTrack track = tracks.GetAt(index); + tracks.SetPresentationMode((unsigned int)index, TimedMetadataTrackPresentationMode::ApplicationPresented); -void -Synthesizer::register_metadata_handler_for_track(uint32_t index, id_type cmd_id) { - TimedMetadataTrack track = current_item.TimedMetadataTracks().GetAt(index); - std::scoped_lock sl(recursive_lock); - if (current_cmd_id.load() != cmd_id) return; - revoker.cue_entered.push_back(track.CueEntered(winrt::auto_revoke, [cmd_id](auto track, const auto& args) { - if (main_loop_is_running.load()) sx.on_cue_entered(cmd_id, track.Label(), args.Cue().template as()); + speak_revoker.cue_entered.push_back(track.CueEntered(winrt::auto_revoke, [cmd_id, marks](auto track, const auto& args) { + if (main_loop_is_running.load()) { + auto label = track.Label(); + auto cue = args.Cue().template as(); + output(cmd_id, "cue_entered", {label, cue}); + if (label != L"SpeechWord") return; + uint32_t pos = cue.StartPositionInInput().Value(); + for (int32_t i = std::max(0, marks->last_reported_mark_index); i < (int32_t)marks->entries.size(); i++) { + int32_t idx = -1; + if (marks->entries[i].pos_in_text > pos) { + idx = i-1; + if (idx == marks->last_reported_mark_index && marks->entries[i].pos_in_text - pos < 3) idx = i; + } else if (marks->entries[i].pos_in_text == pos) idx = i; + if (idx > -1) { + output(cmd_id, "mark_reached", {{"id", marks->entries[idx].id}}); + marks->last_reported_mark_index = idx; + break; + } + } + } })); - revoker.cue_exited.push_back(track.CueExited(winrt::auto_revoke, [cmd_id](auto track, const auto& args) { - if (main_loop_is_running.load()) sx.output( + + speak_revoker.cue_exited.push_back(track.CueExited(winrt::auto_revoke, [cmd_id](auto track, const auto& args) { + if (main_loop_is_running.load()) output( cmd_id, "cue_exited", json_val(track.Label(), args.Cue().template as())); })); - revoker.track_failed.push_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) { - if (main_loop_is_running.load()) sx.output( + + speak_revoker.track_failed.push_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) { + if (main_loop_is_running.load()) output( cmd_id, "track_failed", {}); })); - current_item.TimedMetadataTracks().SetPresentationMode((unsigned int)index, TimedMetadataTrackPresentationMode::ApplicationPresented); -} +}; -void -Synthesizer::load_stream_for_playback(SpeechSynthesisStream const &&stream, id_type cmd_id, bool is_cued) { - std::scoped_lock sl(recursive_lock); - if (cmd_id != current_cmd_id.load()) return; - current_stream = stream; - current_source = MediaSource::CreateFromStream(current_stream, current_stream.ContentType()); - - revoker.playback_state_changed = player.PlaybackSession().PlaybackStateChanged( - winrt::auto_revoke, [cmd_id](auto session, auto const&) { - if (main_loop_is_running.load()) sx.output( - cmd_id, "playback_state_changed", {{"state", session.PlaybackState()}}); - }); - revoker.media_opened = player.MediaOpened(winrt::auto_revoke, [cmd_id](auto player, auto const&) { - if (main_loop_is_running.load()) sx.output( - cmd_id, "media_state_changed", {{"state", "opened"}}); - }); - revoker.media_ended = player.MediaEnded(winrt::auto_revoke, [cmd_id](auto player, auto const&) { - if (main_loop_is_running.load()) sx.output( - cmd_id, "media_state_changed", {{"state", "ended"}}); - }); - revoker.media_failed = player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) { - if (main_loop_is_running.load()) sx.output( - cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"code", args.Error()}}); - }); - current_item = MediaPlaybackItem(current_source); - - revoker.timed_metadata_tracks_changed = current_item.TimedMetadataTracksChanged(winrt::auto_revoke, - [cmd_id](auto, auto const &args) { - auto change_type = args.CollectionChange(); - long index; - switch (change_type) { - case CollectionChange::ItemInserted: index = args.Index(); break; - case CollectionChange::Reset: index = -1; break; - default: index = -2; break; - } - if (index > -2 && main_loop_is_running.load()) sx.register_metadata_handler_for_speech(cmd_id, index); - }); - register_metadata_handler_for_speech(cmd_id, -1); - - player.Source(current_item); -} - -winrt::fire_and_forget Synthesizer::speak(id_type cmd_id, std::wstring_view const &text, bool is_ssml, bool is_cued, std::vector &&buf, Marks const && marks) { - SpeechSynthesisStream stream{nullptr}; - { std::scoped_lock sl(recursive_lock); - stop_current_activity(); - current_cmd_id.store(cmd_id); - current_text_storage = std::move(buf); - current_marks = std::move(marks); - synth.Options().IncludeSentenceBoundaryMetadata(true); - synth.Options().IncludeWordBoundaryMetadata(true); - } - output(cmd_id, "synthesizing", {{"ssml", is_ssml}, {"num_marks", current_marks.size()}, {"text_length", text.size()}}); - bool ok = false; - try { - if (is_ssml) stream = co_await synth.SynthesizeSsmlToStreamAsync(text); - else stream = co_await synth.SynthesizeTextToStreamAsync(text); - ok = true; - } CATCH_ALL_EXCEPTIONS("Failed to synthesize speech", cmd_id); - if (ok) { - if (main_loop_is_running.load()) { - try { - load_stream_for_playback(std::move(stream), cmd_id, is_cued); - } CATCH_ALL_EXCEPTIONS("Failed to load synthesized stream for playback", cmd_id); - } - } -} static void handle_speak(id_type cmd_id, std::vector &parts) { @@ -736,11 +548,11 @@ handle_speak(id_type cmd_id, std::vector &parts) { } parts.erase(parts.begin(), parts.begin() + 2); std::wstring address; - Marks marks; + auto marks = std::make_shared(); std::vector buf; std::wstring_view text; if (is_shm) { - text = read_from_shm(cmd_id, parts.at(0), std::wstring(parts.at(1)), buf, marks, is_cued); + text = read_from_shm(cmd_id, parts.at(0), std::wstring(parts.at(1)), buf, *marks, is_cued); if (text.size() == 0) return; } else { address = join(parts); @@ -750,12 +562,62 @@ handle_speak(id_type cmd_id, std::vector &parts) { address.copy(buf.data(), address.size()); } *((wchar_t*)text.data() + text.size()) = 0; // ensure NULL termination - sx.speak(cmd_id, text, is_ssml, is_cued, std::move(buf), std::move(marks)); + + output(cmd_id, "synthesizing", {{"ssml", is_ssml}, {"num_marks", marks->entries.size()}, {"text_length", text.size()}}); + bool ok = false; + SpeechSynthesisStream stream{nullptr}; + if (!run_catching_exceptions([&]() { + speech_synthesizer.Options().IncludeSentenceBoundaryMetadata(true); + speech_synthesizer.Options().IncludeWordBoundaryMetadata(true); + if (is_ssml) stream = speech_synthesizer.SynthesizeSsmlToStreamAsync(text).get(); + else stream = speech_synthesizer.SynthesizeTextToStreamAsync(text).get(); + ok = true; + }, "Failed to synthesize speech", __LINE__, cmd_id)) return; + + speak_revoker = {}; // delete any revokers previously installed + MediaSource source(MediaSource::CreateFromStream(stream, stream.ContentType())); + + speak_revoker.playback_state_changed = media_player.PlaybackSession().PlaybackStateChanged( + winrt::auto_revoke, [cmd_id](auto session, auto const&) { + if (main_loop_is_running.load()) output( + cmd_id, "playback_state_changed", {{"state", session.PlaybackState()}}); + }); + speak_revoker.media_opened = media_player.MediaOpened(winrt::auto_revoke, [cmd_id](auto player, auto const&) { + if (main_loop_is_running.load()) output( + cmd_id, "media_state_changed", {{"state", "opened"}}); + }); + speak_revoker.media_ended = media_player.MediaEnded(winrt::auto_revoke, [cmd_id](auto player, auto const&) { + if (main_loop_is_running.load()) output( + cmd_id, "media_state_changed", {{"state", "ended"}}); + }); + speak_revoker.media_failed = media_player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) { + if (main_loop_is_running.load()) output( + cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"code", args.Error()}}); + }); + auto playback_item = std::make_shared(source); + + speak_revoker.timed_metadata_tracks_changed = playback_item->TimedMetadataTracksChanged(winrt::auto_revoke, + [cmd_id, playback_item_weak_ref = std::weak_ptr(playback_item), marks](auto, auto const &args) { + auto change_type = args.CollectionChange(); + long index; + switch (change_type) { + case CollectionChange::ItemInserted: index = args.Index(); break; + case CollectionChange::Reset: index = -1; break; + default: index = -2; break; + } + auto pi{ playback_item_weak_ref.lock() }; + if (index > -2 && pi && main_loop_is_running.load()) register_metadata_handler_for_track(pi->TimedMetadataTracks(), index, cmd_id, marks); + }); + + for (uint32_t i = 0; i < playback_item->TimedMetadataTracks().Size(); i++) { + register_metadata_handler_for_track(playback_item->TimedMetadataTracks(), i, cmd_id, marks); + } + media_player.Source(*playback_item); } // }}} // Save {{{ -static winrt::fire_and_forget +static void save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id) { unsigned long long stream_size = stream.Size(), bytes_read = 0; DataReader reader(stream); @@ -763,66 +625,26 @@ save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id const static unsigned int chunk_size = 16 * 1024; std::array buf; std::ofstream outfile; - bool ok = false; - try { + if (!run_catching_exceptions([&](){ outfile.open(path.string(), std::ios::out | std::ios::trunc); - ok = true; - } CATCH_ALL_EXCEPTIONS("Failed to create file: " + path.string(), cmd_id); - if (!ok) co_return; + }, "Failed to create file: " + path.string(), __LINE__, cmd_id)) return; + while (bytes_read < stream_size) { - try { - n = co_await reader.LoadAsync(chunk_size); - ok = true; - } CATCH_ALL_EXCEPTIONS("Failed to load data from DataReader", cmd_id); - if (!ok) co_return; + if (!run_catching_exceptions([&]() { + n = reader.LoadAsync(chunk_size).get(); + }, "Failed to load data from DataReader", __LINE__, cmd_id)) return; if (n > 0) { bytes_read += n; - ok = false; - try { + if (!run_catching_exceptions([&]() { reader.ReadBytes(winrt::array_view(buf.data(), buf.data() + n)); outfile.write((const char*)buf.data(), n); if (!outfile.good()) throw "Failed to write to output file"; - ok = true; - } CATCH_ALL_EXCEPTIONS("Failed to save bytes from DataReader to file", cmd_id); - if (!ok) co_return; + }, "Failed to save bytes from DataReader to file", __LINE__, cmd_id)) return; } } output(cmd_id, "saved", {{"size", bytes_read}}); } -void -Synthesizer::start_save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id) { - std::scoped_lock sl(recursive_lock); - try { - save_stream(std::move(stream), path, cmd_id); - } CATCH_ALL_EXCEPTIONS("Failed to save loaded stream", cmd_id); - stop_current_activity(); -} - -winrt::fire_and_forget Synthesizer::save(id_type cmd_id, std::wstring_view const &text, bool is_ssml, std::vector &&buf, std::filesystem::path path) { - SpeechSynthesisStream stream{nullptr}; - { std::scoped_lock sl(recursive_lock); - stop_current_activity(); - current_cmd_id.store(cmd_id); - current_text_storage = std::move(buf); - synth.Options().IncludeSentenceBoundaryMetadata(false); - synth.Options().IncludeWordBoundaryMetadata(false); - } - bool ok = false; - try { - if (is_ssml) stream = co_await synth.SynthesizeSsmlToStreamAsync(text); - else stream = co_await synth.SynthesizeTextToStreamAsync(text); - ok = true; - } CATCH_ALL_EXCEPTIONS("Failed to synthesize speech", cmd_id); - if (ok) { - if (main_loop_is_running.load()) { - try { - sx.start_save_stream(std::move(stream), path, cmd_id); - } CATCH_ALL_EXCEPTIONS("Failed to load synthesized stream for save", cmd_id); - } - } -} - static void handle_save(id_type cmd_id, std::vector &parts) { bool is_ssml; @@ -841,7 +663,14 @@ handle_save(id_type cmd_id, std::vector &parts) { auto filename = join(parts); auto path = std::filesystem::absolute(filename); output(cmd_id, "saving", {{"ssml", is_ssml}, {"output_path", path.string()}}); - sx.save(cmd_id, text, is_ssml, std::move(buf), path); + SpeechSynthesisStream stream{nullptr}; + speech_synthesizer.Options().IncludeSentenceBoundaryMetadata(false); + speech_synthesizer.Options().IncludeWordBoundaryMetadata(false); + if (!run_catching_exceptions([&]() { + if (is_ssml) stream = speech_synthesizer.SynthesizeSsmlToStreamAsync(text).get(); + else stream = speech_synthesizer.SynthesizeTextToStreamAsync(text).get(); + }, "Failed to synthesize speech", __LINE__, cmd_id)) return; + save_stream(std::move(stream), path, cmd_id); } // }}} @@ -862,18 +691,17 @@ static const std::unordered_map handlers = { }}, {"play", [](id_type cmd_id, std::vector parts, int64_t*) { - sx.play(); - output(cmd_id, "play", {{"playback_state", sx.playback_state()}}); + media_player.Play(); + output(cmd_id, "play", {{"playback_state", media_player.PlaybackSession().PlaybackState()}}); }}, {"pause", [](id_type cmd_id, std::vector parts, int64_t*) { - sx.play(); - output(cmd_id, "pause", {{"playback_state", sx.playback_state()}}); + media_player.Pause(); + output(cmd_id, "pause", {{"playback_state", media_player.PlaybackSession().PlaybackState()}}); }}, {"state", [](id_type cmd_id, std::vector parts, int64_t*) { - sx.play(); - output(cmd_id, "state", {{"playback_state", sx.playback_state()}}); + output(cmd_id, "state", {{"playback_state", media_player.PlaybackSession().PlaybackState()}}); }}, {"default_voice", [](id_type cmd_id, std::vector parts, int64_t*) { @@ -895,25 +723,28 @@ static const std::unordered_map handlers = { {"volume", [](id_type cmd_id, std::vector parts, int64_t*) { if (parts.size()) { auto vol = parse_double(parts[0].data()); - sx.volume(vol); + if (vol < 0 || vol > 1) throw std::out_of_range("Invalid volume value must be between 0 and 1"); + speech_synthesizer.Options().AudioVolume(vol); } - output(cmd_id, "volume", {{"value", sx.volume()}}); + output(cmd_id, "volume", {{"value", speech_synthesizer.Options().AudioVolume()}}); }}, {"rate", [](id_type cmd_id, std::vector parts, int64_t*) { if (parts.size()) { auto rate = parse_double(parts[0].data()); - sx.rate(rate); + if (rate < 0.5 || rate > 6.0) throw std::out_of_range("Invalid rate value must be between 0.5 and 6"); + speech_synthesizer.Options().SpeakingRate(rate); } - output(cmd_id, "rate", {{"value", sx.rate()}}); + output(cmd_id, "rate", {{"value", speech_synthesizer.Options().SpeakingRate()}}); }}, {"pitch", [](id_type cmd_id, std::vector parts, int64_t*) { if (parts.size()) { - auto rate = parse_double(parts[0].data()); - sx.rate(rate); + auto pitch = parse_double(parts[0].data()); + if (pitch < 0 || pitch > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2"); + speech_synthesizer.Options().AudioPitch(pitch); } - output(cmd_id, "pitch", {{"pitch", sx.rate()}}); + output(cmd_id, "pitch", {{"pitch", speech_synthesizer.Options().AudioPitch()}}); }}, {"save", [](id_type cmd_id, std::vector parts, int64_t*) { @@ -932,7 +763,7 @@ handle_stdin_message(winrt::hstring const &&msg) { bool ok = false; std::vector parts; int64_t exit_code = -1; - try { + if (!run_catching_exceptions([&]() { parts = split(msg); command = parts.at(1); cmd_id = parse_id(parts.at(0)); if (cmd_id == 0) { @@ -940,56 +771,60 @@ handle_stdin_message(winrt::hstring const &&msg) { } parts.erase(parts.begin(), parts.begin() + 2); ok = true; - } CATCH_ALL_EXCEPTIONS((std::string("Invalid input message: ") + winrt::to_string(msg)), 0); - if (ok) { - handler_function handler; - std::string cmd(winrt::to_string(command)); - try { - handler = handlers.at(cmd.c_str()); - } catch (std::out_of_range) { - output_error(cmd_id, "Unknown command", cmd, __LINE__); - return exit_code; - } - try { - handler(cmd_id, parts, &exit_code); - } CATCH_ALL_EXCEPTIONS("Error handling input message", cmd_id); + }, "Invalid input message: " + winrt::to_string(msg), __LINE__)) return exit_code; + handler_function handler; + std::string cmd(winrt::to_string(command)); + try { + handler = handlers.at(cmd.c_str()); + } catch (std::out_of_range) { + output_error(cmd_id, "Unknown command", cmd, __LINE__); + return exit_code; } + run_catching_exceptions([&]() { + handler(cmd_id, parts, &exit_code); + }, "Error handling input message", __LINE__, cmd_id); return exit_code; } - static PyObject* run_main_loop(PyObject*, PyObject*) { - try { + if (!run_catching_exceptions([]() { std::cout.imbue(std::locale("C")); std::cin.imbue(std::locale("C")); std::cerr.imbue(std::locale("C")); std::wcin.imbue(std::locale("C")); std::wcout.imbue(std::locale("C")); std::wcerr.imbue(std::locale("C")); - } CATCH_ALL_EXCEPTIONS("Failed to set stdio locales to C", 0); - winrt::init_apartment(winrt::apartment_type::multi_threaded); - main_thread_id = GetCurrentThreadId(); - MSG msg; - int64_t exit_code = 0; - bool ok = false; - try { - new (&sx) Synthesizer(); - sx.initialize(); - ok = true; - } CATCH_ALL_EXCEPTIONS("Error initializing Synthesizer", 0); - if (!ok) return PyLong_FromUnsignedLongLong(1); + }, "Failed to set stdio locales to C", __LINE__)) { + return PyLong_FromLongLong(1); + } - Py_BEGIN_ALLOW_THREADS; - main_loop_is_running.store(true); - PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE); // ensure we have a message queue + if (!run_catching_exceptions([]() { + winrt::init_apartment(winrt::apartment_type::multi_threaded); + }, "Failed to initialize COM", __LINE__)) { + return PyLong_FromLongLong(1); + } + + main_thread_id = GetCurrentThreadId(); + + if (!run_catching_exceptions([]() { + speech_synthesizer = SpeechSynthesizer(); + media_player = MediaPlayer(); + media_player.AudioCategory(MediaPlayerAudioCategory::Speech); + media_player.AutoPlay(true); + }, "Failed to initialize SpeechSynthesizer and MediaPlayer", __LINE__)) { + return PyLong_FromLongLong(1); + } if (_isatty(_fileno(stdin))) { std::cout << "Welcome to winspeech. Type exit to quit." << std::endl; } + int64_t exit_code = -1; + main_loop_is_running.store(true); + Py_BEGIN_ALLOW_THREADS; std::string input_buffer; - while (true) { + while (exit_code < 0) { try { if (!std::getline(std::cin, input_buffer)) { if (!std::cin.eof()) exit_code = 1; @@ -997,7 +832,10 @@ run_main_loop(PyObject*, PyObject*) { } rtrim(input_buffer); if (input_buffer.size() > 0) { - if ((exit_code = handle_stdin_message(std::move(winrt::to_hstring(input_buffer)))) >= 0) break; + run_catching_exceptions([&]() { + exit_code = handle_stdin_message(std::move(winrt::to_hstring(input_buffer))); + }, "Error handling STDIN message", __LINE__); + if (exit_code >= 0) break; } } catch(...) { exit_code = 1; @@ -1005,14 +843,13 @@ run_main_loop(PyObject*, PyObject*) { break; } } - - main_loop_is_running.store(false); Py_END_ALLOW_THREADS; + main_loop_is_running.store(false); try { - sx.stop_current_activity(); - (&sx)->~Synthesizer(); - } CATCH_ALL_EXCEPTIONS("Error stopping all activity", 0); + speech_synthesizer = SpeechSynthesizer{nullptr}; + media_player = MediaPlayer{nullptr}; + } catch(...) {} return PyLong_FromLongLong(exit_code); }