Refactor speech code to make it simpler and hopefully more robust

Also get rid of the catch macro
Kovid Goyal 2023-01-29 14:58:54 +05:30
parent 356091be76
commit cf0cc595b6
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
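Note on the pattern: the diff below replaces the multi-line CATCH_ALL_EXCEPTIONS() preprocessor macro, which had to be pasted after every try block, with a single run_catching_exceptions() helper that receives the guarded work as a std::function, reports any failure, and returns whether the work succeeded. The following standalone sketch illustrates that shape only; report_error() and the toy caller in main() are illustrative stand-ins, not code from the calibre sources.

#include <cstdint>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string_view>

// Simplified error reporter standing in for the output_error() used in the real code.
static void
report_error(std::string_view msg, std::string_view detail, int64_t line) {
    std::cerr << "error at line " << line << ": " << msg << ": " << detail << '\n';
}

// The guarded work is passed in as a callable instead of pasting catch blocks after
// every try block via a macro; the helper reports the failure and tells the caller
// whether the work completed.
static bool
run_catching_exceptions(std::function<void(void)> f, std::string_view msg, int64_t line) {
    bool ok = false;
    try {
        f();
        ok = true;
    } catch (std::exception const &ex) {
        report_error(msg, ex.what(), line);
    } catch (...) {
        report_error(msg, "Unknown exception type was raised", line);
    }
    return ok;
}

int main() {
    // Each fallible step is wrapped in a lambda and the caller bails out early on
    // failure, mirroring the "if (!run_catching_exceptions(...)) return;" usage
    // in the refactored code below.
    if (!run_catching_exceptions(
            []() { throw std::runtime_error("synthesis failed"); },
            "Failed to synthesize speech", __LINE__)) return 1;
    return 0;
}

The real helper in the diff additionally knows about winrt::hresult_error, std::system_error and string exceptions, and forwards the command id and source line to output_error(); the callers in handle_speak(), save_stream(), handle_stdin_message() and run_main_loop() all follow the early-return shape shown above.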


@@ -393,21 +393,29 @@ output_error(id_type cmd_id, std::string_view const &msg, std::string_view const
     output(cmd_id, "error", std::move(m));
 }
 
-#define CATCH_ALL_EXCEPTIONS(msg, cmd_id) \
-  catch(winrt::hresult_error const& ex) { \
-    output_error(cmd_id, msg, winrt::to_string(ex.message()), __LINE__, ex.to_abi()); \
-} catch(const std::system_error& ex) { \
-    output_error(cmd_id, msg, "system_error with code: " + std::to_string(ex.code().value()) + " and meaning: " + ex.what(), __LINE__); \
-} catch (std::exception const &ex) { \
-    output_error(cmd_id, msg, ex.what(), __LINE__); \
-} catch (std::string const &ex) { \
-    output_error(cmd_id, msg, ex, __LINE__); \
-} catch (std::wstring const &ex) { \
-    output_error(cmd_id, msg, winrt::to_string(ex), __LINE__); \
-} catch (...) { \
-    output_error(cmd_id, msg, "Unknown exception type was raised", __LINE__); \
-}
+static bool
+run_catching_exceptions(std::function<void(void)> f, std::string_view const &msg, int64_t line, id_type cmd_id=0) {
+    bool ok = false;
+    try {
+        f();
+        ok = true;
+    } catch(winrt::hresult_error const& ex) {
+        output_error(cmd_id, msg, winrt::to_string(ex.message()), line, ex.to_abi());
+    } catch(const std::system_error& ex) {
+        output_error(cmd_id, msg, "system_error with code: " + std::to_string(ex.code().value()) + " and meaning: " + ex.what(), line);
+    } catch (std::exception const &ex) {
+        output_error(cmd_id, msg, ex.what(), line);
+    } catch (std::string const &ex) {
+        output_error(cmd_id, msg, ex, line);
+    } catch (std::wstring const &ex) {
+        output_error(cmd_id, msg, winrt::to_string(ex), line);
+    } catch (...) {
+        output_error(cmd_id, msg, "Unknown exception type was raised", line);
+    }
+    return ok;
+}
 
 struct Revokers {
     MediaPlaybackSession::PlaybackStateChanged_revoker playback_state_changed;
     MediaPlayer::MediaEnded_revoker media_ended; MediaPlayer::MediaOpened_revoker media_opened;
@@ -424,132 +432,14 @@ struct Mark {
     Mark(uint32_t id, uint32_t pos) : id(id), pos_in_text(pos) {}
 };
 
-typedef std::vector<Mark> Marks;
-
-class Synthesizer {
-    private:
-    SpeechSynthesizer synth{nullptr};
-    MediaPlayer player{nullptr};
-    MediaSource current_source{nullptr};
-    SpeechSynthesisStream current_stream{nullptr};
-    MediaPlaybackItem current_item{nullptr};
-    std::vector<wchar_t> current_text_storage;
-    Marks current_marks;
-    int32_t last_reported_mark_index;
-    std::atomic<id_type> current_cmd_id;
-    Revokers revoker;
-    std::recursive_mutex recursive_lock;
-
-    public:
-    // Speak {{{
-    void register_metadata_handler_for_track(uint32_t index, id_type cmd_id);
-    void load_stream_for_playback(SpeechSynthesisStream const &&stream, id_type cmd_id, bool is_cued);
-    winrt::fire_and_forget speak(id_type cmd_id, std::wstring_view const &text, bool is_ssml, bool is_cued, std::vector<wchar_t> &&buf, Marks const && marks);
-    void register_metadata_handler_for_speech(id_type cmd_id, long index);
-    bool cmd_id_is_current(id_type cmd_id) const noexcept { return current_cmd_id.load() == cmd_id; }
-    void on_cue_entered(id_type cmd_id, const winrt::hstring &label, const SpeechCue &cue);
-    // }}}
-
-    winrt::fire_and_forget save(id_type cmd_id, std::wstring_view const &text, bool is_ssml, std::vector<wchar_t> &&buf, std::filesystem::path path);
-    void start_save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id);
-
-    void initialize() {
-        synth = SpeechSynthesizer();
-        player = MediaPlayer();
-        player.AudioCategory(MediaPlayerAudioCategory::Speech);
-        player.AutoPlay(true);
-    }
-
-    void output(id_type cmd_id, std::string_view const& type, json_val const && x) {
-        std::scoped_lock sl(recursive_lock);
-        if (cmd_id_is_current(cmd_id)) ::output(cmd_id, type, std::move(x));
-    }
-
-    void stop_current_activity() {
-        std::scoped_lock sl(recursive_lock);
-        if (current_cmd_id.load()) {
-            current_cmd_id.store(0);
-            revoker = {};
-            current_source = MediaSource{nullptr};
-            current_stream = SpeechSynthesisStream{nullptr};
-            current_item = MediaPlaybackItem{nullptr};
-            player.Pause();
-            current_text_storage = std::vector<wchar_t>();
-            current_marks = Marks();
-            last_reported_mark_index = -1;
-        }
-    }
-
-    double volume() const {
-        return synth.Options().AudioVolume();
-    }
-
-    void volume(double val) {
-        if (val < 0 || val > 1) throw std::out_of_range("Invalid volume value must be between 0 and 1");
-        std::scoped_lock sl(recursive_lock);
-        synth.Options().AudioVolume(val);
-    }
-
-    double rate() const {
-        return synth.Options().SpeakingRate();
-    }
-
-    void rate(double val) {
-        if (val < 0.5 || val > 6.0) throw std::out_of_range("Invalid rate value must be between 0.5 and 6");
-        std::scoped_lock sl(recursive_lock);
-        synth.Options().SpeakingRate(val);
-    }
-
-    double pitch() const {
-        return synth.Options().AudioPitch();
-    }
-
-    void pitch(double val) {
-        if (val < 0 || val > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2");
-        std::scoped_lock sl(recursive_lock);
-        synth.Options().AudioPitch(val);
-    }
-
-    void pause() const {
-        player.Pause();
-    }
-
-    void play() const {
-        player.Play();
-    }
-
-    bool toggle() const {
-        switch (player.PlaybackSession().PlaybackState()) {
-            case MediaPlaybackState::Playing: pause(); return true;
-            case MediaPlaybackState::Paused: play(); return true;
-            default: return false;
-        }
-    }
-
-    MediaPlaybackState playback_state() const {
-        return player.PlaybackSession().PlaybackState();
-    }
-
-    DeviceInformation audio_device() const {
-        return player.AudioDevice();
-    }
-
-    void audio_device(DeviceInformation const &di) const {
-        player.AudioDevice(di);
-    }
-
-    VoiceInformation voice() const {
-        return synth.Voice();
-    }
-
-    void voice(VoiceInformation const &v) const {
-        return synth.Voice(v);
-    }
-};
-
-static Synthesizer sx;
+struct Marks {
+    std::vector<Mark> entries;
+    int32_t last_reported_mark_index;
+    Marks() : entries(), last_reported_mark_index(-1) {}
+};
+
+static SpeechSynthesizer speech_synthesizer{nullptr};
+static MediaPlayer media_player{nullptr};
 
 static size_t
 decode_into(std::string_view src, std::wstring_view dest) {
@@ -575,7 +465,7 @@ parse_cued_text(std::string_view src, Marks &marks, std::wstring_view dest) {
             src = src.substr(1, src.size() - 1);
             if (src.size() >= 4) {
                 uint32_t mark = *((uint32_t*)src.data());
-                marks.emplace_back(mark, (uint32_t)dest_pos);
+                marks.entries.emplace_back(mark, (uint32_t)dest_pos);
                 src = src.substr(4, src.size() - 4);
             }
         }
@@ -605,124 +495,46 @@ read_from_shm(id_type cmd_id, const std::wstring_view size, const std::wstring &
 // Speak {{{
-void Synthesizer::on_cue_entered(id_type cmd_id, const winrt::hstring &label, const SpeechCue &cue) {
-    std::scoped_lock sl(recursive_lock);
-    if (!cmd_id_is_current(cmd_id)) return;
-    output(cmd_id, "cue_entered", json_val(label, cue));
-    if (label != L"SpeechWord") return;
-    uint32_t pos = cue.StartPositionInInput().Value();
-    for (int32_t i = std::max(0, last_reported_mark_index); i < (int32_t)current_marks.size(); i++) {
-        int32_t idx = -1;
-        if (current_marks[i].pos_in_text > pos) {
-            idx = i-1;
-            if (idx == last_reported_mark_index && current_marks[i].pos_in_text - pos < 3) idx = i;
-        } else if (current_marks[i].pos_in_text == pos) idx = i;
-        if (idx > -1) {
-            output(cmd_id, "mark_reached", {{"id", current_marks[idx].id}});
-            last_reported_mark_index = idx;
-            break;
-        }
-    }
-}
-
-void Synthesizer::register_metadata_handler_for_speech(id_type cmd_id, long index) {
-    std::scoped_lock sl(recursive_lock);
-    if (!cmd_id_is_current(cmd_id)) return;
-    if (index < 0) {
-        for (uint32_t i = 0; i < current_item.TimedMetadataTracks().Size(); i++) {
-            register_metadata_handler_for_track(i, cmd_id);
-        }
-    } else {
-        register_metadata_handler_for_track(index, cmd_id);
-    }
-}
-
-void
-Synthesizer::register_metadata_handler_for_track(uint32_t index, id_type cmd_id) {
-    TimedMetadataTrack track = current_item.TimedMetadataTracks().GetAt(index);
-    std::scoped_lock sl(recursive_lock);
-    if (current_cmd_id.load() != cmd_id) return;
-    revoker.cue_entered.push_back(track.CueEntered(winrt::auto_revoke, [cmd_id](auto track, const auto& args) {
-        if (main_loop_is_running.load()) sx.on_cue_entered(cmd_id, track.Label(), args.Cue().template as<SpeechCue>());
-    }));
-    revoker.cue_exited.push_back(track.CueExited(winrt::auto_revoke, [cmd_id](auto track, const auto& args) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "cue_exited", json_val(track.Label(), args.Cue().template as<SpeechCue>()));
-    }));
-    revoker.track_failed.push_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "track_failed", {});
-    }));
-    current_item.TimedMetadataTracks().SetPresentationMode((unsigned int)index, TimedMetadataTrackPresentationMode::ApplicationPresented);
-}
-
-void
-Synthesizer::load_stream_for_playback(SpeechSynthesisStream const &&stream, id_type cmd_id, bool is_cued) {
-    std::scoped_lock sl(recursive_lock);
-    if (cmd_id != current_cmd_id.load()) return;
-    current_stream = stream;
-    current_source = MediaSource::CreateFromStream(current_stream, current_stream.ContentType());
-    revoker.playback_state_changed = player.PlaybackSession().PlaybackStateChanged(
-        winrt::auto_revoke, [cmd_id](auto session, auto const&) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "playback_state_changed", {{"state", session.PlaybackState()}});
-    });
-    revoker.media_opened = player.MediaOpened(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "media_state_changed", {{"state", "opened"}});
-    });
-    revoker.media_ended = player.MediaEnded(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "media_state_changed", {{"state", "ended"}});
-    });
-    revoker.media_failed = player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"code", args.Error()}});
-    });
-    current_item = MediaPlaybackItem(current_source);
-    revoker.timed_metadata_tracks_changed = current_item.TimedMetadataTracksChanged(winrt::auto_revoke,
-        [cmd_id](auto, auto const &args) {
-        auto change_type = args.CollectionChange();
-        long index;
-        switch (change_type) {
-            case CollectionChange::ItemInserted: index = args.Index(); break;
-            case CollectionChange::Reset: index = -1; break;
-            default: index = -2; break;
-        }
-        if (index > -2 && main_loop_is_running.load()) sx.register_metadata_handler_for_speech(cmd_id, index);
-    });
-    register_metadata_handler_for_speech(cmd_id, -1);
-    player.Source(current_item);
-}
-
-winrt::fire_and_forget Synthesizer::speak(id_type cmd_id, std::wstring_view const &text, bool is_ssml, bool is_cued, std::vector<wchar_t> &&buf, Marks const && marks) {
-    SpeechSynthesisStream stream{nullptr};
-    { std::scoped_lock sl(recursive_lock);
-        stop_current_activity();
-        current_cmd_id.store(cmd_id);
-        current_text_storage = std::move(buf);
-        current_marks = std::move(marks);
-        synth.Options().IncludeSentenceBoundaryMetadata(true);
-        synth.Options().IncludeWordBoundaryMetadata(true);
-    }
-    output(cmd_id, "synthesizing", {{"ssml", is_ssml}, {"num_marks", current_marks.size()}, {"text_length", text.size()}});
-    bool ok = false;
-    try {
-        if (is_ssml) stream = co_await synth.SynthesizeSsmlToStreamAsync(text);
-        else stream = co_await synth.SynthesizeTextToStreamAsync(text);
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Failed to synthesize speech", cmd_id);
-    if (ok) {
-        if (main_loop_is_running.load()) {
-            try {
-                load_stream_for_playback(std::move(stream), cmd_id, is_cued);
-            } CATCH_ALL_EXCEPTIONS("Failed to load synthesized stream for playback", cmd_id);
-        }
-    }
-}
+static Revokers speak_revoker = {};
+
+static void
+register_metadata_handler_for_track(MediaPlaybackTimedMetadataTrackList const &tracks, uint32_t index, id_type cmd_id, std::shared_ptr<Marks> marks) {
+    TimedMetadataTrack track = tracks.GetAt(index);
+    tracks.SetPresentationMode((unsigned int)index, TimedMetadataTrackPresentationMode::ApplicationPresented);
+    speak_revoker.cue_entered.push_back(track.CueEntered(winrt::auto_revoke, [cmd_id, marks](auto track, const auto& args) {
+        if (main_loop_is_running.load()) {
+            auto label = track.Label();
+            auto cue = args.Cue().template as<SpeechCue>();
+            output(cmd_id, "cue_entered", {label, cue});
+            if (label != L"SpeechWord") return;
+            uint32_t pos = cue.StartPositionInInput().Value();
+            for (int32_t i = std::max(0, marks->last_reported_mark_index); i < (int32_t)marks->entries.size(); i++) {
+                int32_t idx = -1;
+                if (marks->entries[i].pos_in_text > pos) {
+                    idx = i-1;
+                    if (idx == marks->last_reported_mark_index && marks->entries[i].pos_in_text - pos < 3) idx = i;
+                } else if (marks->entries[i].pos_in_text == pos) idx = i;
+                if (idx > -1) {
+                    output(cmd_id, "mark_reached", {{"id", marks->entries[idx].id}});
+                    marks->last_reported_mark_index = idx;
+                    break;
+                }
+            }
+        }
+    }));
+    speak_revoker.cue_exited.push_back(track.CueExited(winrt::auto_revoke, [cmd_id](auto track, const auto& args) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "cue_exited", json_val(track.Label(), args.Cue().template as<SpeechCue>()));
+    }));
+    speak_revoker.track_failed.push_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "track_failed", {});
+    }));
+};
 
 static void
 handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
@@ -736,11 +548,11 @@ handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
     }
     parts.erase(parts.begin(), parts.begin() + 2);
     std::wstring address;
-    Marks marks;
+    auto marks = std::make_shared<Marks>();
     std::vector<wchar_t> buf;
     std::wstring_view text;
     if (is_shm) {
-        text = read_from_shm(cmd_id, parts.at(0), std::wstring(parts.at(1)), buf, marks, is_cued);
+        text = read_from_shm(cmd_id, parts.at(0), std::wstring(parts.at(1)), buf, *marks, is_cued);
         if (text.size() == 0) return;
     } else {
         address = join(parts);
@@ -750,12 +562,62 @@ handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
         address.copy(buf.data(), address.size());
     }
     *((wchar_t*)text.data() + text.size()) = 0; // ensure NULL termination
-    sx.speak(cmd_id, text, is_ssml, is_cued, std::move(buf), std::move(marks));
+    output(cmd_id, "synthesizing", {{"ssml", is_ssml}, {"num_marks", marks->entries.size()}, {"text_length", text.size()}});
+    bool ok = false;
+    SpeechSynthesisStream stream{nullptr};
+    if (!run_catching_exceptions([&]() {
+        speech_synthesizer.Options().IncludeSentenceBoundaryMetadata(true);
+        speech_synthesizer.Options().IncludeWordBoundaryMetadata(true);
+        if (is_ssml) stream = speech_synthesizer.SynthesizeSsmlToStreamAsync(text).get();
+        else stream = speech_synthesizer.SynthesizeTextToStreamAsync(text).get();
+        ok = true;
+    }, "Failed to synthesize speech", __LINE__, cmd_id)) return;
+    speak_revoker = {}; // delete any revokers previously installed
+    MediaSource source(MediaSource::CreateFromStream(stream, stream.ContentType()));
+    speak_revoker.playback_state_changed = media_player.PlaybackSession().PlaybackStateChanged(
+        winrt::auto_revoke, [cmd_id](auto session, auto const&) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "playback_state_changed", {{"state", session.PlaybackState()}});
+    });
+    speak_revoker.media_opened = media_player.MediaOpened(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "media_state_changed", {{"state", "opened"}});
+    });
+    speak_revoker.media_ended = media_player.MediaEnded(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "media_state_changed", {{"state", "ended"}});
+    });
+    speak_revoker.media_failed = media_player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"code", args.Error()}});
+    });
+    auto playback_item = std::make_shared<MediaPlaybackItem>(source);
+    speak_revoker.timed_metadata_tracks_changed = playback_item->TimedMetadataTracksChanged(winrt::auto_revoke,
+        [cmd_id, playback_item_weak_ref = std::weak_ptr(playback_item), marks](auto, auto const &args) {
+        auto change_type = args.CollectionChange();
+        long index;
+        switch (change_type) {
+            case CollectionChange::ItemInserted: index = args.Index(); break;
+            case CollectionChange::Reset: index = -1; break;
+            default: index = -2; break;
+        }
+        auto pi{ playback_item_weak_ref.lock() };
+        if (index > -2 && pi && main_loop_is_running.load()) register_metadata_handler_for_track(pi->TimedMetadataTracks(), index, cmd_id, marks);
+    });
+    for (uint32_t i = 0; i < playback_item->TimedMetadataTracks().Size(); i++) {
+        register_metadata_handler_for_track(playback_item->TimedMetadataTracks(), i, cmd_id, marks);
+    }
+    media_player.Source(*playback_item);
 }
 // }}}
 
 // Save {{{
-static winrt::fire_and_forget
+static void
 save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id) {
     unsigned long long stream_size = stream.Size(), bytes_read = 0;
     DataReader reader(stream);
@@ -763,66 +625,26 @@ save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id
     const static unsigned int chunk_size = 16 * 1024;
     std::array<uint8_t, chunk_size> buf;
     std::ofstream outfile;
-    bool ok = false;
-    try {
+    if (!run_catching_exceptions([&](){
         outfile.open(path.string(), std::ios::out | std::ios::trunc);
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Failed to create file: " + path.string(), cmd_id);
-    if (!ok) co_return;
+    }, "Failed to create file: " + path.string(), __LINE__, cmd_id)) return;
     while (bytes_read < stream_size) {
-        try {
-            n = co_await reader.LoadAsync(chunk_size);
-            ok = true;
-        } CATCH_ALL_EXCEPTIONS("Failed to load data from DataReader", cmd_id);
-        if (!ok) co_return;
+        if (!run_catching_exceptions([&]() {
+            n = reader.LoadAsync(chunk_size).get();
+        }, "Failed to load data from DataReader", __LINE__, cmd_id)) return;
         if (n > 0) {
             bytes_read += n;
-            ok = false;
-            try {
+            if (!run_catching_exceptions([&]() {
                 reader.ReadBytes(winrt::array_view(buf.data(), buf.data() + n));
                 outfile.write((const char*)buf.data(), n);
                 if (!outfile.good()) throw "Failed to write to output file";
-                ok = true;
-            } CATCH_ALL_EXCEPTIONS("Failed to save bytes from DataReader to file", cmd_id);
-            if (!ok) co_return;
+            }, "Failed to save bytes from DataReader to file", __LINE__, cmd_id)) return;
         }
     }
     output(cmd_id, "saved", {{"size", bytes_read}});
 }
-
-void
-Synthesizer::start_save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id) {
-    std::scoped_lock sl(recursive_lock);
-    try {
-        save_stream(std::move(stream), path, cmd_id);
-    } CATCH_ALL_EXCEPTIONS("Failed to save loaded stream", cmd_id);
-    stop_current_activity();
-}
-
-winrt::fire_and_forget Synthesizer::save(id_type cmd_id, std::wstring_view const &text, bool is_ssml, std::vector<wchar_t> &&buf, std::filesystem::path path) {
-    SpeechSynthesisStream stream{nullptr};
-    { std::scoped_lock sl(recursive_lock);
-        stop_current_activity();
-        current_cmd_id.store(cmd_id);
-        current_text_storage = std::move(buf);
-        synth.Options().IncludeSentenceBoundaryMetadata(false);
-        synth.Options().IncludeWordBoundaryMetadata(false);
-    }
-    bool ok = false;
-    try {
-        if (is_ssml) stream = co_await synth.SynthesizeSsmlToStreamAsync(text);
-        else stream = co_await synth.SynthesizeTextToStreamAsync(text);
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Failed to synthesize speech", cmd_id);
-    if (ok) {
-        if (main_loop_is_running.load()) {
-            try {
-                sx.start_save_stream(std::move(stream), path, cmd_id);
-            } CATCH_ALL_EXCEPTIONS("Failed to load synthesized stream for save", cmd_id);
-        }
-    }
-}
 
 static void
 handle_save(id_type cmd_id, std::vector<std::wstring_view> &parts) {
     bool is_ssml;
@@ -841,7 +663,14 @@ handle_save(id_type cmd_id, std::vector<std::wstring_view> &parts) {
     auto filename = join(parts);
     auto path = std::filesystem::absolute(filename);
     output(cmd_id, "saving", {{"ssml", is_ssml}, {"output_path", path.string()}});
-    sx.save(cmd_id, text, is_ssml, std::move(buf), path);
+    SpeechSynthesisStream stream{nullptr};
+    speech_synthesizer.Options().IncludeSentenceBoundaryMetadata(false);
+    speech_synthesizer.Options().IncludeWordBoundaryMetadata(false);
+    if (!run_catching_exceptions([&]() {
+        if (is_ssml) stream = speech_synthesizer.SynthesizeSsmlToStreamAsync(text).get();
+        else stream = speech_synthesizer.SynthesizeTextToStreamAsync(text).get();
+    }, "Failed to synthesize speech", __LINE__, cmd_id)) return;
+    save_stream(std::move(stream), path, cmd_id);
 }
 // }}}
@@ -862,18 +691,17 @@ static const std::unordered_map<std::string, handler_function> handlers = {
     }},
 
     {"play", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        sx.play();
-        output(cmd_id, "play", {{"playback_state", sx.playback_state()}});
+        media_player.Play();
+        output(cmd_id, "play", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
     }},
 
     {"pause", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        sx.play();
-        output(cmd_id, "pause", {{"playback_state", sx.playback_state()}});
+        media_player.Pause();
+        output(cmd_id, "pause", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
     }},
 
     {"state", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        sx.play();
-        output(cmd_id, "state", {{"playback_state", sx.playback_state()}});
+        output(cmd_id, "state", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
     }},
 
     {"default_voice", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
@@ -895,25 +723,28 @@ static const std::unordered_map<std::string, handler_function> handlers = {
     {"volume", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
         if (parts.size()) {
             auto vol = parse_double(parts[0].data());
-            sx.volume(vol);
+            if (vol < 0 || vol > 1) throw std::out_of_range("Invalid volume value must be between 0 and 1");
+            speech_synthesizer.Options().AudioVolume(vol);
         }
-        output(cmd_id, "volume", {{"value", sx.volume()}});
+        output(cmd_id, "volume", {{"value", speech_synthesizer.Options().AudioVolume()}});
     }},
 
     {"rate", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
         if (parts.size()) {
             auto rate = parse_double(parts[0].data());
-            sx.rate(rate);
+            if (rate < 0.5 || rate > 6.0) throw std::out_of_range("Invalid rate value must be between 0.5 and 6");
+            speech_synthesizer.Options().SpeakingRate(rate);
         }
-        output(cmd_id, "rate", {{"value", sx.rate()}});
+        output(cmd_id, "rate", {{"value", speech_synthesizer.Options().SpeakingRate()}});
     }},
 
     {"pitch", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
         if (parts.size()) {
-            auto rate = parse_double(parts[0].data());
-            sx.rate(rate);
+            auto pitch = parse_double(parts[0].data());
+            if (pitch < 0 || pitch > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2");
+            speech_synthesizer.Options().AudioPitch(pitch);
         }
-        output(cmd_id, "pitch", {{"pitch", sx.rate()}});
+        output(cmd_id, "pitch", {{"pitch", speech_synthesizer.Options().AudioPitch()}});
     }},
 
     {"save", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
@@ -932,7 +763,7 @@ handle_stdin_message(winrt::hstring const &&msg) {
     bool ok = false;
     std::vector<std::wstring_view> parts;
     int64_t exit_code = -1;
-    try {
+    if (!run_catching_exceptions([&]() {
         parts = split(msg);
         command = parts.at(1); cmd_id = parse_id(parts.at(0));
         if (cmd_id == 0) {
@@ -940,8 +771,7 @@
         }
         parts.erase(parts.begin(), parts.begin() + 2);
         ok = true;
-    } CATCH_ALL_EXCEPTIONS((std::string("Invalid input message: ") + winrt::to_string(msg)), 0);
-    if (ok) {
+    }, "Invalid input message: " + winrt::to_string(msg), __LINE__)) return exit_code;
     handler_function handler;
     std::string cmd(winrt::to_string(command));
     try {
@@ -950,46 +780,51 @@
             output_error(cmd_id, "Unknown command", cmd, __LINE__);
             return exit_code;
         }
-        try {
+        run_catching_exceptions([&]() {
             handler(cmd_id, parts, &exit_code);
-        } CATCH_ALL_EXCEPTIONS("Error handling input message", cmd_id);
-    }
+        }, "Error handling input message", __LINE__, cmd_id);
     return exit_code;
 }
 
 static PyObject*
 run_main_loop(PyObject*, PyObject*) {
-    try {
+    if (!run_catching_exceptions([]() {
         std::cout.imbue(std::locale("C"));
         std::cin.imbue(std::locale("C"));
         std::cerr.imbue(std::locale("C"));
         std::wcin.imbue(std::locale("C"));
         std::wcout.imbue(std::locale("C"));
         std::wcerr.imbue(std::locale("C"));
-    } CATCH_ALL_EXCEPTIONS("Failed to set stdio locales to C", 0);
-    winrt::init_apartment(winrt::apartment_type::multi_threaded);
-    main_thread_id = GetCurrentThreadId();
-    MSG msg;
-    int64_t exit_code = 0;
-    bool ok = false;
-    try {
-        new (&sx) Synthesizer();
-        sx.initialize();
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Error initializing Synthesizer", 0);
-    if (!ok) return PyLong_FromUnsignedLongLong(1);
-    Py_BEGIN_ALLOW_THREADS;
-    main_loop_is_running.store(true);
-    PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE); // ensure we have a message queue
+    }, "Failed to set stdio locales to C", __LINE__)) {
+        return PyLong_FromLongLong(1);
+    }
+    if (!run_catching_exceptions([]() {
+        winrt::init_apartment(winrt::apartment_type::multi_threaded);
+    }, "Failed to initialize COM", __LINE__)) {
+        return PyLong_FromLongLong(1);
+    }
+    main_thread_id = GetCurrentThreadId();
+    if (!run_catching_exceptions([]() {
+        speech_synthesizer = SpeechSynthesizer();
+        media_player = MediaPlayer();
+        media_player.AudioCategory(MediaPlayerAudioCategory::Speech);
+        media_player.AutoPlay(true);
+    }, "Failed to initialize SpeechSynthesizer and MediaPlayer", __LINE__)) {
+        return PyLong_FromLongLong(1);
+    }
 
     if (_isatty(_fileno(stdin))) {
         std::cout << "Welcome to winspeech. Type exit to quit." << std::endl;
     }
+    int64_t exit_code = -1;
+    main_loop_is_running.store(true);
+    Py_BEGIN_ALLOW_THREADS;
     std::string input_buffer;
-    while (true) {
+    while (exit_code < 0) {
         try {
             if (!std::getline(std::cin, input_buffer)) {
                 if (!std::cin.eof()) exit_code = 1;
@@ -997,7 +832,10 @@ run_main_loop(PyObject*, PyObject*) {
             }
             rtrim(input_buffer);
             if (input_buffer.size() > 0) {
-                if ((exit_code = handle_stdin_message(std::move(winrt::to_hstring(input_buffer)))) >= 0) break;
+                run_catching_exceptions([&]() {
+                    exit_code = handle_stdin_message(std::move(winrt::to_hstring(input_buffer)));
+                }, "Error handling STDIN message", __LINE__);
+                if (exit_code >= 0) break;
             }
         } catch(...) {
             exit_code = 1;
@@ -1005,14 +843,13 @@
             break;
         }
     }
-    main_loop_is_running.store(false);
     Py_END_ALLOW_THREADS;
+    main_loop_is_running.store(false);
     try {
-        sx.stop_current_activity();
-        (&sx)->~Synthesizer();
-    } CATCH_ALL_EXCEPTIONS("Error stopping all activity", 0);
+        speech_synthesizer = SpeechSynthesizer{nullptr};
+        media_player = MediaPlayer{nullptr};
+    } catch(...) {}
 
     return PyLong_FromLongLong(exit_code);
 }