Refactor speech code to make it simpler and hopefully more robust

Also get rid of the catch macro
2025-07-09 03:04:10 -04:00 · 2023-01-29 14:58:54 +05:30 · 2023-01-29 14:58:54 +05:30 · cf0cc595b6
commit cf0cc595b6
parent 356091be76
1 changed files with 188 additions and 351 deletions
--- a/src/calibre/utils/windows/winspeech.cpp
+++ b/src/calibre/utils/windows/winspeech.cpp
@ -393,21 +393,29 @@ output_error(id_type cmd_id, std::string_view const &msg, std::string_view const
    output(cmd_id, "error", std::move(m));
 }

-#define CATCH_ALL_EXCEPTIONS(msg, cmd_id) \
-  catch(winrt::hresult_error const& ex) { \
-    output_error(cmd_id, msg, winrt::to_string(ex.message()), __LINE__, ex.to_abi()); \
-} catch(const std::system_error& ex) { \
-    output_error(cmd_id, msg, "system_error with code: " + std::to_string(ex.code().value()) + " and meaning: " + ex.what(), __LINE__); \
-} catch (std::exception const &ex) { \
-    output_error(cmd_id, msg, ex.what(), __LINE__); \
-} catch (std::string const &ex) { \
-    output_error(cmd_id, msg, ex, __LINE__); \
-} catch (std::wstring const &ex) { \
-    output_error(cmd_id, msg, winrt::to_string(ex), __LINE__); \
-} catch (...) { \
-    output_error(cmd_id, msg, "Unknown exception type was raised", __LINE__); \
+static bool
+run_catching_exceptions(std::function<void(void)> f, std::string_view const &msg, int64_t line, id_type cmd_id=0) {
+    bool ok = false;
+    try {
+        f();
+        ok = true;
+    } catch(winrt::hresult_error const& ex) {
+        output_error(cmd_id, msg, winrt::to_string(ex.message()), line, ex.to_abi());
+    } catch(const std::system_error& ex) {
+        output_error(cmd_id, msg, "system_error with code: " + std::to_string(ex.code().value()) + " and meaning: " + ex.what(), line);
+    } catch (std::exception const &ex) {
+        output_error(cmd_id, msg, ex.what(), line);
+    } catch (std::string const &ex) {
+        output_error(cmd_id, msg, ex, line);
+    } catch (std::wstring const &ex) {
+        output_error(cmd_id, msg, winrt::to_string(ex), line);
+    } catch (...) {
+        output_error(cmd_id, msg, "Unknown exception type was raised", line);
+    }
+    return ok;
 }

+
 struct Revokers {
    MediaPlaybackSession::PlaybackStateChanged_revoker playback_state_changed;
    MediaPlayer::MediaEnded_revoker media_ended; MediaPlayer::MediaOpened_revoker media_opened;
@ -424,132 +432,14 @@ struct Mark {
    Mark(uint32_t id, uint32_t pos) : id(id), pos_in_text(pos) {}
 };

-typedef std::vector<Mark> Marks;
-
-class Synthesizer {
-    private:
-    SpeechSynthesizer synth{nullptr};
-    MediaPlayer player{nullptr};
-    MediaSource current_source{nullptr};
-    SpeechSynthesisStream current_stream{nullptr};
-    MediaPlaybackItem current_item{nullptr};
-    std::vector<wchar_t> current_text_storage;
-    Marks current_marks;
+struct Marks {
+    std::vector<Mark> entries;
    int32_t last_reported_mark_index;
-    std::atomic<id_type> current_cmd_id;
-
-    Revokers revoker;
-    std::recursive_mutex recursive_lock;
-
-    public:
-    // Speak {{{
-    void register_metadata_handler_for_track(uint32_t index, id_type cmd_id);
-    void load_stream_for_playback(SpeechSynthesisStream const &&stream, id_type cmd_id, bool is_cued);
-    winrt::fire_and_forget speak(id_type cmd_id, std::wstring_view const &text, bool is_ssml, bool is_cued, std::vector<wchar_t> &&buf, Marks const && marks);
-    void register_metadata_handler_for_speech(id_type cmd_id, long index);
-    bool cmd_id_is_current(id_type cmd_id) const noexcept { return current_cmd_id.load() == cmd_id; }
-    void on_cue_entered(id_type cmd_id, const winrt::hstring &label, const SpeechCue &cue);
-    // }}}
-
-    winrt::fire_and_forget save(id_type cmd_id, std::wstring_view const &text, bool is_ssml, std::vector<wchar_t> &&buf, std::filesystem::path path);
-    void start_save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id);
-
-    void initialize() {
-        synth = SpeechSynthesizer();
-        player = MediaPlayer();
-        player.AudioCategory(MediaPlayerAudioCategory::Speech);
-        player.AutoPlay(true);
-    }
-
-    void output(id_type cmd_id, std::string_view const& type, json_val const && x) {
-        std::scoped_lock sl(recursive_lock);
-        if (cmd_id_is_current(cmd_id)) ::output(cmd_id, type, std::move(x));
-    }
-
-    void stop_current_activity() {
-        std::scoped_lock sl(recursive_lock);
-        if (current_cmd_id.load()) {
-            current_cmd_id.store(0);
-            revoker = {};
-            current_source = MediaSource{nullptr};
-            current_stream = SpeechSynthesisStream{nullptr};
-            current_item = MediaPlaybackItem{nullptr};
-            player.Pause();
-            current_text_storage = std::vector<wchar_t>();
-            current_marks = Marks();
-            last_reported_mark_index = -1;
-        }
-    }
-
-    double volume() const {
-        return synth.Options().AudioVolume();
-    }
-
-    void volume(double val) {
-        if (val < 0 || val > 1) throw std::out_of_range("Invalid volume value must be between 0 and 1");
-        std::scoped_lock sl(recursive_lock);
-        synth.Options().AudioVolume(val);
-    }
-
-    double rate() const {
-        return synth.Options().SpeakingRate();
-    }
-
-    void rate(double val) {
-        if (val < 0.5 || val > 6.0) throw std::out_of_range("Invalid rate value must be between 0.5 and 6");
-        std::scoped_lock sl(recursive_lock);
-        synth.Options().SpeakingRate(val);
-    }
-
-    double pitch() const {
-        return synth.Options().AudioPitch();
-    }
-
-    void pitch(double val) {
-        if (val < 0 || val > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2");
-        std::scoped_lock sl(recursive_lock);
-        synth.Options().AudioPitch(val);
-    }
-
-    void pause() const {
-        player.Pause();
-    }
-
-    void play() const {
-        player.Play();
-    }
-
-    bool toggle() const {
-        switch (player.PlaybackSession().PlaybackState()) {
-            case MediaPlaybackState::Playing: pause(); return true;
-            case MediaPlaybackState::Paused: play(); return true;
-            default: return false;
-        }
-    }
-
-    MediaPlaybackState playback_state() const {
-        return player.PlaybackSession().PlaybackState();
-    }
-
-    DeviceInformation audio_device() const {
-        return player.AudioDevice();
-    }
-
-    void audio_device(DeviceInformation const &di) const {
-        player.AudioDevice(di);
-    }
-
-    VoiceInformation voice() const {
-        return synth.Voice();
-    }
-
-    void voice(VoiceInformation const &v) const {
-        return synth.Voice(v);
-    }
-
+    Marks() : entries(), last_reported_mark_index(-1) {}
 };

-static Synthesizer sx;
+static SpeechSynthesizer speech_synthesizer{nullptr};
+static MediaPlayer media_player{nullptr};

 static size_t
 decode_into(std::string_view src, std::wstring_view dest) {
@ -575,7 +465,7 @@ parse_cued_text(std::string_view src, Marks &marks, std::wstring_view dest) {
            src = src.substr(1, src.size() - 1);
            if (src.size() >= 4) {
                uint32_t mark = *((uint32_t*)src.data());
-                marks.emplace_back(mark, (uint32_t)dest_pos);
+                marks.entries.emplace_back(mark, (uint32_t)dest_pos);
                src = src.substr(4, src.size() - 4);
            }
        }
@ -605,124 +495,46 @@ read_from_shm(id_type cmd_id, const std::wstring_view size, const std::wstring &


 // Speak {{{
-void Synthesizer::on_cue_entered(id_type cmd_id, const winrt::hstring &label, const SpeechCue &cue) {
-    std::scoped_lock sl(recursive_lock);
-    if (!cmd_id_is_current(cmd_id)) return;
-    output(cmd_id, "cue_entered", json_val(label, cue));
+static Revokers speak_revoker = {};
+
+static void
+register_metadata_handler_for_track(MediaPlaybackTimedMetadataTrackList const &tracks, uint32_t index, id_type cmd_id, std::shared_ptr<Marks> marks) {
+    TimedMetadataTrack track = tracks.GetAt(index);
+    tracks.SetPresentationMode((unsigned int)index, TimedMetadataTrackPresentationMode::ApplicationPresented);
+
+    speak_revoker.cue_entered.push_back(track.CueEntered(winrt::auto_revoke, [cmd_id, marks](auto track, const auto& args) {
+        if (main_loop_is_running.load()) {
+            auto label = track.Label();
+            auto cue = args.Cue().template as<SpeechCue>();
+            output(cmd_id, "cue_entered", {label, cue});
            if (label != L"SpeechWord") return;
            uint32_t pos = cue.StartPositionInInput().Value();
-    for (int32_t i = std::max(0, last_reported_mark_index); i < (int32_t)current_marks.size(); i++) {
+            for (int32_t i = std::max(0, marks->last_reported_mark_index); i < (int32_t)marks->entries.size(); i++) {
                int32_t idx = -1;
-        if (current_marks[i].pos_in_text > pos) {
+                if (marks->entries[i].pos_in_text > pos) {
                    idx = i-1;
-            if (idx == last_reported_mark_index && current_marks[i].pos_in_text - pos < 3) idx = i;
-        } else if (current_marks[i].pos_in_text == pos) idx = i;
+                    if (idx == marks->last_reported_mark_index && marks->entries[i].pos_in_text - pos < 3) idx = i;
+                } else if (marks->entries[i].pos_in_text == pos) idx = i;
                if (idx > -1) {
-            output(cmd_id, "mark_reached", {{"id", current_marks[idx].id}});
-            last_reported_mark_index = idx;
+                    output(cmd_id, "mark_reached", {{"id", marks->entries[idx].id}});
+                    marks->last_reported_mark_index = idx;
                    break;
                }
            }
-}
-
-void Synthesizer::register_metadata_handler_for_speech(id_type cmd_id, long index) {
-    std::scoped_lock sl(recursive_lock);
-    if (!cmd_id_is_current(cmd_id)) return;
-    if (index < 0) {
-        for (uint32_t i = 0; i < current_item.TimedMetadataTracks().Size(); i++) {
-            register_metadata_handler_for_track(i, cmd_id);
        }
-    } else {
-        register_metadata_handler_for_track(index, cmd_id);
-    }
-}
-
-void
-Synthesizer::register_metadata_handler_for_track(uint32_t index, id_type cmd_id) {
-    TimedMetadataTrack track = current_item.TimedMetadataTracks().GetAt(index);
-    std::scoped_lock sl(recursive_lock);
-    if (current_cmd_id.load() != cmd_id) return;
-    revoker.cue_entered.push_back(track.CueEntered(winrt::auto_revoke, [cmd_id](auto track, const auto& args) {
-        if (main_loop_is_running.load()) sx.on_cue_entered(cmd_id, track.Label(), args.Cue().template as<SpeechCue>());
    }));
-    revoker.cue_exited.push_back(track.CueExited(winrt::auto_revoke, [cmd_id](auto track, const auto& args) {
-        if (main_loop_is_running.load()) sx.output(
+
+    speak_revoker.cue_exited.push_back(track.CueExited(winrt::auto_revoke, [cmd_id](auto track, const auto& args) {
+        if (main_loop_is_running.load()) output(
            cmd_id, "cue_exited", json_val(track.Label(), args.Cue().template as<SpeechCue>()));
    }));
-    revoker.track_failed.push_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) {
-        if (main_loop_is_running.load()) sx.output(
+
+    speak_revoker.track_failed.push_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) {
+        if (main_loop_is_running.load()) output(
            cmd_id, "track_failed", {});
    }));
-    current_item.TimedMetadataTracks().SetPresentationMode((unsigned int)index, TimedMetadataTrackPresentationMode::ApplicationPresented);
-}
+};

-void
-Synthesizer::load_stream_for_playback(SpeechSynthesisStream const &&stream, id_type cmd_id, bool is_cued) {
-    std::scoped_lock sl(recursive_lock);
-    if (cmd_id != current_cmd_id.load()) return;
-    current_stream = stream;
-    current_source = MediaSource::CreateFromStream(current_stream, current_stream.ContentType());
-
-    revoker.playback_state_changed = player.PlaybackSession().PlaybackStateChanged(
-            winrt::auto_revoke, [cmd_id](auto session, auto const&) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "playback_state_changed", {{"state", session.PlaybackState()}});
-    });
-    revoker.media_opened = player.MediaOpened(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "media_state_changed", {{"state", "opened"}});
-    });
-    revoker.media_ended = player.MediaEnded(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "media_state_changed", {{"state", "ended"}});
-    });
-    revoker.media_failed = player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"code", args.Error()}});
-    });
-    current_item = MediaPlaybackItem(current_source);
-
-    revoker.timed_metadata_tracks_changed = current_item.TimedMetadataTracksChanged(winrt::auto_revoke,
-        [cmd_id](auto, auto const &args) {
-        auto change_type = args.CollectionChange();
-        long index;
-        switch (change_type) {
-            case CollectionChange::ItemInserted: index = args.Index(); break;
-            case CollectionChange::Reset: index = -1; break;
-            default: index = -2; break;
-        }
-        if (index > -2 && main_loop_is_running.load()) sx.register_metadata_handler_for_speech(cmd_id, index);
-    });
-    register_metadata_handler_for_speech(cmd_id, -1);
-
-    player.Source(current_item);
-}
-
-winrt::fire_and_forget Synthesizer::speak(id_type cmd_id, std::wstring_view const &text, bool is_ssml, bool is_cued, std::vector<wchar_t> &&buf, Marks const && marks) {
-    SpeechSynthesisStream stream{nullptr};
-    { std::scoped_lock sl(recursive_lock);
-        stop_current_activity();
-        current_cmd_id.store(cmd_id);
-        current_text_storage = std::move(buf);
-        current_marks = std::move(marks);
-        synth.Options().IncludeSentenceBoundaryMetadata(true);
-        synth.Options().IncludeWordBoundaryMetadata(true);
-    }
-    output(cmd_id, "synthesizing", {{"ssml", is_ssml}, {"num_marks", current_marks.size()}, {"text_length", text.size()}});
-    bool ok = false;
-    try {
-        if (is_ssml) stream = co_await synth.SynthesizeSsmlToStreamAsync(text);
-        else stream = co_await synth.SynthesizeTextToStreamAsync(text);
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Failed to synthesize speech", cmd_id);
-    if (ok) {
-        if (main_loop_is_running.load()) {
-            try {
-                load_stream_for_playback(std::move(stream), cmd_id, is_cued);
-            } CATCH_ALL_EXCEPTIONS("Failed to load synthesized stream for playback", cmd_id);
-        }
-    }
-}

 static void
 handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
@ -736,11 +548,11 @@ handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
    }
    parts.erase(parts.begin(), parts.begin() + 2);
    std::wstring address;
-    Marks marks;
+    auto marks = std::make_shared<Marks>();
    std::vector<wchar_t> buf;
    std::wstring_view text;
    if (is_shm) {
-        text = read_from_shm(cmd_id, parts.at(0), std::wstring(parts.at(1)), buf, marks, is_cued);
+        text = read_from_shm(cmd_id, parts.at(0), std::wstring(parts.at(1)), buf, *marks, is_cued);
        if (text.size() == 0) return;
    } else {
        address = join(parts);
@ -750,12 +562,62 @@ handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
        address.copy(buf.data(), address.size());
    }
    *((wchar_t*)text.data() + text.size()) = 0;  // ensure NULL termination
-    sx.speak(cmd_id, text, is_ssml, is_cued, std::move(buf), std::move(marks));
+
+    output(cmd_id, "synthesizing", {{"ssml", is_ssml}, {"num_marks", marks->entries.size()}, {"text_length", text.size()}});
+    bool ok = false;
+    SpeechSynthesisStream stream{nullptr};
+    if (!run_catching_exceptions([&]() {
+        speech_synthesizer.Options().IncludeSentenceBoundaryMetadata(true);
+        speech_synthesizer.Options().IncludeWordBoundaryMetadata(true);
+        if (is_ssml) stream = speech_synthesizer.SynthesizeSsmlToStreamAsync(text).get();
+        else stream = speech_synthesizer.SynthesizeTextToStreamAsync(text).get();
+        ok = true;
+    }, "Failed to synthesize speech", __LINE__, cmd_id)) return;
+
+    speak_revoker = {};  // delete any revokers previously installed
+    MediaSource source(MediaSource::CreateFromStream(stream, stream.ContentType()));
+
+    speak_revoker.playback_state_changed = media_player.PlaybackSession().PlaybackStateChanged(
+            winrt::auto_revoke, [cmd_id](auto session, auto const&) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "playback_state_changed", {{"state", session.PlaybackState()}});
+    });
+    speak_revoker.media_opened = media_player.MediaOpened(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "media_state_changed", {{"state", "opened"}});
+    });
+    speak_revoker.media_ended = media_player.MediaEnded(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "media_state_changed", {{"state", "ended"}});
+    });
+    speak_revoker.media_failed = media_player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"code", args.Error()}});
+    });
+    auto playback_item = std::make_shared<MediaPlaybackItem>(source);
+
+    speak_revoker.timed_metadata_tracks_changed = playback_item->TimedMetadataTracksChanged(winrt::auto_revoke,
+        [cmd_id, playback_item_weak_ref = std::weak_ptr(playback_item), marks](auto, auto const &args) {
+        auto change_type = args.CollectionChange();
+        long index;
+        switch (change_type) {
+            case CollectionChange::ItemInserted: index = args.Index(); break;
+            case CollectionChange::Reset: index = -1; break;
+            default: index = -2; break;
+        }
+        auto pi{ playback_item_weak_ref.lock() };
+        if (index > -2 && pi && main_loop_is_running.load()) register_metadata_handler_for_track(pi->TimedMetadataTracks(), index, cmd_id, marks);
+    });
+
+    for (uint32_t i = 0; i < playback_item->TimedMetadataTracks().Size(); i++) {
+        register_metadata_handler_for_track(playback_item->TimedMetadataTracks(), i, cmd_id, marks);
+    }
+    media_player.Source(*playback_item);
 }
 // }}}

 // Save {{{
-static winrt::fire_and_forget
+static void
 save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id) {
    unsigned long long stream_size = stream.Size(), bytes_read = 0;
    DataReader reader(stream);
@ -763,66 +625,26 @@ save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id
    const static unsigned int chunk_size = 16 * 1024;
    std::array<uint8_t, chunk_size> buf;
    std::ofstream outfile;
-    bool ok = false;
-    try {
+    if (!run_catching_exceptions([&](){
        outfile.open(path.string(), std::ios::out | std::ios::trunc);
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Failed to create file: " + path.string(), cmd_id);
-    if (!ok) co_return;
+    }, "Failed to create file: " + path.string(), __LINE__, cmd_id)) return;
+
    while (bytes_read < stream_size) {
-        try {
-            n = co_await reader.LoadAsync(chunk_size);
-            ok = true;
-        } CATCH_ALL_EXCEPTIONS("Failed to load data from DataReader", cmd_id);
-        if (!ok) co_return;
+        if (!run_catching_exceptions([&]() {
+            n = reader.LoadAsync(chunk_size).get();
+        }, "Failed to load data from DataReader", __LINE__, cmd_id)) return;
        if (n > 0) {
            bytes_read += n;
-            ok = false;
-            try {
+            if (!run_catching_exceptions([&]() {
                reader.ReadBytes(winrt::array_view(buf.data(), buf.data() + n));
                outfile.write((const char*)buf.data(), n);
                if (!outfile.good()) throw "Failed to write to output file";
-                ok = true;
-            } CATCH_ALL_EXCEPTIONS("Failed to save bytes from DataReader to file", cmd_id);
-            if (!ok) co_return;
+            }, "Failed to save bytes from DataReader to file", __LINE__, cmd_id)) return;
        }
    }
    output(cmd_id, "saved", {{"size", bytes_read}});
 }

-void
-Synthesizer::start_save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id) {
-    std::scoped_lock sl(recursive_lock);
-    try {
-        save_stream(std::move(stream), path, cmd_id);
-    } CATCH_ALL_EXCEPTIONS("Failed to save loaded stream", cmd_id);
-    stop_current_activity();
-}
-
-winrt::fire_and_forget Synthesizer::save(id_type cmd_id, std::wstring_view const &text, bool is_ssml, std::vector<wchar_t> &&buf, std::filesystem::path path) {
-    SpeechSynthesisStream stream{nullptr};
-    { std::scoped_lock sl(recursive_lock);
-        stop_current_activity();
-        current_cmd_id.store(cmd_id);
-        current_text_storage = std::move(buf);
-        synth.Options().IncludeSentenceBoundaryMetadata(false);
-        synth.Options().IncludeWordBoundaryMetadata(false);
-    }
-    bool ok = false;
-    try {
-        if (is_ssml) stream = co_await synth.SynthesizeSsmlToStreamAsync(text);
-        else stream = co_await synth.SynthesizeTextToStreamAsync(text);
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Failed to synthesize speech", cmd_id);
-    if (ok) {
-        if (main_loop_is_running.load()) {
-            try {
-                sx.start_save_stream(std::move(stream), path, cmd_id);
-            } CATCH_ALL_EXCEPTIONS("Failed to load synthesized stream for save", cmd_id);
-        }
-    }
-}
-
 static void
 handle_save(id_type cmd_id, std::vector<std::wstring_view> &parts) {
    bool is_ssml;
@ -841,7 +663,14 @@ handle_save(id_type cmd_id, std::vector<std::wstring_view> &parts) {
    auto filename = join(parts);
    auto path = std::filesystem::absolute(filename);
    output(cmd_id, "saving", {{"ssml", is_ssml}, {"output_path", path.string()}});
-    sx.save(cmd_id, text, is_ssml, std::move(buf), path);
+    SpeechSynthesisStream stream{nullptr};
+    speech_synthesizer.Options().IncludeSentenceBoundaryMetadata(false);
+    speech_synthesizer.Options().IncludeWordBoundaryMetadata(false);
+    if (!run_catching_exceptions([&]() {
+        if (is_ssml) stream = speech_synthesizer.SynthesizeSsmlToStreamAsync(text).get();
+        else stream = speech_synthesizer.SynthesizeTextToStreamAsync(text).get();
+    }, "Failed to synthesize speech", __LINE__, cmd_id)) return;
+    save_stream(std::move(stream), path, cmd_id);
 }
 // }}}

@ -862,18 +691,17 @@ static const std::unordered_map<std::string, handler_function> handlers = {
    }},

    {"play", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        sx.play();
-        output(cmd_id, "play", {{"playback_state", sx.playback_state()}});
+        media_player.Play();
+        output(cmd_id, "play", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
    }},

    {"pause", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        sx.play();
-        output(cmd_id, "pause", {{"playback_state", sx.playback_state()}});
+        media_player.Pause();
+        output(cmd_id, "pause", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
    }},

    {"state", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        sx.play();
-        output(cmd_id, "state", {{"playback_state", sx.playback_state()}});
+        output(cmd_id, "state", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
    }},

    {"default_voice", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
@ -895,25 +723,28 @@ static const std::unordered_map<std::string, handler_function> handlers = {
    {"volume", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
        if (parts.size()) {
            auto vol = parse_double(parts[0].data());
-            sx.volume(vol);
+            if (vol < 0 || vol > 1) throw std::out_of_range("Invalid volume value must be between 0 and 1");
+            speech_synthesizer.Options().AudioVolume(vol);
        }
-        output(cmd_id, "volume", {{"value", sx.volume()}});
+        output(cmd_id, "volume", {{"value", speech_synthesizer.Options().AudioVolume()}});
    }},

    {"rate", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
        if (parts.size()) {
            auto rate = parse_double(parts[0].data());
-            sx.rate(rate);
+            if (rate < 0.5 || rate > 6.0) throw std::out_of_range("Invalid rate value must be between 0.5 and 6");
+            speech_synthesizer.Options().SpeakingRate(rate);
        }
-        output(cmd_id, "rate", {{"value", sx.rate()}});
+        output(cmd_id, "rate", {{"value", speech_synthesizer.Options().SpeakingRate()}});
    }},

    {"pitch", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
        if (parts.size()) {
-            auto rate = parse_double(parts[0].data());
-            sx.rate(rate);
+            auto pitch = parse_double(parts[0].data());
+            if (pitch < 0 || pitch > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2");
+            speech_synthesizer.Options().AudioPitch(pitch);
        }
-        output(cmd_id, "pitch", {{"pitch", sx.rate()}});
+        output(cmd_id, "pitch", {{"pitch", speech_synthesizer.Options().AudioPitch()}});
    }},

    {"save", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
@ -932,7 +763,7 @@ handle_stdin_message(winrt::hstring const &&msg) {
    bool ok = false;
    std::vector<std::wstring_view> parts;
    int64_t exit_code = -1;
-    try {
+    if (!run_catching_exceptions([&]() {
        parts = split(msg);
        command = parts.at(1); cmd_id = parse_id(parts.at(0));
        if (cmd_id == 0) {
@ -940,8 +771,7 @@ handle_stdin_message(winrt::hstring const &&msg) {
        }
        parts.erase(parts.begin(), parts.begin() + 2);
        ok = true;
-    } CATCH_ALL_EXCEPTIONS((std::string("Invalid input message: ") + winrt::to_string(msg)), 0);
-    if (ok) {
+    }, "Invalid input message: " + winrt::to_string(msg), __LINE__)) return exit_code;
    handler_function handler;
    std::string cmd(winrt::to_string(command));
    try {
@ -950,46 +780,51 @@ handle_stdin_message(winrt::hstring const &&msg) {
        output_error(cmd_id, "Unknown command", cmd, __LINE__);
        return exit_code;
    }
-        try {
+    run_catching_exceptions([&]() {
        handler(cmd_id, parts, &exit_code);
-        } CATCH_ALL_EXCEPTIONS("Error handling input message", cmd_id);
-    }
+    }, "Error handling input message", __LINE__, cmd_id);
    return exit_code;
 }

-
 static PyObject*
 run_main_loop(PyObject*, PyObject*) {
-    try {
+    if (!run_catching_exceptions([]() {
        std::cout.imbue(std::locale("C"));
        std::cin.imbue(std::locale("C"));
        std::cerr.imbue(std::locale("C"));
        std::wcin.imbue(std::locale("C"));
        std::wcout.imbue(std::locale("C"));
        std::wcerr.imbue(std::locale("C"));
-    } CATCH_ALL_EXCEPTIONS("Failed to set stdio locales to C", 0);
-    winrt::init_apartment(winrt::apartment_type::multi_threaded);
-    main_thread_id = GetCurrentThreadId();
-    MSG msg;
-    int64_t exit_code = 0;
-    bool ok = false;
-    try {
-        new (&sx) Synthesizer();
-        sx.initialize();
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Error initializing Synthesizer", 0);
-    if (!ok) return PyLong_FromUnsignedLongLong(1);
+    }, "Failed to set stdio locales to C", __LINE__)) {
+        return PyLong_FromLongLong(1);
+    }

-    Py_BEGIN_ALLOW_THREADS;
-    main_loop_is_running.store(true);
-    PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE);  // ensure we have a message queue
+    if (!run_catching_exceptions([]() {
+    winrt::init_apartment(winrt::apartment_type::multi_threaded);
+    }, "Failed to initialize COM", __LINE__)) {
+        return PyLong_FromLongLong(1);
+    }
+
+    main_thread_id = GetCurrentThreadId();
+
+    if (!run_catching_exceptions([]() {
+        speech_synthesizer = SpeechSynthesizer();
+        media_player = MediaPlayer();
+        media_player.AudioCategory(MediaPlayerAudioCategory::Speech);
+        media_player.AutoPlay(true);
+    }, "Failed to initialize SpeechSynthesizer and MediaPlayer", __LINE__)) {
+        return PyLong_FromLongLong(1);
+    }

    if (_isatty(_fileno(stdin))) {
        std::cout << "Welcome to winspeech. Type exit to quit." << std::endl;
    }
+    int64_t exit_code = -1;
+    main_loop_is_running.store(true);

+    Py_BEGIN_ALLOW_THREADS;
    std::string input_buffer;
-    while (true) {
+    while (exit_code < 0) {
        try {
            if (!std::getline(std::cin, input_buffer)) {
                if (!std::cin.eof()) exit_code = 1;
@ -997,7 +832,10 @@ run_main_loop(PyObject*, PyObject*) {
            }
            rtrim(input_buffer);
            if (input_buffer.size() > 0) {
-                if ((exit_code = handle_stdin_message(std::move(winrt::to_hstring(input_buffer)))) >= 0) break;
+                run_catching_exceptions([&]() {
+                    exit_code = handle_stdin_message(std::move(winrt::to_hstring(input_buffer)));
+                }, "Error handling STDIN message", __LINE__);
+                if (exit_code >= 0) break;
            }
        } catch(...) {
            exit_code = 1;
@ -1005,14 +843,13 @@ run_main_loop(PyObject*, PyObject*) {
            break;
        }
    }
-
-    main_loop_is_running.store(false);
    Py_END_ALLOW_THREADS;

+    main_loop_is_running.store(false);
    try {
-        sx.stop_current_activity();
-        (&sx)->~Synthesizer();
-    } CATCH_ALL_EXCEPTIONS("Error stopping all activity", 0);
+        speech_synthesizer = SpeechSynthesizer{nullptr};
+        media_player = MediaPlayer{nullptr};
+    } catch(...) {}

    return PyLong_FromLongLong(exit_code);
 }