diff --git a/src/calibre/utils/windows/winspeech.cpp b/src/calibre/utils/windows/winspeech.cpp
index b75d23bfa8..17297f71fa 100644
--- a/src/calibre/utils/windows/winspeech.cpp
+++ b/src/calibre/utils/windows/winspeech.cpp
@@ -647,6 +647,23 @@ class Synthesizer {
     Revokers revoker;
     std::recursive_mutex recursive_lock;
 
+    // Subscribe to the CueEntered, CueExited and TrackFailed events of one timed metadata
+    // track, reporting each through sx.output() tagged with cmd_id. Does nothing when cmd_id
+    // is not the current command; handlers emit nothing unless main_loop_is_running is set.
+    void register_metadata_handler_for_track(TimedMetadataTrack const& track, id_type cmd_id) {
+        std::scoped_lock sl(recursive_lock);
+        if (!cmd_id_is_current(cmd_id)) return;
+        track.CueEntered([cmd_id](auto, const auto&) {
+            if (main_loop_is_running.load()) sx.output(cmd_id, "cue", {{"state", "entered"}});
+        });
+        track.CueExited([cmd_id](auto, const auto&) {
+            if (main_loop_is_running.load()) sx.output(cmd_id, "cue", {{"state", "exited"}});
+        });
+        track.TrackFailed([cmd_id](auto, const auto&) {
+            if (main_loop_is_running.load()) sx.output(cmd_id, "track_failed", {});
+        });
+    }
+
     void load_stream_for_playback(SpeechSynthesisStream const &stream, id_type cmd_id) {
         std::scoped_lock sl(recursive_lock);
         if (cmd_id != current_cmd_id.load()) return;
@@ -667,17 +684,46 @@ class Synthesizer {
             if (main_loop_is_running.load()) sx.output(
                 cmd_id, "media_state_changed", {{"state", json_val("failed")}, {"error", args.ErrorMessage()}, {"code", json_val(args.Error())}});
         });
-            current_stream = stream;
-            current_source = MediaSource::CreateFromStream(current_stream, current_stream.ContentType());
-            current_item = MediaPlaybackItem(current_source);
-            player.Source(current_item);
+        current_stream = stream;
+        current_source = MediaSource::CreateFromStream(current_stream, current_stream.ContentType());
+        current_item = MediaPlaybackItem(current_source);
+
+        revoker.timed_metadata_tracks_changed = current_item.TimedMetadataTracksChanged(winrt::auto_revoke,
+            [cmd_id](auto, auto const &args) {
+            auto change_type = args.CollectionChange();
+            long index;
+            switch (change_type) {
+                case CollectionChange::ItemInserted: index = args.Index(); break;
+                case CollectionChange::Reset: index = -1; break;
+                default: index = -2; break;
+            }
+            if (index > -2 && main_loop_is_running.load()) sx.register_metadata_handler_for_speech(cmd_id, index);
+        });
+        register_metadata_handler_for_speech(cmd_id, -1);
+
+        player.Source(current_item);
     }
+
     public:
+    // Register track handlers for cmd_id: on every track when index < 0, else only on track `index`.
+    void register_metadata_handler_for_speech(id_type cmd_id, long index) {
+        std::scoped_lock sl(recursive_lock);
+        if (!cmd_id_is_current(cmd_id)) return;
+        const auto tracks = current_item.TimedMetadataTracks();
+        if (index >= 0) {
+            register_metadata_handler_for_track(tracks.GetAt(index), cmd_id);
+            return;
+        }
+        for (auto const &track : tracks) register_metadata_handler_for_track(track, cmd_id);
+    }
+
     bool cmd_id_is_current(id_type cmd_id) const noexcept { return current_cmd_id.load() == cmd_id; }
+
     void output(id_type cmd_id, std::string_view const& type, json_val const && x) {
         std::scoped_lock sl(recursive_lock);
         if (cmd_id_is_current(cmd_id)) ::output(cmd_id, type, std::move(x));
     }
+
     void initialize() {
         synth = SpeechSynthesizer();
         synth.Options().IncludeSentenceBoundaryMetadata(true);