Parse winspeech messages into python tuples and enums

2025-07-07 10:14:46 -04:00 · 2023-01-30 20:21:46 +05:30 · 2023-01-30 20:21:46 +05:30 · f0f4d95202
commit f0f4d95202
parent ff0766bda0
2 changed files with 279 additions and 20 deletions
--- a/src/calibre/utils/windows/winspeech.cpp
+++ b/src/calibre/utils/windows/winspeech.cpp
@ -267,6 +267,19 @@ public:
        }
    }

+    // Serialize a TimedMetadataTrackErrorCode as a DT_STRING holding its
+    // snake_case name. The switch has no default case, so a value outside the
+    // four listed enumerators leaves s unassigned by this constructor.
+    json_val(TimedMetadataTrackErrorCode const ec) : type(DT_STRING) {
+        switch(ec) {
+            case TimedMetadataTrackErrorCode::None: s = "none"; break;
+            case TimedMetadataTrackErrorCode::DataFormatError: s = "data_format_error"; break;
+            case TimedMetadataTrackErrorCode::NetworkError: s = "network_error"; break;
+            case TimedMetadataTrackErrorCode::InternalError: s = "internal_error"; break;
+        }
+    }
+
    json_val(DeviceInformationKind const dev) : type(DT_STRING) {
        switch(dev) {
            case DeviceInformationKind::Unknown:
@ -314,7 +327,6 @@ public:
            case MediaPlaybackState::Buffering: s = "buffering"; break;
            case MediaPlaybackState::Playing: s = "playing"; break;
            case MediaPlaybackState::Paused: s = "paused"; break;
-            default: s = "unknown"; break;
        }
    }

@ -326,7 +338,6 @@ public:
            case MediaPlayerError::NetworkError: s = "network_error"; break;
            case MediaPlayerError::DecodingError: s = "decoding_error"; break;
            case MediaPlayerError::SourceNotSupported: s = "source_not_supported"; break;
-            default: s = "unknown"; break;
        }
    }

@ -520,8 +531,9 @@ register_metadata_handler_for_track(MediaPlaybackTimedMetadataTrackList const &t
    }));

    speak_revoker.track_failed.emplace_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) {
+        auto error = args.Error();
        if (main_loop_is_running.load()) output(
-            cmd_id, "track_failed", {});
+            cmd_id, "track_failed", {{"code", error.ErrorCode()}, {"hr", json_val::from_hresult(error.ExtendedError())}});
    }));
 };

@ -580,7 +592,7 @@ handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
    });
    speak_revoker.media_failed = media_player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) {
        if (main_loop_is_running.load()) output(
-            cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"code", args.Error()}});
+            cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"hr", json_val::from_hresult(args.ExtendedErrorCode())}, {"code", args.Error()}});
    });
    auto playback_item = std::make_shared<MediaPlaybackItem>(source);

@ -736,8 +748,8 @@ static const std::unordered_map<std::string, handler_function> handlers = {
            }
        }
        auto x = media_player.AudioDevice();
-        if (x) output(cmd_id, "audio_device", {{"value", x}, {"found", found}});
-        else output(cmd_id, "audio_device", {{"value", ""}, {"found", found}});
+        if (x) output(cmd_id, "audio_device", {{"device", x}, {"found", found}});
+        else output(cmd_id, "audio_device", {{"device", ""}, {"found", found}});
    }},

    {"voice", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
@ -781,7 +793,7 @@ static const std::unordered_map<std::string, handler_function> handlers = {
            if (pitch < 0 || pitch > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2");
            speech_synthesizer.Options().AudioPitch(pitch);
        }
-        output(cmd_id, "pitch", {{"pitch", speech_synthesizer.Options().AudioPitch()}});
+        output(cmd_id, "pitch", {{"value", speech_synthesizer.Options().AudioPitch()}});
    }},

    {"save", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
--- a/src/calibre/utils/windows/winspeech.py
+++ b/src/calibre/utils/windows/winspeech.py
@ -7,8 +7,11 @@ import os
 import struct
 import sys
 from contextlib import closing
+from enum import Enum, auto
+from itertools import count
 from queue import Queue
 from threading import Thread
+from typing import NamedTuple, Tuple

 from calibre.utils.ipc.simple_worker import start_pipe_worker
 from calibre.utils.shm import SharedMemory
@ -21,14 +24,6 @@ SSML_SAMPLE = '''
 </speak>
 '''

-def decode_msg(line: bytes) -> dict:
-    parts = line.strip().split(b' ', 2)
-    msg_id, msg_type, ans = int(parts[0]), parts[1].decode(), json.loads(parts[2])
-    ans['related_to'] = msg_id
-    ans['payload_type'] = msg_type
-    return ans
-
-
 def start_worker():
    return start_pipe_worker('from calibre_extensions.winspeech import run_main_loop; raise SystemExit(run_main_loop())')

@ -62,6 +57,257 @@ def encode_to_file_object(text, output) -> int:
    return sz


+# message decoding {{{
+class Saving(NamedTuple):
+    '''Typed form of a ``saving`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    ssml: bool
+    output_path: str
+
+
+class Saved(NamedTuple):
+    '''Typed form of a ``saved`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    size: int
+
+
+class CueEntered(NamedTuple):
+    '''Typed form of a ``cue_entered`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    start_pos_in_text: int
+    end_pos_in_text: int
+    start_time: int  # NOTE(review): units are not visible in this file - confirm against the worker
+    type: str
+    text: str
+
+
+class CueExited(NamedTuple):
+    '''
+    Typed form of a ``cue_exited`` worker message, as built by parse_message().
+
+    This is a standalone NamedTuple rather than a subclass of CueEntered:
+    annotations on a subclass of a NamedTuple do not redefine its fields, so
+    subclassing would keep ``text`` as a required field even though it is not
+    listed here.
+    '''
+    related_to: int  # numeric id parsed from the first field of the message line
+    start_pos_in_text: int
+    end_pos_in_text: int
+    start_time: int
+    type: str
+
+
+class MarkReached(NamedTuple):
+    '''Typed form of a ``mark_reached`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    id: int
+
+
+class Error(NamedTuple):
+    '''
+    Typed form of an ``error`` worker message.
+
+    parse_message() also returns an instance with only ``msg`` set when it
+    meets a message type it does not recognise; every other field then keeps
+    its default.
+    '''
+    msg: str
+    error: str = ''
+    line: int = 0
+    file: str = 'winspeech.py'
+    hr: str = ''
+    related_to: int = 0
+
+
+class Synthesizing(NamedTuple):
+    '''Typed form of a ``synthesizing`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    ssml: bool
+    num_marks: int
+    text_length: int
+
+
+class TrackFailed(NamedTuple):
+    '''Typed form of a ``track_failed`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    code: str  # the worker serializes the track error code as a name such as "network_error"
+    hr: str  # the worker fills this from json_val::from_hresult() of the extended error
+
+
+class PlaybackState(Enum):
+    '''
+    Playback states reported by the worker.
+
+    parse_message() resolves members by name with getattr(), so the member
+    names must match the state strings the worker emits.
+    '''
+    none = auto()
+    opening = auto()
+    buffering = auto()
+    playing = auto()
+    paused = auto()
+
+
+class PlaybackStateChanged(NamedTuple):
+    '''Typed form of a ``playback_state_changed`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    state: PlaybackState  # looked up by name from the message's "state" string
+
+
+class MediaState(Enum):
+    '''
+    Media states carried by ``media_state_changed`` messages.
+
+    parse_message() resolves members by name with getattr(), so the member
+    names must match the state strings the worker emits.
+    '''
+    opened = auto()
+    ended = auto()
+    failed = auto()
+
+
+class MediaPlayerError(Enum):
+    '''
+    Error categories attached to a failed ``media_state_changed`` message.
+
+    The member names mirror the strings the worker emits for its
+    MediaPlayerError values (for example "network_error", "decoding_error").
+    Member values come from auto() and are therefore integers, not those strings.
+    '''
+    unknown = auto()
+    aborted = auto()
+    network_error = auto()
+    decoding_error = auto()
+    source_not_supported = auto()
+
+
+class MediaStateChanged(NamedTuple):
+    '''
+    Typed form of a ``media_state_changed`` worker message.
+
+    The worker's media-failed handler sends "error", "hr" and "code" alongside
+    state "failed"; when a message omits them the defaults below apply.
+    '''
+    related_to: int  # numeric id parsed from the first field of the message line
+    state: MediaState
+    error: str = ""
+    code: MediaPlayerError = MediaPlayerError.unknown
+    hr: str = ""
+
+
+class Echo(NamedTuple):
+    '''Typed form of an ``echo`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    msg: str
+
+
+class Play(NamedTuple):
+    '''Typed form of a ``play`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    playback_state: PlaybackState  # looked up by name from the message's "playback_state" string
+
+
+class Pause(NamedTuple):
+    '''Typed form of a ``pause`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    playback_state: PlaybackState  # looked up by name from the message's "playback_state" string
+
+
+class State(NamedTuple):
+    '''Typed form of a ``state`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    playback_state: PlaybackState  # looked up by name from the message's "playback_state" string
+
+
+class VoiceInformation(NamedTuple):
+    '''
+    Description of one voice.
+
+    parse_message() builds it by keyword-expanding the voice objects found in
+    ``default_voice``, ``voice`` and ``all_voices`` messages, so the field
+    names must match the JSON keys the worker sends.
+    '''
+    display_name: str
+    description: str
+    id: str
+    language: str
+    gender: str
+
+
+class DefaultVoice(NamedTuple):
+    '''Typed form of a ``default_voice`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    voice: VoiceInformation
+
+
+class Voice(NamedTuple):
+    '''Typed form of a ``voice`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    voice: VoiceInformation
+    found: bool = True  # used when the message carries no "found" key
+
+
+class DeviceInformation(NamedTuple):
+    '''
+    Description of one audio device.
+
+    parse_message() builds it by keyword-expanding the entries of the
+    "devices" list in an ``all_audio_devices`` message, so the field names
+    must match the JSON keys the worker sends.
+    '''
+    id: str
+    name: str
+    kind: str
+    is_default: bool
+    is_enabled: bool
+
+
+class AudioDevice(NamedTuple):
+    '''
+    Typed form of an ``audio_device`` worker message.
+
+    The worker sends the device under the "device" key and substitutes an
+    empty string when the media player has no audio device to report.
+    '''
+    related_to: int  # numeric id parsed from the first field of the message line
+    device: DeviceInformation
+    found: bool = True  # used when the message carries no "found" key
+
+
+class AllVoices(NamedTuple):
+    '''Typed form of an ``all_voices`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    voices: Tuple[VoiceInformation, ...]  # one entry per element of the message's "voices" list
+
+
+class Volume(NamedTuple):
+    '''Typed form of a ``volume`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    value: float
+
+
+class Rate(NamedTuple):
+    '''Typed form of a ``rate`` worker message, as built by parse_message().'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    value: float
+
+
+class Pitch(NamedTuple):
+    '''Typed form of the worker's pitch reply, which carries the pitch under the "value" key.'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    value: float
+
+
+class AllAudioDevices(NamedTuple):
+    '''Typed form of an ``all_audio_devices`` worker message.'''
+    related_to: int  # numeric id parsed from the first field of the message line
+    devices: Tuple[DeviceInformation, ...]
+
+
+def parse_message(line):
+    '''
+    Decode one line of worker output into a typed message tuple.
+
+    ``line`` is a bytes object of the form ``<id> <type> <json object>``. The
+    JSON object is expanded into the NamedTuple matching ``<type>``, with the
+    numeric id stored as ``related_to``. Strings naming enum members are
+    replaced by the corresponding PlaybackState / MediaState /
+    MediaPlayerError member, and nested voice/device objects by
+    VoiceInformation / DeviceInformation tuples.
+
+    Returns an Error tuple (with default ``related_to``) for an unrecognised
+    message type. Raises whatever int(), json.loads() or the tuple
+    constructors raise when the line or its payload is malformed.
+    '''
+    parts = line.strip().split(b' ', 2)
+    msg_id, msg_type, ans = int(parts[0]), parts[1].decode(), json.loads(parts[2])
+    ans['related_to'] = msg_id
+    if msg_type == 'cue_entered':
+        return CueEntered(**ans)
+    if msg_type == 'cue_exited':
+        return CueExited(**ans)
+    if msg_type == 'mark_reached':
+        return MarkReached(**ans)
+    if msg_type == 'playback_state_changed':
+        ans['state'] = getattr(PlaybackState, ans['state'])
+        return PlaybackStateChanged(**ans)
+    if msg_type == 'media_state_changed':
+        ans['state'] = getattr(MediaState, ans['state'])
+        if 'code' in ans:
+            # The worker sends the member *name*; the enum values are auto()
+            # integers, so a by-value lookup would always raise ValueError.
+            ans['code'] = getattr(MediaPlayerError, ans['code'], MediaPlayerError.unknown)
+        return MediaStateChanged(**ans)
+    if msg_type == 'error':
+        return Error(**ans)
+    if msg_type == 'synthesizing':
+        return Synthesizing(**ans)
+    if msg_type == 'track_failed':
+        return TrackFailed(**ans)
+    if msg_type == 'saving':
+        return Saving(**ans)
+    if msg_type == 'saved':
+        return Saved(**ans)
+    if msg_type == 'echo':
+        return Echo(**ans)
+    if msg_type == 'play':
+        ans['playback_state'] = getattr(PlaybackState, ans['playback_state'])
+        return Play(**ans)
+    if msg_type == 'pause':
+        ans['playback_state'] = getattr(PlaybackState, ans['playback_state'])
+        return Pause(**ans)
+    if msg_type == 'state':
+        ans['playback_state'] = getattr(PlaybackState, ans['playback_state'])
+        return State(**ans)
+    if msg_type == 'default_voice':
+        ans['voice'] = VoiceInformation(**ans['voice'])
+        return DefaultVoice(**ans)
+    if msg_type == 'all_voices':
+        ans['voices'] = tuple(VoiceInformation(**x) for x in ans['voices'])
+        return AllVoices(**ans)
+    if msg_type == 'all_audio_devices':
+        ans['devices'] = tuple(DeviceInformation(**x) for x in ans['devices'])
+        # AudioDevice has no "devices" field, so the list needs its own tuple type.
+        return AllAudioDevices(**ans)
+    if msg_type == 'audio_device':
+        # The worker sends "" instead of an object when no device is set;
+        # that placeholder is passed through unchanged.
+        if isinstance(ans.get('device'), dict):
+            ans['device'] = DeviceInformation(**ans['device'])
+        return AudioDevice(**ans)
+    if msg_type == 'voice':
+        ans['voice'] = VoiceInformation(**ans['voice'])
+        return Voice(**ans)
+    if msg_type == 'volume':
+        return Volume(**ans)
+    if msg_type == 'rate':
+        return Rate(**ans)
+    if msg_type == 'pitch':  # the worker emits the lowercase type name
+        return Pitch(**ans)
+    return Error(f'Unknown message type: {msg_type}')
+# }}}
+
+
+class WinSpeech:
+    '''
+    Owns the winspeech worker process and a queue of its decoded messages.
+
+    The worker is started lazily on first access of ``worker``; from then on
+    a daemon thread reads its stdout and puts one parsed message tuple per
+    line on ``queue``.
+    '''
+
+    def __init__(self):
+        self._worker = None
+        self.queue = Queue()
+        # The first id handed out is 1; 0 is what Error.related_to defaults to.
+        self.msg_id_counter = count(1)
+
+    @property
+    def worker(self):
+        '''The worker process, started together with its reader thread on first use.'''
+        if self._worker is None:
+            self._worker = start_worker()
+            Thread(name='WinspeechQueue', target=self._get_messages, args=(self._worker, self.queue), daemon=True).start()
+        return self._worker
+
+    def _get_messages(self, worker, queue):
+        '''
+        Reader-thread body: decode each line of worker output onto ``queue``.
+
+        Lines are parsed here so the queue always holds message tuples, as
+        develop_loop() does; parse_message() needs the raw bytes, so the line
+        is not decoded first. A failure to read from the worker is reported
+        as an Error tuple instead of a wire-format string.
+        '''
+        try:
+            for line in worker.stdout:
+                if line.strip():  # nothing to parse on a blank line
+                    queue.put(parse_message(line))
+        except OSError as e:
+            queue.put(Error('Failed to read from worker', str(e)))
+
+
 def develop_loop(*commands):
    p = start_worker()
    q = Queue()
@ -70,7 +316,7 @@ def develop_loop(*commands):
        for line in p.stdout:
            sys.stdout.buffer.write(b'\x1b[33m' + line + b'\x1b[39m]]'[:-2])
            sys.stdout.buffer.flush()
-            q.put(decode_msg(line))
+            q.put(parse_message(line))

    def send(*a):
        cmd = ' '.join(map(str, a)) + '\n'
@ -89,13 +335,13 @@ def develop_loop(*commands):
                else:
                    while True:
                        m = q.get()
-                        if m['related_to'] != command:
+                        if m.related_to != command:
                            continue
-                        if m['payload_type'] == 'media_state_changed' and m['state'] == 'ended':
+                        if isinstance(m, MediaStateChanged) and m.state in (MediaState.ended, MediaState.failed):
                            break
-                        if m['payload_type'] == 'saved':
+                        if isinstance(m, Saved):
                            break
-                        if m['payload_type'] == 'error':
+                        if isinstance(m, Error):
                            exit_code = 1
                            break
            send(f'333 echo Synthesizer exiting with exit code: {exit_code}')
@ -137,6 +383,7 @@ def develop_save(text='Lucca Brazzi sleeps with the fishes.', filename="speech.w

 def develop_interactive():
    import subprocess
+
    from calibre.debug import run_calibre_debug
    print('\x1b[32mInteractive winspeech', '\x1b[39m]]'[:-2], flush=True)
    p = run_calibre_debug('-c', 'from calibre_extensions.winspeech import run_main_loop; raise SystemExit(run_main_loop())',