From f0f4d952020c8d6eeb87aef11d69e5d6b0ce34b9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 30 Jan 2023 20:21:46 +0530 Subject: [PATCH] Parse winspeech messages into python tuples and enums --- src/calibre/utils/windows/winspeech.cpp | 26 ++- src/calibre/utils/windows/winspeech.py | 273 ++++++++++++++++++++++-- 2 files changed, 279 insertions(+), 20 deletions(-) diff --git a/src/calibre/utils/windows/winspeech.cpp b/src/calibre/utils/windows/winspeech.cpp index f5333169f1..97d23d2534 100644 --- a/src/calibre/utils/windows/winspeech.cpp +++ b/src/calibre/utils/windows/winspeech.cpp @@ -267,6 +267,19 @@ public: } } + json_val(TimedMetadataTrackErrorCode const ec) : type(DT_STRING) { + switch(ec) { + case TimedMetadataTrackErrorCode::DataFormatError: + s = "data_format_error"; break; + case TimedMetadataTrackErrorCode::NetworkError: + s = "network_error"; break; + case TimedMetadataTrackErrorCode::InternalError: + s = "internal_error"; break; + case TimedMetadataTrackErrorCode::None: + s = "none"; break; + } + } + json_val(DeviceInformationKind const dev) : type(DT_STRING) { switch(dev) { case DeviceInformationKind::Unknown: @@ -314,7 +327,6 @@ public: case MediaPlaybackState::Buffering: s = "buffering"; break; case MediaPlaybackState::Playing: s = "playing"; break; case MediaPlaybackState::Paused: s = "paused"; break; - default: s = "unknown"; break; } } @@ -326,7 +338,6 @@ public: case MediaPlayerError::NetworkError: s = "network_error"; break; case MediaPlayerError::DecodingError: s = "decoding_error"; break; case MediaPlayerError::SourceNotSupported: s = "source_not_supported"; break; - default: s = "unknown"; break; } } @@ -520,8 +531,9 @@ register_metadata_handler_for_track(MediaPlaybackTimedMetadataTrackList const &t })); speak_revoker.track_failed.emplace_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) { + auto error = args.Error(); if (main_loop_is_running.load()) output( - cmd_id, "track_failed", {}); + cmd_id, "track_failed", {{"code", error.ErrorCode()}, {"hr", json_val::from_hresult(error.ExtendedError())}}); })); }; @@ -580,7 +592,7 @@ handle_speak(id_type cmd_id, std::vector &parts) { }); speak_revoker.media_failed = media_player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) { if (main_loop_is_running.load()) output( - cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"code", args.Error()}}); + cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"hr", json_val::from_hresult(args.ExtendedErrorCode())}, {"code", args.Error()}}); }); auto playback_item = std::make_shared(source); @@ -736,8 +748,8 @@ static const std::unordered_map handlers = { } } auto x = media_player.AudioDevice(); - if (x) output(cmd_id, "audio_device", {{"value", x}, {"found", found}}); - else output(cmd_id, "audio_device", {{"value", ""}, {"found", found}}); + if (x) output(cmd_id, "audio_device", {{"device", x}, {"found", found}}); + else output(cmd_id, "audio_device", {{"device", ""}, {"found", found}}); }}, {"voice", [](id_type cmd_id, std::vector parts, int64_t*) { @@ -781,7 +793,7 @@ static const std::unordered_map handlers = { if (pitch < 0 || pitch > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2"); speech_synthesizer.Options().AudioPitch(pitch); } - output(cmd_id, "pitch", {{"pitch", speech_synthesizer.Options().AudioPitch()}}); + output(cmd_id, "pitch", {{"value", speech_synthesizer.Options().AudioPitch()}}); }}, {"save", [](id_type cmd_id, std::vector parts, int64_t*) { diff --git a/src/calibre/utils/windows/winspeech.py b/src/calibre/utils/windows/winspeech.py index d58fcb936e..9e2635a612 100644 --- a/src/calibre/utils/windows/winspeech.py +++ b/src/calibre/utils/windows/winspeech.py @@ -7,8 +7,11 @@ import os import struct import sys from contextlib import closing +from enum import Enum, auto +from itertools import count from queue import Queue from threading import Thread +from typing import NamedTuple, Tuple from calibre.utils.ipc.simple_worker import start_pipe_worker from calibre.utils.shm import SharedMemory @@ -21,14 +24,6 @@ SSML_SAMPLE = ''' ''' -def decode_msg(line: bytes) -> dict: - parts = line.strip().split(b' ', 2) - msg_id, msg_type, ans = int(parts[0]), parts[1].decode(), json.loads(parts[2]) - ans['related_to'] = msg_id - ans['payload_type'] = msg_type - return ans - - def start_worker(): return start_pipe_worker('from calibre_extensions.winspeech import run_main_loop; raise SystemExit(run_main_loop())') @@ -62,6 +57,257 @@ def encode_to_file_object(text, output) -> int: return sz +# message decoding {{{ +class Saving(NamedTuple): + related_to: int + ssml: bool + output_path: str + + +class Saved(NamedTuple): + related_to: int + size: int + + +class CueEntered(NamedTuple): + related_to: int + start_pos_in_text: int + end_pos_in_text: int + start_time: int + type: str + text: str + + +class CueExited(CueEntered): + related_to: int + start_pos_in_text: int + end_pos_in_text: int + start_time: int + type: str + + +class MarkReached(NamedTuple): + related_to: int + id: int + + +class Error(NamedTuple): + msg: str + error: str = '' + line: int = 0 + file: str = 'winspeech.py' + hr: str = '' + related_to: int = 0 + + +class Synthesizing(NamedTuple): + related_to: int + ssml: bool + num_marks: int + text_length: int + + +class TrackFailed(NamedTuple): + related_to: int + code: str + hr: str + + +class PlaybackState(Enum): + none = auto() + opening = auto() + buffering = auto() + playing = auto() + paused = auto() + + +class PlaybackStateChanged(NamedTuple): + related_to: int + state: PlaybackState + + +class MediaState(Enum): + opened = auto() + ended = auto() + failed = auto() + + +class MediaPlayerError(Enum): + unknown = auto() + aborted = auto() + network_error = auto() + decoding_error = auto() + source_not_supported = auto() + + +class MediaStateChanged(NamedTuple): + related_to: int + state: MediaState + error: str = "" + code: MediaPlayerError = MediaPlayerError.unknown + hr: str = "" + + +class Echo(NamedTuple): + related_to: int + msg: str + + +class Play(NamedTuple): + related_to: int + playback_state: PlaybackState + + +class Pause(NamedTuple): + related_to: int + playback_state: PlaybackState + + +class State(NamedTuple): + related_to: int + playback_state: PlaybackState + + +class VoiceInformation(NamedTuple): + display_name: str + description: str + id: str + language: str + gender: str + + +class DefaultVoice(NamedTuple): + related_to: int + voice: VoiceInformation + + +class Voice(NamedTuple): + related_to: int + voice: VoiceInformation + found: bool = True + + +class DeviceInformation(NamedTuple): + id: str + name: str + kind: str + is_default: bool + is_enabled: bool + + +class AudioDevice(NamedTuple): + related_to: int + device: DeviceInformation + found: bool = True + + +class AllVoices(NamedTuple): + related_to: int + voices: Tuple[VoiceInformation, ...] + + +class Volume(NamedTuple): + related_to: int + value: float + + +class Rate(NamedTuple): + related_to: int + value: float + + +class Pitch(NamedTuple): + related_to: int + value: float + + +def parse_message(line): + parts = line.strip().split(b' ', 2) + msg_id, msg_type, ans = int(parts[0]), parts[1].decode(), json.loads(parts[2]) + ans['related_to'] = msg_id + if msg_type == 'cue_entered': + return CueEntered(**ans) + if msg_type == 'cue_exited': + return CueExited(**ans) + if msg_type == 'mark_reached': + return MarkReached(**ans) + if msg_type == 'playback_state_changed': + ans['state'] = getattr(PlaybackState, ans['state']) + return PlaybackStateChanged(**ans) + if msg_type == 'media_state_changed': + ans['state'] = getattr(MediaState, ans['state']) + if 'code' in ans: + ans['code'] = MediaPlayerError(ans['code']) + return MediaStateChanged(**ans) + if msg_type == 'error': + return Error(**ans) + if msg_type == 'synthesizing': + return Synthesizing(**ans) + if msg_type == 'track_failed': + return TrackFailed(**ans) + if msg_type == 'saving': + return Saving(**ans) + if msg_type == 'saved': + return Saved(**ans) + if msg_type == 'echo': + return Echo(**ans) + if msg_type == 'play': + ans['playback_state'] = getattr(PlaybackState, ans['playback_state']) + return Play(**ans) + if msg_type == 'pause': + ans['playback_state'] = getattr(PlaybackState, ans['playback_state']) + return Pause(**ans) + if msg_type == 'state': + ans['playback_state'] = getattr(PlaybackState, ans['playback_state']) + return State(**ans) + if msg_type == 'default_voice': + ans['voice'] = VoiceInformation(**ans['voice']) + return DefaultVoice(**ans) + if msg_type == 'all_voices': + ans['voices'] = tuple(VoiceInformation(**x) for x in ans['voices']) + return AllVoices(**ans) + if msg_type == 'all_audio_devices': + ans['devices'] = tuple(DeviceInformation(**x) for x in ans['devices']) + return AudioDevice(**ans) + if msg_type == 'audio_device': + return AudioDevice(**ans) + if msg_type == 'voice': + ans['voice'] = VoiceInformation(**ans['voice']) + return Voice(**ans) + if msg_type == 'volume': + return Volume(**ans) + if msg_type == 'rate': + return Rate(**ans) + if msg_type == 'Pitch': + return Pitch(**ans) + return Error(f'Unknown message type: {msg_type}') +# }}} + + +class WinSpeech: + + def __init__(self): + self._worker = None + self.queue = Queue() + self.msg_id_counter = count() + next(self.msg_id_counter) + + @property + def worker(self): + if self._worker is None: + self._worker = start_worker() + Thread(name='WinspeechQueue', target=self._get_messages, args=(self._worker, self.queue), daemon=True).start() + return self._worker + + def _get_messages(self, worker, queue): + try: + for line in worker.stdout: + queue.put(line.decode('utf-8')) + except OSError as e: + line = ('0 error ' + json.dumps({"msg": "Failed to read from worker", "error": str(e), "file": "winspeech.py", "line": 0})) + queue.put(line) + + def develop_loop(*commands): p = start_worker() q = Queue() @@ -70,7 +316,7 @@ def develop_loop(*commands): for line in p.stdout: sys.stdout.buffer.write(b'\x1b[33m' + line + b'\x1b[39m]]'[:-2]) sys.stdout.buffer.flush() - q.put(decode_msg(line)) + q.put(parse_message(line)) def send(*a): cmd = ' '.join(map(str, a)) + '\n' @@ -89,13 +335,13 @@ def develop_loop(*commands): else: while True: m = q.get() - if m['related_to'] != command: + if m.related_to != command: continue - if m['payload_type'] == 'media_state_changed' and m['state'] == 'ended': + if isinstance(m, MediaStateChanged) and m.state in (MediaState.ended, MediaState.failed): break - if m['payload_type'] == 'saved': + if isinstance(m, Saved): break - if m['payload_type'] == 'error': + if isinstance(m, Error): exit_code = 1 break send(f'333 echo Synthesizer exiting with exit code: {exit_code}') @@ -137,6 +383,7 @@ def develop_save(text='Lucca Brazzi sleeps with the fishes.', filename="speech.w def develop_interactive(): import subprocess + from calibre.debug import run_calibre_debug print('\x1b[32mInteractive winspeech', '\x1b[39m]]'[:-2], flush=True) p = run_calibre_debug('-c', 'from calibre_extensions.winspeech import run_main_loop; raise SystemExit(run_main_loop())',