Refactor speech code to make it simpler and hopefully more robust

Also get rid of the catch macro
Kovid Goyal 2023-01-29 14:58:54 +05:30
parent 356091be76
commit cf0cc595b6
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
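Note on the pattern: the diff below replaces the multi-line CATCH_ALL_EXCEPTIONS() preprocessor macro, which had to be pasted after every try block, with a single run_catching_exceptions() helper that receives the guarded work as a std::function, reports any failure, and returns whether the work succeeded. The following standalone sketch illustrates that shape only; report_error() and the toy caller in main() are illustrative stand-ins, not code from the calibre sources.

#include <cstdint>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string_view>

// Simplified error reporter standing in for the output_error() used in the real code.
static void
report_error(std::string_view msg, std::string_view detail, int64_t line) {
    std::cerr << "error at line " << line << ": " << msg << ": " << detail << '\n';
}

// The guarded work is passed in as a callable instead of pasting catch blocks after
// every try block via a macro; the helper reports the failure and tells the caller
// whether the work completed.
static bool
run_catching_exceptions(std::function<void(void)> f, std::string_view msg, int64_t line) {
    bool ok = false;
    try {
        f();
        ok = true;
    } catch (std::exception const &ex) {
        report_error(msg, ex.what(), line);
    } catch (...) {
        report_error(msg, "Unknown exception type was raised", line);
    }
    return ok;
}

int main() {
    // Each fallible step is wrapped in a lambda and the caller bails out early on
    // failure, mirroring the "if (!run_catching_exceptions(...)) return;" usage
    // in the refactored code below.
    if (!run_catching_exceptions(
            []() { throw std::runtime_error("synthesis failed"); },
            "Failed to synthesize speech", __LINE__)) return 1;
    return 0;
}

The real helper in the diff additionally knows about winrt::hresult_error, std::system_error and string exceptions, and forwards the command id and source line to output_error(); the callers in handle_speak(), save_stream(), handle_stdin_message() and run_main_loop() all follow the early-return shape shown above.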


@@ -393,21 +393,29 @@ output_error(id_type cmd_id, std::string_view const &msg, std::string_view const
     output(cmd_id, "error", std::move(m));
 }
 
-#define CATCH_ALL_EXCEPTIONS(msg, cmd_id) \
-  catch(winrt::hresult_error const& ex) { \
-    output_error(cmd_id, msg, winrt::to_string(ex.message()), __LINE__, ex.to_abi()); \
-} catch(const std::system_error& ex) { \
-    output_error(cmd_id, msg, "system_error with code: " + std::to_string(ex.code().value()) + " and meaning: " + ex.what(), __LINE__); \
-} catch (std::exception const &ex) { \
-    output_error(cmd_id, msg, ex.what(), __LINE__); \
-} catch (std::string const &ex) { \
-    output_error(cmd_id, msg, ex, __LINE__); \
-} catch (std::wstring const &ex) { \
-    output_error(cmd_id, msg, winrt::to_string(ex), __LINE__); \
-} catch (...) { \
-    output_error(cmd_id, msg, "Unknown exception type was raised", __LINE__); \
-}
+static bool
+run_catching_exceptions(std::function<void(void)> f, std::string_view const &msg, int64_t line, id_type cmd_id=0) {
+    bool ok = false;
+    try {
+        f();
+        ok = true;
+    } catch(winrt::hresult_error const& ex) {
+        output_error(cmd_id, msg, winrt::to_string(ex.message()), line, ex.to_abi());
+    } catch(const std::system_error& ex) {
+        output_error(cmd_id, msg, "system_error with code: " + std::to_string(ex.code().value()) + " and meaning: " + ex.what(), line);
+    } catch (std::exception const &ex) {
+        output_error(cmd_id, msg, ex.what(), line);
+    } catch (std::string const &ex) {
+        output_error(cmd_id, msg, ex, line);
+    } catch (std::wstring const &ex) {
+        output_error(cmd_id, msg, winrt::to_string(ex), line);
+    } catch (...) {
+        output_error(cmd_id, msg, "Unknown exception type was raised", line);
+    }
+    return ok;
+}
 
 struct Revokers {
     MediaPlaybackSession::PlaybackStateChanged_revoker playback_state_changed;
     MediaPlayer::MediaEnded_revoker media_ended; MediaPlayer::MediaOpened_revoker media_opened;
@@ -424,132 +432,14 @@ struct Mark {
     Mark(uint32_t id, uint32_t pos) : id(id), pos_in_text(pos) {}
 };
 
-typedef std::vector<Mark> Marks;
-
-class Synthesizer {
-    private:
-    SpeechSynthesizer synth{nullptr};
-    MediaPlayer player{nullptr};
-    MediaSource current_source{nullptr};
-    SpeechSynthesisStream current_stream{nullptr};
-    MediaPlaybackItem current_item{nullptr};
-    std::vector<wchar_t> current_text_storage;
-    Marks current_marks;
-    int32_t last_reported_mark_index;
-    std::atomic<id_type> current_cmd_id;
-    Revokers revoker;
-    std::recursive_mutex recursive_lock;
-
-    public:
-    // Speak {{{
-    void register_metadata_handler_for_track(uint32_t index, id_type cmd_id);
-    void load_stream_for_playback(SpeechSynthesisStream const &&stream, id_type cmd_id, bool is_cued);
-    winrt::fire_and_forget speak(id_type cmd_id, std::wstring_view const &text, bool is_ssml, bool is_cued, std::vector<wchar_t> &&buf, Marks const && marks);
-    void register_metadata_handler_for_speech(id_type cmd_id, long index);
-    bool cmd_id_is_current(id_type cmd_id) const noexcept { return current_cmd_id.load() == cmd_id; }
-    void on_cue_entered(id_type cmd_id, const winrt::hstring &label, const SpeechCue &cue);
-    // }}}
-
-    winrt::fire_and_forget save(id_type cmd_id, std::wstring_view const &text, bool is_ssml, std::vector<wchar_t> &&buf, std::filesystem::path path);
-    void start_save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id);
-
-    void initialize() {
-        synth = SpeechSynthesizer();
-        player = MediaPlayer();
-        player.AudioCategory(MediaPlayerAudioCategory::Speech);
-        player.AutoPlay(true);
-    }
-
-    void output(id_type cmd_id, std::string_view const& type, json_val const && x) {
-        std::scoped_lock sl(recursive_lock);
-        if (cmd_id_is_current(cmd_id)) ::output(cmd_id, type, std::move(x));
-    }
-
-    void stop_current_activity() {
-        std::scoped_lock sl(recursive_lock);
-        if (current_cmd_id.load()) {
-            current_cmd_id.store(0);
-            revoker = {};
-            current_source = MediaSource{nullptr};
-            current_stream = SpeechSynthesisStream{nullptr};
-            current_item = MediaPlaybackItem{nullptr};
-            player.Pause();
-            current_text_storage = std::vector<wchar_t>();
-            current_marks = Marks();
-            last_reported_mark_index = -1;
-        }
-    }
-
-    double volume() const {
-        return synth.Options().AudioVolume();
-    }
-
-    void volume(double val) {
-        if (val < 0 || val > 1) throw std::out_of_range("Invalid volume value must be between 0 and 1");
-        std::scoped_lock sl(recursive_lock);
-        synth.Options().AudioVolume(val);
-    }
-
-    double rate() const {
-        return synth.Options().SpeakingRate();
-    }
-
-    void rate(double val) {
-        if (val < 0.5 || val > 6.0) throw std::out_of_range("Invalid rate value must be between 0.5 and 6");
-        std::scoped_lock sl(recursive_lock);
-        synth.Options().SpeakingRate(val);
-    }
-
-    double pitch() const {
-        return synth.Options().AudioPitch();
-    }
-
-    void pitch(double val) {
-        if (val < 0 || val > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2");
-        std::scoped_lock sl(recursive_lock);
-        synth.Options().AudioPitch(val);
-    }
-
-    void pause() const {
-        player.Pause();
-    }
-
-    void play() const {
-        player.Play();
-    }
-
-    bool toggle() const {
-        switch (player.PlaybackSession().PlaybackState()) {
-            case MediaPlaybackState::Playing: pause(); return true;
-            case MediaPlaybackState::Paused: play(); return true;
-            default: return false;
-        }
-    }
-
-    MediaPlaybackState playback_state() const {
-        return player.PlaybackSession().PlaybackState();
-    }
-
-    DeviceInformation audio_device() const {
-        return player.AudioDevice();
-    }
-
-    void audio_device(DeviceInformation const &di) const {
-        player.AudioDevice(di);
-    }
-
-    VoiceInformation voice() const {
-        return synth.Voice();
-    }
-
-    void voice(VoiceInformation const &v) const {
-        return synth.Voice(v);
-    }
-};
-
-static Synthesizer sx;
+struct Marks {
+    std::vector<Mark> entries;
+    int32_t last_reported_mark_index;
+    Marks() : entries(), last_reported_mark_index(-1) {}
+};
+
+static SpeechSynthesizer speech_synthesizer{nullptr};
+static MediaPlayer media_player{nullptr};
 
 static size_t
 decode_into(std::string_view src, std::wstring_view dest) {
@@ -575,7 +465,7 @@ parse_cued_text(std::string_view src, Marks &marks, std::wstring_view dest) {
             src = src.substr(1, src.size() - 1);
             if (src.size() >= 4) {
                 uint32_t mark = *((uint32_t*)src.data());
-                marks.emplace_back(mark, (uint32_t)dest_pos);
+                marks.entries.emplace_back(mark, (uint32_t)dest_pos);
                 src = src.substr(4, src.size() - 4);
             }
         }
@@ -605,124 +495,46 @@ read_from_shm(id_type cmd_id, const std::wstring_view size, const std::wstring &
 // Speak {{{
-void Synthesizer::on_cue_entered(id_type cmd_id, const winrt::hstring &label, const SpeechCue &cue) {
-    std::scoped_lock sl(recursive_lock);
-    if (!cmd_id_is_current(cmd_id)) return;
-    output(cmd_id, "cue_entered", json_val(label, cue));
-    if (label != L"SpeechWord") return;
-    uint32_t pos = cue.StartPositionInInput().Value();
-    for (int32_t i = std::max(0, last_reported_mark_index); i < (int32_t)current_marks.size(); i++) {
-        int32_t idx = -1;
-        if (current_marks[i].pos_in_text > pos) {
-            idx = i-1;
-            if (idx == last_reported_mark_index && current_marks[i].pos_in_text - pos < 3) idx = i;
-        } else if (current_marks[i].pos_in_text == pos) idx = i;
-        if (idx > -1) {
-            output(cmd_id, "mark_reached", {{"id", current_marks[idx].id}});
-            last_reported_mark_index = idx;
-            break;
-        }
-    }
-}
-
-void Synthesizer::register_metadata_handler_for_speech(id_type cmd_id, long index) {
-    std::scoped_lock sl(recursive_lock);
-    if (!cmd_id_is_current(cmd_id)) return;
-    if (index < 0) {
-        for (uint32_t i = 0; i < current_item.TimedMetadataTracks().Size(); i++) {
-            register_metadata_handler_for_track(i, cmd_id);
-        }
-    } else {
-        register_metadata_handler_for_track(index, cmd_id);
-    }
-}
-
-void
-Synthesizer::register_metadata_handler_for_track(uint32_t index, id_type cmd_id) {
-    TimedMetadataTrack track = current_item.TimedMetadataTracks().GetAt(index);
-    std::scoped_lock sl(recursive_lock);
-    if (current_cmd_id.load() != cmd_id) return;
-    revoker.cue_entered.push_back(track.CueEntered(winrt::auto_revoke, [cmd_id](auto track, const auto& args) {
-        if (main_loop_is_running.load()) sx.on_cue_entered(cmd_id, track.Label(), args.Cue().template as<SpeechCue>());
-    }));
-    revoker.cue_exited.push_back(track.CueExited(winrt::auto_revoke, [cmd_id](auto track, const auto& args) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "cue_exited", json_val(track.Label(), args.Cue().template as<SpeechCue>()));
-    }));
-    revoker.track_failed.push_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "track_failed", {});
-    }));
-    current_item.TimedMetadataTracks().SetPresentationMode((unsigned int)index, TimedMetadataTrackPresentationMode::ApplicationPresented);
-}
-
-void
-Synthesizer::load_stream_for_playback(SpeechSynthesisStream const &&stream, id_type cmd_id, bool is_cued) {
-    std::scoped_lock sl(recursive_lock);
-    if (cmd_id != current_cmd_id.load()) return;
-    current_stream = stream;
-    current_source = MediaSource::CreateFromStream(current_stream, current_stream.ContentType());
-    revoker.playback_state_changed = player.PlaybackSession().PlaybackStateChanged(
-        winrt::auto_revoke, [cmd_id](auto session, auto const&) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "playback_state_changed", {{"state", session.PlaybackState()}});
-    });
-    revoker.media_opened = player.MediaOpened(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "media_state_changed", {{"state", "opened"}});
-    });
-    revoker.media_ended = player.MediaEnded(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "media_state_changed", {{"state", "ended"}});
-    });
-    revoker.media_failed = player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) {
-        if (main_loop_is_running.load()) sx.output(
-            cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"code", args.Error()}});
-    });
-    current_item = MediaPlaybackItem(current_source);
-    revoker.timed_metadata_tracks_changed = current_item.TimedMetadataTracksChanged(winrt::auto_revoke,
-        [cmd_id](auto, auto const &args) {
-        auto change_type = args.CollectionChange();
-        long index;
-        switch (change_type) {
-            case CollectionChange::ItemInserted: index = args.Index(); break;
-            case CollectionChange::Reset: index = -1; break;
-            default: index = -2; break;
-        }
-        if (index > -2 && main_loop_is_running.load()) sx.register_metadata_handler_for_speech(cmd_id, index);
-    });
-    register_metadata_handler_for_speech(cmd_id, -1);
-    player.Source(current_item);
-}
-
-winrt::fire_and_forget Synthesizer::speak(id_type cmd_id, std::wstring_view const &text, bool is_ssml, bool is_cued, std::vector<wchar_t> &&buf, Marks const && marks) {
-    SpeechSynthesisStream stream{nullptr};
-    { std::scoped_lock sl(recursive_lock);
-        stop_current_activity();
-        current_cmd_id.store(cmd_id);
-        current_text_storage = std::move(buf);
-        current_marks = std::move(marks);
-        synth.Options().IncludeSentenceBoundaryMetadata(true);
-        synth.Options().IncludeWordBoundaryMetadata(true);
-    }
-    output(cmd_id, "synthesizing", {{"ssml", is_ssml}, {"num_marks", current_marks.size()}, {"text_length", text.size()}});
-    bool ok = false;
-    try {
-        if (is_ssml) stream = co_await synth.SynthesizeSsmlToStreamAsync(text);
-        else stream = co_await synth.SynthesizeTextToStreamAsync(text);
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Failed to synthesize speech", cmd_id);
-    if (ok) {
-        if (main_loop_is_running.load()) {
-            try {
-                load_stream_for_playback(std::move(stream), cmd_id, is_cued);
-            } CATCH_ALL_EXCEPTIONS("Failed to load synthesized stream for playback", cmd_id);
-        }
-    }
-}
+static Revokers speak_revoker = {};
+
+static void
+register_metadata_handler_for_track(MediaPlaybackTimedMetadataTrackList const &tracks, uint32_t index, id_type cmd_id, std::shared_ptr<Marks> marks) {
+    TimedMetadataTrack track = tracks.GetAt(index);
+    tracks.SetPresentationMode((unsigned int)index, TimedMetadataTrackPresentationMode::ApplicationPresented);
+    speak_revoker.cue_entered.push_back(track.CueEntered(winrt::auto_revoke, [cmd_id, marks](auto track, const auto& args) {
+        if (main_loop_is_running.load()) {
+            auto label = track.Label();
+            auto cue = args.Cue().template as<SpeechCue>();
+            output(cmd_id, "cue_entered", {label, cue});
+            if (label != L"SpeechWord") return;
+            uint32_t pos = cue.StartPositionInInput().Value();
+            for (int32_t i = std::max(0, marks->last_reported_mark_index); i < (int32_t)marks->entries.size(); i++) {
+                int32_t idx = -1;
+                if (marks->entries[i].pos_in_text > pos) {
+                    idx = i-1;
+                    if (idx == marks->last_reported_mark_index && marks->entries[i].pos_in_text - pos < 3) idx = i;
+                } else if (marks->entries[i].pos_in_text == pos) idx = i;
+                if (idx > -1) {
+                    output(cmd_id, "mark_reached", {{"id", marks->entries[idx].id}});
+                    marks->last_reported_mark_index = idx;
+                    break;
+                }
+            }
+        }
+    }));
+    speak_revoker.cue_exited.push_back(track.CueExited(winrt::auto_revoke, [cmd_id](auto track, const auto& args) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "cue_exited", json_val(track.Label(), args.Cue().template as<SpeechCue>()));
+    }));
+    speak_revoker.track_failed.push_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "track_failed", {});
+    }));
+};
 
 static void
 handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
@@ -736,11 +548,11 @@ handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
     }
     parts.erase(parts.begin(), parts.begin() + 2);
     std::wstring address;
-    Marks marks;
+    auto marks = std::make_shared<Marks>();
     std::vector<wchar_t> buf;
     std::wstring_view text;
     if (is_shm) {
-        text = read_from_shm(cmd_id, parts.at(0), std::wstring(parts.at(1)), buf, marks, is_cued);
+        text = read_from_shm(cmd_id, parts.at(0), std::wstring(parts.at(1)), buf, *marks, is_cued);
         if (text.size() == 0) return;
     } else {
         address = join(parts);
@@ -750,12 +562,62 @@ handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
         address.copy(buf.data(), address.size());
     }
     *((wchar_t*)text.data() + text.size()) = 0; // ensure NULL termination
-    sx.speak(cmd_id, text, is_ssml, is_cued, std::move(buf), std::move(marks));
+    output(cmd_id, "synthesizing", {{"ssml", is_ssml}, {"num_marks", marks->entries.size()}, {"text_length", text.size()}});
+    bool ok = false;
+    SpeechSynthesisStream stream{nullptr};
+    if (!run_catching_exceptions([&]() {
+        speech_synthesizer.Options().IncludeSentenceBoundaryMetadata(true);
+        speech_synthesizer.Options().IncludeWordBoundaryMetadata(true);
+        if (is_ssml) stream = speech_synthesizer.SynthesizeSsmlToStreamAsync(text).get();
+        else stream = speech_synthesizer.SynthesizeTextToStreamAsync(text).get();
+        ok = true;
+    }, "Failed to synthesize speech", __LINE__, cmd_id)) return;
+    speak_revoker = {}; // delete any revokers previously installed
+    MediaSource source(MediaSource::CreateFromStream(stream, stream.ContentType()));
+    speak_revoker.playback_state_changed = media_player.PlaybackSession().PlaybackStateChanged(
+        winrt::auto_revoke, [cmd_id](auto session, auto const&) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "playback_state_changed", {{"state", session.PlaybackState()}});
+    });
+    speak_revoker.media_opened = media_player.MediaOpened(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "media_state_changed", {{"state", "opened"}});
+    });
+    speak_revoker.media_ended = media_player.MediaEnded(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "media_state_changed", {{"state", "ended"}});
+    });
+    speak_revoker.media_failed = media_player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) {
+        if (main_loop_is_running.load()) output(
+            cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"code", args.Error()}});
+    });
+    auto playback_item = std::make_shared<MediaPlaybackItem>(source);
+    speak_revoker.timed_metadata_tracks_changed = playback_item->TimedMetadataTracksChanged(winrt::auto_revoke,
+        [cmd_id, playback_item_weak_ref = std::weak_ptr(playback_item), marks](auto, auto const &args) {
+        auto change_type = args.CollectionChange();
+        long index;
+        switch (change_type) {
+            case CollectionChange::ItemInserted: index = args.Index(); break;
+            case CollectionChange::Reset: index = -1; break;
+            default: index = -2; break;
+        }
+        auto pi{ playback_item_weak_ref.lock() };
+        if (index > -2 && pi && main_loop_is_running.load()) register_metadata_handler_for_track(pi->TimedMetadataTracks(), index, cmd_id, marks);
+    });
+    for (uint32_t i = 0; i < playback_item->TimedMetadataTracks().Size(); i++) {
+        register_metadata_handler_for_track(playback_item->TimedMetadataTracks(), i, cmd_id, marks);
+    }
+    media_player.Source(*playback_item);
 }
 // }}}
 
 // Save {{{
-static winrt::fire_and_forget
+static void
 save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id) {
     unsigned long long stream_size = stream.Size(), bytes_read = 0;
     DataReader reader(stream);
@@ -763,66 +625,26 @@ save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id
     const static unsigned int chunk_size = 16 * 1024;
     std::array<uint8_t, chunk_size> buf;
     std::ofstream outfile;
-    bool ok = false;
-    try {
+    if (!run_catching_exceptions([&](){
         outfile.open(path.string(), std::ios::out | std::ios::trunc);
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Failed to create file: " + path.string(), cmd_id);
-    if (!ok) co_return;
+    }, "Failed to create file: " + path.string(), __LINE__, cmd_id)) return;
     while (bytes_read < stream_size) {
-        try {
-            n = co_await reader.LoadAsync(chunk_size);
-            ok = true;
-        } CATCH_ALL_EXCEPTIONS("Failed to load data from DataReader", cmd_id);
-        if (!ok) co_return;
+        if (!run_catching_exceptions([&]() {
+            n = reader.LoadAsync(chunk_size).get();
+        }, "Failed to load data from DataReader", __LINE__, cmd_id)) return;
         if (n > 0) {
             bytes_read += n;
-            ok = false;
-            try {
+            if (!run_catching_exceptions([&]() {
                 reader.ReadBytes(winrt::array_view(buf.data(), buf.data() + n));
                 outfile.write((const char*)buf.data(), n);
                 if (!outfile.good()) throw "Failed to write to output file";
-                ok = true;
-            } CATCH_ALL_EXCEPTIONS("Failed to save bytes from DataReader to file", cmd_id);
-            if (!ok) co_return;
+            }, "Failed to save bytes from DataReader to file", __LINE__, cmd_id)) return;
         }
     }
     output(cmd_id, "saved", {{"size", bytes_read}});
 }
-
-void
-Synthesizer::start_save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id) {
-    std::scoped_lock sl(recursive_lock);
-    try {
-        save_stream(std::move(stream), path, cmd_id);
-    } CATCH_ALL_EXCEPTIONS("Failed to save loaded stream", cmd_id);
-    stop_current_activity();
-}
-
-winrt::fire_and_forget Synthesizer::save(id_type cmd_id, std::wstring_view const &text, bool is_ssml, std::vector<wchar_t> &&buf, std::filesystem::path path) {
-    SpeechSynthesisStream stream{nullptr};
-    { std::scoped_lock sl(recursive_lock);
-        stop_current_activity();
-        current_cmd_id.store(cmd_id);
-        current_text_storage = std::move(buf);
-        synth.Options().IncludeSentenceBoundaryMetadata(false);
-        synth.Options().IncludeWordBoundaryMetadata(false);
-    }
-    bool ok = false;
-    try {
-        if (is_ssml) stream = co_await synth.SynthesizeSsmlToStreamAsync(text);
-        else stream = co_await synth.SynthesizeTextToStreamAsync(text);
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Failed to synthesize speech", cmd_id);
-    if (ok) {
-        if (main_loop_is_running.load()) {
-            try {
-                sx.start_save_stream(std::move(stream), path, cmd_id);
-            } CATCH_ALL_EXCEPTIONS("Failed to load synthesized stream for save", cmd_id);
-        }
-    }
-}
 
 static void
 handle_save(id_type cmd_id, std::vector<std::wstring_view> &parts) {
     bool is_ssml;
@@ -841,7 +663,14 @@ handle_save(id_type cmd_id, std::vector<std::wstring_view> &parts) {
     auto filename = join(parts);
     auto path = std::filesystem::absolute(filename);
     output(cmd_id, "saving", {{"ssml", is_ssml}, {"output_path", path.string()}});
-    sx.save(cmd_id, text, is_ssml, std::move(buf), path);
+    SpeechSynthesisStream stream{nullptr};
+    speech_synthesizer.Options().IncludeSentenceBoundaryMetadata(false);
+    speech_synthesizer.Options().IncludeWordBoundaryMetadata(false);
+    if (!run_catching_exceptions([&]() {
+        if (is_ssml) stream = speech_synthesizer.SynthesizeSsmlToStreamAsync(text).get();
+        else stream = speech_synthesizer.SynthesizeTextToStreamAsync(text).get();
+    }, "Failed to synthesize speech", __LINE__, cmd_id)) return;
+    save_stream(std::move(stream), path, cmd_id);
 }
 // }}}
@@ -862,18 +691,17 @@ static const std::unordered_map<std::string, handler_function> handlers = {
     }},
 
     {"play", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        sx.play();
-        output(cmd_id, "play", {{"playback_state", sx.playback_state()}});
+        media_player.Play();
+        output(cmd_id, "play", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
     }},
 
     {"pause", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        sx.play();
-        output(cmd_id, "pause", {{"playback_state", sx.playback_state()}});
+        media_player.Pause();
+        output(cmd_id, "pause", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
     }},
 
     {"state", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        sx.play();
-        output(cmd_id, "state", {{"playback_state", sx.playback_state()}});
+        output(cmd_id, "state", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
     }},
 
     {"default_voice", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
@@ -895,25 +723,28 @@ static const std::unordered_map<std::string, handler_function> handlers = {
     {"volume", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
         if (parts.size()) {
             auto vol = parse_double(parts[0].data());
-            sx.volume(vol);
+            if (vol < 0 || vol > 1) throw std::out_of_range("Invalid volume value must be between 0 and 1");
+            speech_synthesizer.Options().AudioVolume(vol);
         }
-        output(cmd_id, "volume", {{"value", sx.volume()}});
+        output(cmd_id, "volume", {{"value", speech_synthesizer.Options().AudioVolume()}});
     }},
 
     {"rate", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
         if (parts.size()) {
             auto rate = parse_double(parts[0].data());
-            sx.rate(rate);
+            if (rate < 0.5 || rate > 6.0) throw std::out_of_range("Invalid rate value must be between 0.5 and 6");
+            speech_synthesizer.Options().SpeakingRate(rate);
         }
-        output(cmd_id, "rate", {{"value", sx.rate()}});
+        output(cmd_id, "rate", {{"value", speech_synthesizer.Options().SpeakingRate()}});
     }},
 
     {"pitch", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
         if (parts.size()) {
-            auto rate = parse_double(parts[0].data());
-            sx.rate(rate);
+            auto pitch = parse_double(parts[0].data());
+            if (pitch < 0 || pitch > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2");
+            speech_synthesizer.Options().AudioPitch(pitch);
         }
-        output(cmd_id, "pitch", {{"pitch", sx.rate()}});
+        output(cmd_id, "pitch", {{"pitch", speech_synthesizer.Options().AudioPitch()}});
     }},
 
     {"save", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
@@ -932,7 +763,7 @@ handle_stdin_message(winrt::hstring const &&msg) {
     bool ok = false;
     std::vector<std::wstring_view> parts;
     int64_t exit_code = -1;
-    try {
+    if (!run_catching_exceptions([&]() {
         parts = split(msg);
         command = parts.at(1); cmd_id = parse_id(parts.at(0));
         if (cmd_id == 0) {
@@ -940,8 +771,7 @@
         }
         parts.erase(parts.begin(), parts.begin() + 2);
         ok = true;
-    } CATCH_ALL_EXCEPTIONS((std::string("Invalid input message: ") + winrt::to_string(msg)), 0);
-    if (ok) {
+    }, "Invalid input message: " + winrt::to_string(msg), __LINE__)) return exit_code;
     handler_function handler;
     std::string cmd(winrt::to_string(command));
     try {
@@ -950,46 +780,51 @@
             output_error(cmd_id, "Unknown command", cmd, __LINE__);
             return exit_code;
         }
-        try {
+        run_catching_exceptions([&]() {
             handler(cmd_id, parts, &exit_code);
-        } CATCH_ALL_EXCEPTIONS("Error handling input message", cmd_id);
-    }
+        }, "Error handling input message", __LINE__, cmd_id);
     return exit_code;
 }
 
 static PyObject*
 run_main_loop(PyObject*, PyObject*) {
-    try {
+    if (!run_catching_exceptions([]() {
         std::cout.imbue(std::locale("C"));
         std::cin.imbue(std::locale("C"));
         std::cerr.imbue(std::locale("C"));
         std::wcin.imbue(std::locale("C"));
         std::wcout.imbue(std::locale("C"));
         std::wcerr.imbue(std::locale("C"));
-    } CATCH_ALL_EXCEPTIONS("Failed to set stdio locales to C", 0);
-    winrt::init_apartment(winrt::apartment_type::multi_threaded);
-    main_thread_id = GetCurrentThreadId();
-    MSG msg;
-    int64_t exit_code = 0;
-    bool ok = false;
-    try {
-        new (&sx) Synthesizer();
-        sx.initialize();
-        ok = true;
-    } CATCH_ALL_EXCEPTIONS("Error initializing Synthesizer", 0);
-    if (!ok) return PyLong_FromUnsignedLongLong(1);
-    Py_BEGIN_ALLOW_THREADS;
-    main_loop_is_running.store(true);
-    PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE); // ensure we have a message queue
+    }, "Failed to set stdio locales to C", __LINE__)) {
+        return PyLong_FromLongLong(1);
+    }
+    if (!run_catching_exceptions([]() {
+        winrt::init_apartment(winrt::apartment_type::multi_threaded);
+    }, "Failed to initialize COM", __LINE__)) {
+        return PyLong_FromLongLong(1);
+    }
+    main_thread_id = GetCurrentThreadId();
+    if (!run_catching_exceptions([]() {
+        speech_synthesizer = SpeechSynthesizer();
+        media_player = MediaPlayer();
+        media_player.AudioCategory(MediaPlayerAudioCategory::Speech);
+        media_player.AutoPlay(true);
+    }, "Failed to initialize SpeechSynthesizer and MediaPlayer", __LINE__)) {
+        return PyLong_FromLongLong(1);
+    }
 
     if (_isatty(_fileno(stdin))) {
         std::cout << "Welcome to winspeech. Type exit to quit." << std::endl;
     }
+    int64_t exit_code = -1;
+    main_loop_is_running.store(true);
+    Py_BEGIN_ALLOW_THREADS;
     std::string input_buffer;
-    while (true) {
+    while (exit_code < 0) {
         try {
             if (!std::getline(std::cin, input_buffer)) {
                 if (!std::cin.eof()) exit_code = 1;
@@ -997,7 +832,10 @@ run_main_loop(PyObject*, PyObject*) {
             }
             rtrim(input_buffer);
             if (input_buffer.size() > 0) {
-                if ((exit_code = handle_stdin_message(std::move(winrt::to_hstring(input_buffer)))) >= 0) break;
+                run_catching_exceptions([&]() {
+                    exit_code = handle_stdin_message(std::move(winrt::to_hstring(input_buffer)));
+                }, "Error handling STDIN message", __LINE__);
+                if (exit_code >= 0) break;
             }
         } catch(...) {
             exit_code = 1;
@@ -1005,14 +843,13 @@
             break;
         }
     }
-    main_loop_is_running.store(false);
     Py_END_ALLOW_THREADS;
+    main_loop_is_running.store(false);
     try {
-        sx.stop_current_activity();
-        (&sx)->~Synthesizer();
-    } CATCH_ALL_EXCEPTIONS("Error stopping all activity", 0);
+        speech_synthesizer = SpeechSynthesizer{nullptr};
+        media_player = MediaPlayer{nullptr};
+    } catch(...) {}
 
     return PyLong_FromLongLong(exit_code);
 }