mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
More work on winspeech
This commit is contained in:
parent
e8e95f888d
commit
cd7d100eca
@ -11,6 +11,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <iostream>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <winrt/base.h>
|
#include <winrt/base.h>
|
||||||
#include <winrt/Windows.Foundation.Collections.h>
|
#include <winrt/Windows.Foundation.Collections.h>
|
||||||
@ -35,7 +36,15 @@ runtime_error_as_python_error(PyObject *exc_type, winrt::hresult_error const &ex
|
|||||||
else PyErr_Format(exc_type, "%s:%d:%s:[hr=0x%x] %V", file, line, prefix, hr, msg.ptr(), "Out of memory");
|
else PyErr_Format(exc_type, "%s:%d:%s:[hr=0x%x] %V", file, line, prefix, hr, msg.ptr(), "Out of memory");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
#define set_python_error_from_runtime(ex, ...) runtime_error_as_python_error(PyExc_OSError, ex, __FILE__, __LINE__, __VA_ARGS__)
|
|
||||||
|
#define CATCH_ALL_EXCEPTIONS(msg) catch(winrt::hresult_error const& ex) { \
|
||||||
|
runtime_error_as_python_error(PyExc_OSError, ex, __FILE__, __LINE__, msg); \
|
||||||
|
} catch (std::exception const &ex) { \
|
||||||
|
PyErr_Format(PyExc_OSError, "%s:%d:%s: %s", __FILE__, __LINE__, msg, ex.what()); \
|
||||||
|
} catch (...) { \
|
||||||
|
PyErr_Format(PyExc_OSError, "%s:%d:%s: Unknown exception type was raised", __FILE__, __LINE__, msg); \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
class WeakRefs {
|
class WeakRefs {
|
||||||
@ -44,84 +53,144 @@ class WeakRefs {
|
|||||||
std::unordered_map<id_type, T*> refs;
|
std::unordered_map<id_type, T*> refs;
|
||||||
id_type counter;
|
id_type counter;
|
||||||
public:
|
public:
|
||||||
void register_ref(T *self) {
|
id_type register_ref(T *self) {
|
||||||
std::scoped_lock lock(weak_ref_lock);
|
std::scoped_lock lock(weak_ref_lock);
|
||||||
self->id = ++counter;
|
auto id = ++counter;
|
||||||
refs[self->id] = self;
|
refs[id] = self;
|
||||||
|
return id;
|
||||||
}
|
}
|
||||||
void unregister_ref(T *self, std::function<void(T*)> dealloc) {
|
void unregister_ref(T* self) {
|
||||||
std::scoped_lock lock(weak_ref_lock);
|
std::scoped_lock lock(weak_ref_lock);
|
||||||
dealloc(self);
|
auto id = self->clear_id();
|
||||||
refs.erase(self->id);
|
refs.erase(id);
|
||||||
self->id = 0;
|
self->~T();
|
||||||
}
|
}
|
||||||
void use_ref(id_type id, DWORD creation_thread_id, std::function<void(T*)> callback) {
|
void use_ref(id_type id, std::function<void(T*)> callback) {
|
||||||
if (GetCurrentThreadId() == creation_thread_id) {
|
std::scoped_lock lock(weak_ref_lock);
|
||||||
try {
|
try {
|
||||||
callback(at(id));
|
callback(refs.at(id));
|
||||||
} catch (std::out_of_range) {
|
} catch (std::out_of_range) {
|
||||||
callback(NULL);
|
callback(NULL);
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
std::scoped_lock lock(weak_ref_lock);
|
|
||||||
try {
|
|
||||||
callback(at(id));
|
|
||||||
} catch (std::out_of_range) {
|
|
||||||
callback(NULL);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Synthesizer {
|
enum class EventType {
|
||||||
PyObject_HEAD
|
playback_state_changed = 1, media_opened, media_failed, media_ended
|
||||||
|
};
|
||||||
|
|
||||||
|
class Event {
|
||||||
|
private:
|
||||||
|
EventType type;
|
||||||
|
public:
|
||||||
|
Event(EventType type) : type(type) {}
|
||||||
|
Event(const Event &source) : type(source.type) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
class SynthesizerImplementation {
|
||||||
|
private:
|
||||||
id_type id;
|
id_type id;
|
||||||
DWORD creation_thread_id;
|
DWORD creation_thread_id;
|
||||||
SpeechSynthesizer synth{nullptr};
|
SpeechSynthesizer synth{nullptr};
|
||||||
MediaPlayer player{nullptr};
|
MediaPlayer player{nullptr};
|
||||||
|
|
||||||
|
struct {
|
||||||
|
MediaPlaybackSession::PlaybackStateChanged_revoker playback_state_changed;
|
||||||
|
MediaPlayer::MediaEnded_revoker media_ended;
|
||||||
|
MediaPlayer::MediaOpened_revoker media_opened;
|
||||||
|
MediaPlayer::MediaFailed_revoker media_failed;
|
||||||
|
} revoker;
|
||||||
|
|
||||||
|
std::vector<Event> events;
|
||||||
|
std::mutex events_lock;
|
||||||
|
public:
|
||||||
|
SynthesizerImplementation();
|
||||||
|
void add_simple_event(EventType type) {
|
||||||
|
try {
|
||||||
|
std::scoped_lock lock(events_lock);
|
||||||
|
events.emplace_back(type);
|
||||||
|
} catch(...) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
SpeechSynthesisStream synthesize(const std::wstring_view &text, bool is_ssml = false) {
|
||||||
|
if (is_ssml) return synth.SynthesizeSsmlToStreamAsync(text).get();
|
||||||
|
return synth.SynthesizeTextToStreamAsync(text).get();
|
||||||
|
}
|
||||||
|
|
||||||
|
void speak(const std::wstring_view &text, bool is_ssml = false) {
|
||||||
|
SpeechSynthesisStream stream = synthesize(text, is_ssml);
|
||||||
|
MediaSource source = winrt::Windows::Media::Core::MediaSource::CreateFromStream(stream, stream.ContentType());
|
||||||
|
player.Source(source);
|
||||||
|
player.Play();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_creation_thread() const noexcept {
|
||||||
|
return creation_thread_id == GetCurrentThreadId();
|
||||||
|
}
|
||||||
|
|
||||||
|
id_type clear_id() noexcept {
|
||||||
|
auto ans = id;
|
||||||
|
id = 0;
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Synthesizer {
|
||||||
|
PyObject_HEAD
|
||||||
|
SynthesizerImplementation impl;
|
||||||
};
|
};
|
||||||
|
|
||||||
static PyTypeObject SynthesizerType = {
|
static PyTypeObject SynthesizerType = {
|
||||||
PyVarObject_HEAD_INIT(NULL, 0)
|
PyVarObject_HEAD_INIT(NULL, 0)
|
||||||
};
|
};
|
||||||
|
|
||||||
static WeakRefs<Synthesizer> synthesizer_weakrefs;
|
static WeakRefs<SynthesizerImplementation> synthesizer_weakrefs;
|
||||||
|
|
||||||
|
SynthesizerImplementation::SynthesizerImplementation() {
|
||||||
|
events.reserve(128);
|
||||||
|
synth = SpeechSynthesizer();
|
||||||
|
player = MediaPlayer();
|
||||||
|
player.AudioCategory(MediaPlayerAudioCategory::Speech);
|
||||||
|
creation_thread_id = GetCurrentThreadId();
|
||||||
|
id = synthesizer_weakrefs.register_ref(this);
|
||||||
|
id_type self_id = id;
|
||||||
|
#define simple_event_listener(method, event_type) \
|
||||||
|
revoker.event_type = method(winrt::auto_revoke, [self_id](auto, const auto &args) { \
|
||||||
|
fprintf(stderr, "111111111 %s\n", #event_type); fflush(stderr); \
|
||||||
|
synthesizer_weakrefs.use_ref(self_id, [](auto s) { \
|
||||||
|
if (s) s->add_simple_event(EventType::event_type); \
|
||||||
|
}); \
|
||||||
|
});
|
||||||
|
simple_event_listener(player.PlaybackSession().PlaybackStateChanged, playback_state_changed);
|
||||||
|
simple_event_listener(player.MediaOpened, media_opened);
|
||||||
|
simple_event_listener(player.MediaEnded, media_ended);
|
||||||
|
#undef simple_event_listener
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
Synthesizer_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { INITIALIZE_COM_IN_FUNCTION
|
Synthesizer_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { INITIALIZE_COM_IN_FUNCTION
|
||||||
Synthesizer *self = (Synthesizer *) type->tp_alloc(type, 0);
|
Synthesizer *self = (Synthesizer *) type->tp_alloc(type, 0);
|
||||||
if (self) {
|
if (self) {
|
||||||
|
auto i = &self->impl;
|
||||||
try {
|
try {
|
||||||
self->synth = SpeechSynthesizer();
|
new (i) SynthesizerImplementation();
|
||||||
self->player = MediaPlayer();
|
} CATCH_ALL_EXCEPTIONS("Failed to create SynthesizerImplementation object");
|
||||||
self->player.AudioCategory(MediaPlayerAudioCategory::Speech);
|
if (PyErr_Occurred()) { Py_CLEAR(self); }
|
||||||
} catch(winrt::hresult_error const& ex) {
|
|
||||||
set_python_error_from_runtime(ex, "Failed to get SpeechSynthesisStream from text");
|
|
||||||
Py_CLEAR(self);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (PyErr_Occurred()) { Py_CLEAR(self); }
|
|
||||||
if (self) {
|
|
||||||
self->creation_thread_id = GetCurrentThreadId();
|
|
||||||
synthesizer_weakrefs.register_ref(self);
|
|
||||||
com.detach();
|
|
||||||
}
|
}
|
||||||
|
if (self) com.detach();
|
||||||
return (PyObject*)self;
|
return (PyObject*)self;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
Synthesizer_dealloc(Synthesizer *self_) {
|
Synthesizer_dealloc(Synthesizer *self) {
|
||||||
synthesizer_weakrefs.unregister_ref(self_, [](Synthesizer *self) {
|
auto *i = &self->impl;
|
||||||
try {
|
try {
|
||||||
self->~Synthesizer();
|
synthesizer_weakrefs.unregister_ref(i);
|
||||||
} catch (...) {
|
} CATCH_ALL_EXCEPTIONS("Failed to destruct SynthesizerImplementation");
|
||||||
fprintf(stderr, "Unhandled exception during Synthesizer object destruction, ignored.\n");
|
if (PyErr_Occurred()) { PyErr_Print(); }
|
||||||
}
|
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
CoUninitialize();
|
||||||
CoUninitialize();
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -130,8 +199,7 @@ ensure_current_thread_has_message_queue(void) {
|
|||||||
PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE);
|
PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define PREPARE_METHOD_CALL ensure_current_thread_has_message_queue(); if (GetCurrentThreadId() != self->creation_thread_id) { PyErr_SetString(PyExc_RuntimeError, "Cannot use a Synthesizer object from a thread other than the thread it was created in"); return NULL; }
|
#define PREPARE_METHOD_CALL ensure_current_thread_has_message_queue(); if (!self->impl.is_creation_thread()) { PyErr_SetString(PyExc_RuntimeError, "Cannot use a Synthesizer object from a thread other than the thread it was created in"); return NULL; }
|
||||||
|
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
Synthesizer_speak(Synthesizer *self, PyObject *args) {
|
Synthesizer_speak(Synthesizer *self, PyObject *args) {
|
||||||
@ -139,16 +207,10 @@ Synthesizer_speak(Synthesizer *self, PyObject *args) {
|
|||||||
wchar_raii pytext;
|
wchar_raii pytext;
|
||||||
int is_ssml = 0;
|
int is_ssml = 0;
|
||||||
if (!PyArg_ParseTuple(args, "O&|p", py_to_wchar_no_none, &pytext, &is_ssml)) return NULL;
|
if (!PyArg_ParseTuple(args, "O&|p", py_to_wchar_no_none, &pytext, &is_ssml)) return NULL;
|
||||||
SpeechSynthesisStream stream{nullptr};
|
|
||||||
try {
|
try {
|
||||||
if (is_ssml) stream = self->synth.SynthesizeSsmlToStreamAsync(pytext.as_view()).get();
|
self->impl.speak(pytext.as_view(), (bool)is_ssml);
|
||||||
else stream = self->synth.SynthesizeTextToStreamAsync(pytext.as_view()).get();
|
} CATCH_ALL_EXCEPTIONS("Failed to start speaking text");
|
||||||
} catch (winrt::hresult_error const& ex) {
|
if (PyErr_Occurred()) return NULL;
|
||||||
return set_python_error_from_runtime(ex, "Failed to get SpeechSynthesisStream from text");
|
|
||||||
}
|
|
||||||
MediaSource source = winrt::Windows::Media::Core::MediaSource::CreateFromStream(stream, stream.ContentType());
|
|
||||||
self->player.Source(source);
|
|
||||||
self->player.Play();
|
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,11 +226,9 @@ Synthesizer_create_recording(Synthesizer *self, PyObject *args) {
|
|||||||
|
|
||||||
SpeechSynthesisStream stream{nullptr};
|
SpeechSynthesisStream stream{nullptr};
|
||||||
try {
|
try {
|
||||||
if (is_ssml) stream = self->synth.SynthesizeSsmlToStreamAsync(pytext.as_view()).get();
|
stream = self->impl.synthesize(pytext.as_view(), (bool)is_ssml);
|
||||||
else stream = self->synth.SynthesizeTextToStreamAsync(pytext.as_view()).get();
|
} CATCH_ALL_EXCEPTIONS( "Failed to get SpeechSynthesisStream from text");
|
||||||
} catch(winrt::hresult_error const& ex) {
|
if (PyErr_Occurred()) return NULL;
|
||||||
return set_python_error_from_runtime(ex, "Failed to get SpeechSynthesisStream from text");
|
|
||||||
}
|
|
||||||
unsigned long long stream_size = stream.Size(), bytes_read = 0;
|
unsigned long long stream_size = stream.Size(), bytes_read = 0;
|
||||||
DataReader reader(stream);
|
DataReader reader(stream);
|
||||||
unsigned int n;
|
unsigned int n;
|
||||||
@ -176,9 +236,8 @@ Synthesizer_create_recording(Synthesizer *self, PyObject *args) {
|
|||||||
while (bytes_read < stream_size) {
|
while (bytes_read < stream_size) {
|
||||||
try {
|
try {
|
||||||
n = reader.LoadAsync(chunk_size).get();
|
n = reader.LoadAsync(chunk_size).get();
|
||||||
} catch(winrt::hresult_error const& ex) {
|
} CATCH_ALL_EXCEPTIONS("Failed to load data from DataReader");
|
||||||
return set_python_error_from_runtime(ex, "Failed to load data from DataReader");
|
if (PyErr_Occurred()) return NULL;
|
||||||
}
|
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
bytes_read += n;
|
bytes_read += n;
|
||||||
pyobject_raii b(PyBytes_FromStringAndSize(NULL, n));
|
pyobject_raii b(PyBytes_FromStringAndSize(NULL, n));
|
||||||
@ -209,9 +268,8 @@ voice_as_dict(VoiceInformation const& voice) {
|
|||||||
"language", voice.Language().c_str(),
|
"language", voice.Language().c_str(),
|
||||||
"gender", gender
|
"gender", gender
|
||||||
);
|
);
|
||||||
} catch(winrt::hresult_error const& ex) {
|
} CATCH_ALL_EXCEPTIONS("Could not convert Voice to dict");
|
||||||
return set_python_error_from_runtime(ex);
|
return NULL;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -231,18 +289,16 @@ all_voices(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTION
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ans.detach();
|
return ans.detach();
|
||||||
} catch(winrt::hresult_error const& ex) {
|
} CATCH_ALL_EXCEPTIONS("Could not get all voices");
|
||||||
return set_python_error_from_runtime(ex);
|
return NULL;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
default_voice(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTION
|
default_voice(PyObject* /*self*/, PyObject* /*args*/) { INITIALIZE_COM_IN_FUNCTION
|
||||||
try {
|
try {
|
||||||
return voice_as_dict(SpeechSynthesizer::DefaultVoice());
|
return voice_as_dict(SpeechSynthesizer::DefaultVoice());
|
||||||
} catch(winrt::hresult_error const& ex) {
|
} CATCH_ALL_EXCEPTIONS("Could not get default voice");
|
||||||
return set_python_error_from_runtime(ex);
|
return NULL;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define M(name, args) { #name, (PyCFunction)Synthesizer_##name, args, ""}
|
#define M(name, args) { #name, (PyCFunction)Synthesizer_##name, args, ""}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user