Switch to speech-dispatcher from espeak

This commit is contained in:
Kovid Goyal 2020-11-15 21:55:05 +05:30
parent 8e44992888
commit de179fc1b9
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
7 changed files with 17 additions and 350 deletions

View File

@ -1,4 +1,4 @@
image 'https://partner-images.canonical.com/core/xenial/current/ubuntu-xenial-core-cloudimg-{}-root.tar.gz'
# Build time deps for Qt. See http://doc.qt.io/qt-5/linux-requirements.html and https://wiki.qt.io/Building_Qt_5_from_Git
deps 'flex bison gperf ruby libx11-dev libxext-dev libxfixes-dev libxi-dev libxrender-dev libxcb1-dev libx11-xcb-dev libxcb-glx0-dev libxcb-keysyms1-dev libxcb-image0-dev libxcb-shm0-dev libxcb-icccm4-dev libxcb-sync0-dev libxcb-xfixes0-dev libxcb-shape0-dev libxcb-randr0-dev libxcb-render-util0-dev libxcb-xinerama0-dev xkb-data libglu1-mesa-dev libxkbcommon-dev libinput-dev libxkbcommon-x11-dev libgtk2.0-dev libvulkan-dev libwayland-dev libwayland-egl1-mesa libegl1-mesa-dev libxtst-dev libnss3-dev libfreetype6-dev libfontconfig-dev libespeak-ng-dev'
deps 'flex bison gperf ruby libx11-dev libxext-dev libxfixes-dev libxi-dev libxrender-dev libxcb1-dev libx11-xcb-dev libxcb-glx0-dev libxcb-keysyms1-dev libxcb-image0-dev libxcb-shm0-dev libxcb-icccm4-dev libxcb-sync0-dev libxcb-xfixes0-dev libxcb-shape0-dev libxcb-randr0-dev libxcb-render-util0-dev libxcb-xinerama0-dev xkb-data libglu1-mesa-dev libxkbcommon-dev libinput-dev libxkbcommon-x11-dev libgtk2.0-dev libvulkan-dev libwayland-dev libwayland-egl1-mesa libegl1-mesa-dev libxtst-dev libnss3-dev libfreetype6-dev libfontconfig-dev'

View File

@ -840,6 +840,16 @@
}
},
{
"name": "speech-dispatcher-client",
"os": "linux",
"unix": {
"filename": "speech-dispatcher-0.10.1.tar.gz",
"hash": "sha256:098c9e56f3226b2d9b98ba23b154bfc54ad29cdd3f2bc3e5cbb9eb55e7775448",
"urls": ["https://github.com/brailcom/speechd/releases/download/0.10.1/{filename}"]
}
},
{
"name": "pyqt",
"unix": {

View File

@ -179,12 +179,6 @@
"sources": "calibre/utils/cocoa.m calibre/utils/cocoa_wrapper.c",
"ldflags": "-framework Cocoa"
},
{
"name": "espeak",
"only": "linux haiku",
"sources": "calibre/utils/tts/espeak.cpp",
"libraries": "espeak-ng"
},
{
"name": "libusb",
"only": "macos linux haiku",

View File

@ -253,7 +253,7 @@ class ExtensionsImporter:
elif ismacos:
extra = ('usbobserver', 'cocoa', 'libusb', 'libmtp')
elif isfreebsd or ishaiku or islinux:
extra = ('libusb', 'libmtp', 'espeak')
extra = ('libusb', 'libmtp')
else:
extra = ()
self.calibre_extensions = frozenset(extensions + extra)

View File

@ -96,6 +96,11 @@ class BuildTest(unittest.TestCase):
import soupsieve, bs4
del soupsieve, bs4
# Build sanity check: the speech-dispatcher Python client must be importable.
# Skipped everywhere except Linux (see the skip message below).
@unittest.skipUnless(islinux, 'Speech dispatcher only used on Linux')
def test_speech_dispatcher(self):
# The import itself is the test; the imported name is dropped again right away.
from speechd.client import SSIPClient
del SSIPClient
def test_zeroconf(self):
import zeroconf as z, ifaddr
del z

View File

@ -1,303 +0,0 @@
/*
* espeak.cpp
* Copyright (C) 2020 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define PY_SSIZE_T_CLEAN
#define UNICODE
#define _UNICODE
#include <Python.h>
#include <espeak-ng/speak_lib.h>
// Exception type used to report espeak failures to Python. It is created in
// exec_module() and raised by set_espeak_error(); NULL until the module has executed.
static PyObject *EspeakError = NULL;
// Per-call state handed to espeak as user data and read back in synth_callback().
typedef struct {
    // Thread state saved while the GIL is released; NULL while the GIL is held.
    PyThreadState *thread_state;
    // Python callable that receives each chunk of audio data as bytes.
    PyObject *data_callback;
    // Exception raised by data_callback, stashed so the caller can re-raise it.
    PyObject *err_type;
    PyObject *err_value;
    PyObject *err_traceback;
} CallbackData;
// Scope guard that holds the GIL for its lifetime, using the thread state
// stored in a CallbackData. On construction the saved thread state (if any) is
// restored; on destruction the GIL is released again and the new thread state
// is stored back into the CallbackData.
class ScopedGILAcquire {
    CallbackData *data;

public:
    ScopedGILAcquire(CallbackData *cbd) : data(cbd) {
        // Nothing to restore without callback data or a saved thread state
        if (!data || !data->thread_state) return;
        PyEval_RestoreThread(data->thread_state);
        data->thread_state = NULL;
    }

    ~ScopedGILAcquire() {
        if (!data) return;
        data->thread_state = PyEval_SaveThread();
    }
};
// Owning wrapper around a PyObject*: the held reference is released when the
// wrapper goes out of scope, unless it has been handed off with detach().
class pyobject_raii {
    public:
        pyobject_raii() : handle(NULL) {}
        pyobject_raii(PyObject* h) : handle(h) {}

        // Copying would make two wrappers release the same reference
        pyobject_raii(const pyobject_raii &) = delete;
        pyobject_raii & operator=(const pyobject_raii &) = delete;

        ~pyobject_raii() {
            Py_CLEAR(handle);
        }

        // Borrowed access to the wrapped object
        PyObject *ptr() {
            return handle;
        }

        // Replace the wrapped pointer; the previous one is NOT released
        void set_ptr(PyObject *val) {
            handle = val;
        }

        PyObject **address() {
            return &handle;
        }

        explicit operator bool() const {
            return handle != NULL;
        }

        // Give up ownership: return the pointer and leave the wrapper empty
        PyObject *detach() {
            PyObject *released = handle;
            handle = NULL;
            return released;
        }

    private:
        PyObject *handle;
};
// Set once espeak_Initialize() has succeeded in exec_module(); cleared by terminate().
static bool initialize_called = false;

// terminate() -> None
// Shut down the espeak library if it is currently initialized; otherwise do nothing.
static PyObject*
terminate(PyObject *self, PyObject *args) {
    if (!initialize_called) Py_RETURN_NONE;
    espeak_Terminate();
    initialize_called = false;
    Py_RETURN_NONE;
}
// info() -> (version, path_data)
// Return the two strings reported by espeak_Info() as a tuple.
static PyObject*
info(PyObject *self, PyObject *args) {
    const char *data_path = NULL;
    const char *version_string = espeak_Info(&data_path);
    return Py_BuildValue("ss", version_string, data_path);
}
// list_voices(*, name, language, identifier, gender, age) -> list of dict
// All arguments are optional and keyword-only; they are copied into an
// espeak_VOICE that is passed to espeak_ListVoices() as the query.
// Each result is a dict with the keys "name", "identifier", "languages",
// "gender" and "age", where "languages" is a list of (priority, language) tuples.
static PyObject*
list_voices(PyObject *self, PyObject *args, PyObject *kw) {
    static const char* kwds[] = {"name", "language", "identifier", "gender", "age", NULL};
    espeak_VOICE filter = {0};
    if (!PyArg_ParseTupleAndKeywords(
            args, kw, "|$sssBB", (char**)kwds,
            &filter.name, &filter.languages, &filter.identifier, &filter.gender, &filter.age)) {
        return NULL;
    }

    const espeak_VOICE **voice_list;
    Py_BEGIN_ALLOW_THREADS;
    voice_list = espeak_ListVoices(&filter);
    Py_END_ALLOW_THREADS;

    pyobject_raii result(PyList_New(0));
    if (!result) return NULL;

    // The array returned by espeak is terminated by a NULL entry
    for (const espeak_VOICE **it = voice_list; *it; ++it) {
        const espeak_VOICE *voice = *it;

        pyobject_raii lang_list(PyList_New(0));
        if (!lang_list) return NULL;

        // languages is a packed sequence of records: one priority byte followed
        // by a NUL terminated string. An empty string ends the sequence.
        const char *cursor = voice->languages;
        while (cursor && *cursor) {
            const char priority = *cursor++;
            const size_t code_len = strlen(cursor);
            if (code_len == 0) break;
            pyobject_raii pair(Py_BuildValue("bs", priority, cursor));
            if (!pair) return NULL;
            if (PyList_Append(lang_list.ptr(), pair.ptr()) != 0) return NULL;
            cursor += code_len + 1;  // skip the string and its terminator
        }

        pyobject_raii voice_dict(Py_BuildValue(
            "{ss ss sO sB sB}",
            "name", voice->name,
            "identifier", voice->identifier,
            "languages", lang_list.ptr(),
            "gender", voice->gender,
            "age", voice->age));
        if (!voice_dict) return NULL;
        if (PyList_Append(result.ptr(), voice_dict.ptr()) != 0) return NULL;
    }
    return result.detach();
}
// Set an EspeakError exception of the form "[file:line] prefix: description"
// and return NULL, so callers can write `return espeak_error(...)`.
static PyObject*
set_espeak_error(const char *prefix, espeak_ERROR err, const char *file, const int line) {
    const char *description;
    switch (err) {
        case EE_OK:             description = "No error"; break;
        case EE_INTERNAL_ERROR: description = "Internal error"; break;
        case EE_BUFFER_FULL:    description = "Buffer full"; break;
        case EE_NOT_FOUND:      description = "Not found"; break;
        default:                description = "Unknown error"; break;
    }
    PyErr_Format(EspeakError, "[%s:%d] %s: %s", file, line, prefix, description);
    return NULL;
}
// Convenience wrapper that records the source location of the call site
#define espeak_error(prefix, err) set_espeak_error(prefix, err, __FILE__, __LINE__)
// set_voice_by_properties(*, name, language, gender, age, variant) -> None
// All arguments are optional and keyword-only; they fill an espeak_VOICE that
// is passed to espeak_SetVoiceByProperties(). Raises EspeakError on failure.
static PyObject*
set_voice_by_properties(PyObject *self, PyObject *args, PyObject *kw) {
    static const char* kwds[] = {"name", "language", "gender", "age", "variant", NULL};
    espeak_VOICE wanted = {0};
    if (!PyArg_ParseTupleAndKeywords(
            args, kw, "|$ssBBB", (char**)kwds,
            &wanted.name, &wanted.languages, &wanted.gender, &wanted.age, &wanted.variant)) {
        return NULL;
    }
    const espeak_ERROR status = espeak_SetVoiceByProperties(&wanted);
    if (status == EE_OK) Py_RETURN_NONE;
    return espeak_error("Failed to set voice by properties", status);
}
// cancel() -> None
// Call espeak_Cancel() with the GIL released. Raises EspeakError on failure.
static PyObject*
cancel(PyObject *self, PyObject *args) {
    espeak_ERROR status;
    Py_BEGIN_ALLOW_THREADS;
    status = espeak_Cancel();
    Py_END_ALLOW_THREADS;
    if (status == EE_OK) Py_RETURN_NONE;
    return espeak_error("Failed to cancel speech", status);
}
// is_playing() -> bool
// True iff espeak_IsPlaying() reports a non-zero value. The query runs with
// the GIL released.
static PyObject*
is_playing(PyObject *self, PyObject *args) {
    int playing;
    Py_BEGIN_ALLOW_THREADS
    playing = espeak_IsPlaying();
    Py_END_ALLOW_THREADS
    if (playing) Py_RETURN_TRUE;
    Py_RETURN_FALSE;
}
// synchronize() -> None
// Call espeak_Synchronize() with the GIL released. Raises EspeakError on failure.
static PyObject*
synchronize(PyObject *self, PyObject *args) {
    espeak_ERROR status;
    Py_BEGIN_ALLOW_THREADS;
    status = espeak_Synchronize();
    Py_END_ALLOW_THREADS;
    if (status == EE_OK) Py_RETURN_NONE;
    return espeak_error("Failed to synchronize speech", status);
}
// set_parameter(param, value, relative=0) -> None
// Forward the three integers to espeak_SetParameter() with the GIL released.
// param is one of the integer constants exported by this module (RATE,
// VOLUME, PITCH, ...). Raises EspeakError on failure.
static PyObject*
set_parameter(PyObject *self, PyObject *args) {
    // The "i" format unit stores through an int*, so parse into plain ints and
    // convert to the enum afterwards instead of passing an espeak_PARAMETER*.
    int param_id, value, relative = 0;
    if (!PyArg_ParseTuple(args, "ii|i", &param_id, &value, &relative)) return NULL;
    const espeak_PARAMETER param = static_cast<espeak_PARAMETER>(param_id);
    espeak_ERROR err;
    Py_BEGIN_ALLOW_THREADS;
    err = espeak_SetParameter(param, value, relative);
    Py_END_ALLOW_THREADS;
    if (err != EE_OK) return espeak_error("Failed to set parameter", err);
    Py_RETURN_NONE;
}
// get_parameter(param, current=1) -> int
// Return the result of espeak_GetParameter(param, current), queried with the
// GIL released. param is one of the integer constants exported by this module.
static PyObject*
get_parameter(PyObject *self, PyObject *args) {
    // The "i" format unit stores through an int*, so parse into plain ints and
    // convert to the enum afterwards instead of passing an espeak_PARAMETER*.
    int param_id, current = 1;
    if (!PyArg_ParseTuple(args, "i|i", &param_id, &current)) return NULL;
    const espeak_PARAMETER param = static_cast<espeak_PARAMETER>(param_id);
    long ans;
    Py_BEGIN_ALLOW_THREADS;
    ans = espeak_GetParameter(param, current);
    Py_END_ALLOW_THREADS;
    return PyLong_FromLong(ans);
}
static int
synth_callback(short* wav_data, int num_samples, espeak_EVENT *evt) {
if (wav_data == NULL) return 0;
CallbackData *cbdata = static_cast<CallbackData*>(evt->user_data);
if (cbdata->data_callback) {
ScopedGILAcquire sga(cbdata);
PyObject *ret = PyObject_CallFunction(cbdata->data_callback, "y#", wav_data, num_samples * 2);
if (!ret) {
PyErr_Fetch(&cbdata->err_type, &cbdata->err_value, &cbdata->err_traceback);
return 1;
}
int r = PyObject_IsTrue(ret) ? 1 : 0;
Py_DECREF(ret);
return r;
}
return 0;
}
// Store value into output[0..3], least significant byte first.
static inline void
int_as_four_bytes(int32_t value, unsigned char *output) {
    for (int byte_index = 0; byte_index < 4; byte_index++) {
        output[byte_index] = (value >> (8 * byte_index)) & 0xff;
    }
}
// create_recording_wav(text, data_callback, buflength=0, flags=0) -> None
// Synthesize text and deliver the result to data_callback as bytes objects:
// first a 44 byte WAV header, then the audio chunks produced by espeak (via
// synth_callback). Raises EspeakError if initialization or synthesis fails; an
// exception raised inside data_callback during synthesis is re-raised here.
static PyObject*
create_recording_wav(PyObject *self, PyObject *args) {
int buflength = 0;
unsigned int flags = 0;
const char *text;
Py_ssize_t text_len;
// Zero-initialized, so thread_state and the err_* fields start out NULL
CallbackData cbdata = {0};
if (!PyArg_ParseTuple(args, "s#O|iI", &text, &text_len, &cbdata.data_callback, &buflength, &flags)) return NULL;
espeak_Cancel();
// Re-initialize in synchronous mode; the value returned is used as the sample rate, -1 means failure
int rate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, buflength, NULL, espeakINITIALIZE_DONT_EXIT);
if (rate == -1) return espeak_error("Initialization failed", EE_INTERNAL_ERROR);
espeak_SetSynthCallback(synth_callback);
// WAV header template. The four bytes at offset 24 and at offset 28 are
// overwritten below with the actual rate and rate * 2.
// NOTE(review): the size fields at offsets 4 and 40 are fixed placeholder
// values here; presumably the caller patches them once the length is known.
unsigned char wave_hdr[44] = {
'R', 'I', 'F', 'F', 0x24, 0xf0, 0xff, 0x7f, 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ',
0x10, 0, 0, 0, 1, 0, 1, 0, 9, 0x3d, 0, 0, 0x12, 0x7a, 0, 0,
2, 0, 0x10, 0, 'd', 'a', 't', 'a', 0x00, 0xf0, 0xff, 0x7f
};
int_as_four_bytes(rate, wave_hdr + 24);
int_as_four_bytes(rate * 2, wave_hdr + 28);
// The header is delivered first, while the GIL is still held
PyObject *ret = PyObject_CallFunction(cbdata.data_callback, "y#", wave_hdr, sizeof(wave_hdr));
if (!ret) return NULL;
Py_DECREF(ret);
espeak_ERROR err;
// Release the GIL for the duration of the synthesis; synth_callback()
// re-acquires it through cbdata whenever it needs to call into Python.
cbdata.thread_state = PyEval_SaveThread();
err = espeak_Synth(text, text_len, 0, POS_CHARACTER, 0, flags | espeakCHARS_UTF8, NULL, &cbdata);
if (cbdata.thread_state) PyEval_RestoreThread(cbdata.thread_state);
// An exception captured by synth_callback() takes precedence over the espeak status
if (cbdata.err_type) {
PyErr_Restore(cbdata.err_type, cbdata.err_value, cbdata.err_traceback);
return NULL;
}
if (err != EE_OK) return espeak_error("Failed to synthesize text", err);
Py_RETURN_NONE;
}
// Boilerplate {{{

// Build one PyMethodDef entry: the Python-visible name is the C function name
// and doc becomes the function's docstring (it used to be dropped in favour of
// an empty string).
#define M(name, args, doc) { #name, (PyCFunction)name, args, doc }
// Functions exported by the module
static PyMethodDef methods[] = {
    M(info, METH_NOARGS, "version and path"),
    M(terminate, METH_NOARGS, "terminate the library"),
    M(cancel, METH_NOARGS, "cancel all ongoing speech activity"),
    M(synchronize, METH_NOARGS, "synchronize all ongoing speech activity"),
    M(is_playing, METH_NOARGS, "True iff speech is happening"),
    M(set_parameter, METH_VARARGS, "set speech parameter"),
    M(get_parameter, METH_VARARGS, "get speech parameter"),
    M(create_recording_wav, METH_VARARGS, "save tts output as WAV"),
    M(list_voices, METH_VARARGS | METH_KEYWORDS, "list available voices"),
    M(set_voice_by_properties, METH_VARARGS | METH_KEYWORDS, "set voice by properties"),
    {NULL, NULL, 0, NULL}
};
#undef M
// Py_mod_exec handler: adds the integer constants (RATE, VOLUME, ...) and the
// EspeakError exception to the module, then initializes the espeak library.
// Returns 0 on success and -1 with a Python exception set on failure.
static int
exec_module(PyObject *m) {
#define AI(name) if (PyModule_AddIntConstant(m, #name, espeak##name) != 0) { return -1; }
    AI(RATE); AI(VOLUME); AI(PITCH); AI(RANGE); AI(PUNCTUATION); AI(CAPITALS); AI(WORDGAP);
    AI(SSML); AI(PHONEMES); AI(ENDPAUSE);
#undef AI
    EspeakError = PyErr_NewException("espeak.EspeakError", NULL, NULL);
    if (EspeakError == NULL) return -1;
    if (PyModule_AddObject(m, "EspeakError", EspeakError) != 0) {
        // The reference is only stolen on success, so release it ourselves
        Py_CLEAR(EspeakError);
        return -1;
    }
    int sample_rate = espeak_Initialize(AUDIO_OUTPUT_SYNCH_PLAYBACK, 0, NULL, espeakINITIALIZE_DONT_EXIT);
    if (sample_rate == -1) {
        PyErr_SetString(PyExc_OSError, "Failed to initialize espeak library, are the data files missing?");
        return -1;  // the documented failure value for an exec slot
    }
    initialize_called = true;
    return 0;
}
// Slot table for the module definition: exec_module() is registered as the Py_mod_exec handler
static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
// Only the header is initialized here; the remaining fields are assigned in PyInit_espeak()
static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT};
static void
finalize(void*) { terminate(NULL, NULL); }
// Module entry point: fill in the module definition and hand it to
// PyModuleDef_Init(); the module contents are added later by exec_module().
CALIBRE_MODINIT_FUNC PyInit_espeak(void) {
    // Identity
    module_def.m_name = "espeak";
    module_def.m_doc = "espeak-ng wrapper";
    // Behaviour
    module_def.m_methods = methods;
    module_def.m_slots = slots;
    module_def.m_free = finalize;

    return PyModuleDef_Init(&module_def);
}

View File

@ -1,39 +0,0 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
def info():
    """Return the result of the ``info()`` function of the espeak extension."""
    from calibre_extensions.espeak import info as espeak_info
    return espeak_info()
def create_recording_wav(text, seekable_file_object_or_path, buflength=0, ssml=False, phonemes=False, endpause=False):
    """Synthesize ``text`` and write the result as a WAV file.

    :param text: The text to synthesize.
    :param seekable_file_object_or_path: Either a path (``str``), which is
        opened in ``'w+b'`` mode and closed again before returning, or a file
        object supporting ``write()``, ``tell()`` and ``seek()``, which is
        left open for the caller.
    :param buflength: Passed through unchanged to the espeak extension.
    :param ssml, phonemes, endpause: When true, the corresponding flag
        constant from the espeak extension is set.

    Once the audio has been written, the two 32-bit little-endian size fields
    at byte offsets 4 and 40 are overwritten based on the final file position.
    NOTE(review): this assumes a passed-in file object is positioned at 0 when
    the function is called -- confirm against callers.
    """
    import struct
    from calibre_extensions.espeak import (
        ENDPAUSE, PHONEMES, SSML, create_recording_wav as doit
    )
    flags = 0
    if ssml:
        flags |= SSML
    if phonemes:
        flags |= PHONEMES
    if endpause:
        flags |= ENDPAUSE

    # Only close the file if it was opened here; a file object supplied by the
    # caller stays under the caller's control.
    opened_here = isinstance(seekable_file_object_or_path, str)
    if opened_here:
        seekable_file_object = open(seekable_file_object_or_path, 'w+b')
    else:
        seekable_file_object = seekable_file_object_or_path
    try:
        w = seekable_file_object.write

        def write(data):
            w(data)
            # A true return value would make the extension stop the synthesis
            return False

        doit(text, write, buflength, flags)
        sz = seekable_file_object.tell()
        seekable_file_object.seek(4)
        seekable_file_object.write(struct.pack('<I', sz - 8))
        seekable_file_object.seek(40)
        seekable_file_object.write(struct.pack('<I', sz - 44))
    finally:
        if opened_here:
            seekable_file_object.close()