From de179fc1b9b317657ec3f1609d375f275c7880f5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 15 Nov 2020 21:55:05 +0530 Subject: [PATCH] Switch to speech-dispatcher from espeak --- bypy/linux.conf | 2 +- bypy/sources.json | 10 + setup/extensions.json | 6 - src/calibre/constants.py | 2 +- src/calibre/test_build.py | 5 + src/calibre/utils/tts/espeak.cpp | 303 ------------------------------- src/calibre/utils/tts/espeak.py | 39 ---- 7 files changed, 17 insertions(+), 350 deletions(-) delete mode 100644 src/calibre/utils/tts/espeak.cpp delete mode 100644 src/calibre/utils/tts/espeak.py diff --git a/bypy/linux.conf b/bypy/linux.conf index c863d16ad3..76b8b4c192 100644 --- a/bypy/linux.conf +++ b/bypy/linux.conf @@ -1,4 +1,4 @@ image 'https://partner-images.canonical.com/core/xenial/current/ubuntu-xenial-core-cloudimg-{}-root.tar.gz' # Build time deps for Qt. See http://doc.qt.io/qt-5/linux-requirements.html and https://wiki.qt.io/Building_Qt_5_from_Git -deps 'flex bison gperf ruby libx11-dev libxext-dev libxfixes-dev libxi-dev libxrender-dev libxcb1-dev libx11-xcb-dev libxcb-glx0-dev libxcb-keysyms1-dev libxcb-image0-dev libxcb-shm0-dev libxcb-icccm4-dev libxcb-sync0-dev libxcb-xfixes0-dev libxcb-shape0-dev libxcb-randr0-dev libxcb-render-util0-dev libxcb-xinerama0-dev xkb-data libglu1-mesa-dev libxkbcommon-dev libinput-dev libxkbcommon-x11-dev libgtk2.0-dev libvulkan-dev libwayland-dev libwayland-egl1-mesa libegl1-mesa-dev libxtst-dev libnss3-dev libfreetype6-dev libfontconfig-dev libespeak-ng-dev' +deps 'flex bison gperf ruby libx11-dev libxext-dev libxfixes-dev libxi-dev libxrender-dev libxcb1-dev libx11-xcb-dev libxcb-glx0-dev libxcb-keysyms1-dev libxcb-image0-dev libxcb-shm0-dev libxcb-icccm4-dev libxcb-sync0-dev libxcb-xfixes0-dev libxcb-shape0-dev libxcb-randr0-dev libxcb-render-util0-dev libxcb-xinerama0-dev xkb-data libglu1-mesa-dev libxkbcommon-dev libinput-dev libxkbcommon-x11-dev libgtk2.0-dev libvulkan-dev libwayland-dev libwayland-egl1-mesa libegl1-mesa-dev libxtst-dev libnss3-dev libfreetype6-dev libfontconfig-dev' diff --git a/bypy/sources.json b/bypy/sources.json index 0feae4a85f..a8d3cc3abd 100644 --- a/bypy/sources.json +++ b/bypy/sources.json @@ -840,6 +840,16 @@ } }, + { + "name": "speech-dispatcher-client", + "os": "linux", + "unix": { + "filename": "speech-dispatcher-0.10.1.tar.gz", + "hash": "sha256:098c9e56f3226b2d9b98ba23b154bfc54ad29cdd3f2bc3e5cbb9eb55e7775448", + "urls": ["https://github.com/brailcom/speechd/releases/download/0.10.1/{filename}"] + } + }, + { "name": "pyqt", "unix": { diff --git a/setup/extensions.json b/setup/extensions.json index 38421c258f..2c9d5187de 100644 --- a/setup/extensions.json +++ b/setup/extensions.json @@ -179,12 +179,6 @@ "sources": "calibre/utils/cocoa.m calibre/utils/cocoa_wrapper.c", "ldflags": "-framework Cocoa" }, - { - "name": "espeak", - "only": "linux haiku", - "sources": "calibre/utils/tts/espeak.cpp", - "libraries": "espeak-ng" - }, { "name": "libusb", "only": "macos linux haiku", diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 36a4478a4d..3cc4682c24 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -253,7 +253,7 @@ class ExtensionsImporter: elif ismacos: extra = ('usbobserver', 'cocoa', 'libusb', 'libmtp') elif isfreebsd or ishaiku or islinux: - extra = ('libusb', 'libmtp', 'espeak') + extra = ('libusb', 'libmtp') else: extra = () self.calibre_extensions = frozenset(extensions + extra) diff --git a/src/calibre/test_build.py b/src/calibre/test_build.py index fa9b02ea76..1104ddbc7c 100644 --- a/src/calibre/test_build.py +++ b/src/calibre/test_build.py @@ -96,6 +96,11 @@ class BuildTest(unittest.TestCase): import soupsieve, bs4 del soupsieve, bs4 + @unittest.skipUnless(islinux, 'Speech dispatcher only used on Linux') + def test_speech_dispatcher(self): + from speechd.client import SSIPClient + del SSIPClient + def test_zeroconf(self): import zeroconf as z, ifaddr del z diff --git a/src/calibre/utils/tts/espeak.cpp b/src/calibre/utils/tts/espeak.cpp deleted file mode 100644 index 94a4cba8f4..0000000000 --- a/src/calibre/utils/tts/espeak.cpp +++ /dev/null @@ -1,303 +0,0 @@ -/* - * espeak.cpp - * Copyright (C) 2020 Kovid Goyal - * - * Distributed under terms of the GPL3 license. - */ - - -#define PY_SSIZE_T_CLEAN -#define UNICODE -#define _UNICODE -#include -#include - -static PyObject *EspeakError = NULL; - -typedef struct { - PyThreadState *thread_state; - PyObject *data_callback, *err_type, *err_value, *err_traceback; -} CallbackData; - -class ScopedGILAcquire { -public: - inline ScopedGILAcquire(CallbackData *cbd) : data(cbd) { - if (data && data->thread_state) { - PyEval_RestoreThread(data->thread_state); - data->thread_state = NULL; - } - } - inline ~ScopedGILAcquire() { if (data) data->thread_state = PyEval_SaveThread(); } -private: - CallbackData *data; -}; - -class pyobject_raii { - private: - PyObject *handle; - pyobject_raii( const pyobject_raii & ) ; - pyobject_raii & operator=( const pyobject_raii & ) ; - - public: - pyobject_raii() : handle(NULL) {} - pyobject_raii(PyObject* h) : handle(h) {} - - ~pyobject_raii() { Py_CLEAR(handle); } - - PyObject *ptr() { return handle; } - void set_ptr(PyObject *val) { handle = val; } - PyObject **address() { return &handle; } - explicit operator bool() const { return handle != NULL; } - PyObject *detach() { PyObject *ans = handle; handle = NULL; return ans; } -}; - -static bool initialize_called = false; - -static PyObject* -terminate(PyObject *self, PyObject *args) { - if (initialize_called) { - espeak_Terminate(); - initialize_called = false; - } - Py_RETURN_NONE; -} - -static PyObject* -info(PyObject *self, PyObject *args) { - const char *path_data; - const char *version = espeak_Info(&path_data); - return Py_BuildValue("ss", version, path_data); -} - -static PyObject* -list_voices(PyObject *self, PyObject *args, PyObject *kw) { - espeak_VOICE q = {0}; - static const char* kwds[] = {"name", "language", "identifier", "gender", "age", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kw, "|$sssBB", (char**)kwds, &q.name, &q.languages, &q.identifier, &q.gender, &q.age)) return NULL; - const espeak_VOICE **voices; - Py_BEGIN_ALLOW_THREADS; - voices = espeak_ListVoices(&q); - Py_END_ALLOW_THREADS; - pyobject_raii ans(PyList_New(0)); - if (!ans) return NULL; - while (*voices) { - const espeak_VOICE *x = *voices; - pyobject_raii languages(PyList_New(0)); - if (!languages) return NULL; - const char *pos = x->languages; - while (pos && *pos) { - const char priority = *pos; - size_t sz = strlen(++pos); - if (!sz) break; - pyobject_raii lang(Py_BuildValue("bs", priority, pos)); - if (!lang) return NULL; - if (PyList_Append(languages.ptr(), lang.ptr()) != 0) return NULL; - pos += sz + 1; - } - pyobject_raii entry(Py_BuildValue("{ss ss sO sB sB}", - "name", x->name, "identifier", x->identifier, "languages", languages.ptr(), - "gender", x->gender, "age", x->age)); - if (!entry) return NULL; - if (PyList_Append(ans.ptr(), entry.ptr()) != 0) return NULL; - voices++; - } - return ans.detach(); -} - -static PyObject* -set_espeak_error(const char *prefix, espeak_ERROR err, const char *file, const int line) { - const char *m = "Unknown error"; - switch(err) { - case EE_OK: - m = "No error"; break; - case EE_INTERNAL_ERROR: - m = "Internal error"; break; - case EE_BUFFER_FULL: - m = "Buffer full"; break; - case EE_NOT_FOUND: - m = "Not found"; break; - } - PyErr_Format(EspeakError, "[%s:%d] %s: %s", file, line, prefix, m); - return NULL; -} -#define espeak_error(prefix, err) set_espeak_error(prefix, err, __FILE__, __LINE__) - -static PyObject* -set_voice_by_properties(PyObject *self, PyObject *args, PyObject *kw) { - espeak_VOICE q = {0}; - static const char* kwds[] = {"name", "language", "gender", "age", "variant", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kw, "|$ssBBB", (char**)kwds, &q.name, &q.languages, &q.gender, &q.age, &q.variant)) return NULL; - espeak_ERROR err = espeak_SetVoiceByProperties(&q); - if (err != EE_OK) return espeak_error("Failed to set voice by properties", err); - Py_RETURN_NONE; -} - -static PyObject* -cancel(PyObject *self, PyObject *args) { - espeak_ERROR err; - Py_BEGIN_ALLOW_THREADS; - err = espeak_Cancel(); - Py_END_ALLOW_THREADS; - if (err != EE_OK) return espeak_error("Failed to cancel speech", err); - Py_RETURN_NONE; -} - -static PyObject* -is_playing(PyObject *self, PyObject *args) { - int ans; - Py_BEGIN_ALLOW_THREADS - ans = espeak_IsPlaying(); - Py_END_ALLOW_THREADS - return Py_BuildValue("O", ans ? Py_True : Py_False); - -} - -static PyObject* -synchronize(PyObject *self, PyObject *args) { - espeak_ERROR err; - Py_BEGIN_ALLOW_THREADS; - err = espeak_Synchronize(); - Py_END_ALLOW_THREADS; - if (err != EE_OK) return espeak_error("Failed to synchronize speech", err); - Py_RETURN_NONE; -} - -static PyObject* -set_parameter(PyObject *self, PyObject *args) { - espeak_PARAMETER param; - int value, relative = 0; - if (!PyArg_ParseTuple(args, "ii|i", ¶m, &value, &relative)) return NULL; - espeak_ERROR err; - Py_BEGIN_ALLOW_THREADS; - err = espeak_SetParameter(param, value, relative); - Py_END_ALLOW_THREADS; - if (err != EE_OK) return espeak_error("Failed to set set parameter", err); - Py_RETURN_NONE; -} - -static PyObject* -get_parameter(PyObject *self, PyObject *args) { - espeak_PARAMETER param; - int current = 1; - if (!PyArg_ParseTuple(args, "i|i", ¶m, ¤t)) return NULL; - long ans; - Py_BEGIN_ALLOW_THREADS; - ans = espeak_GetParameter(param, current); - Py_END_ALLOW_THREADS; - return PyLong_FromLong(ans); -} - -static int -synth_callback(short* wav_data, int num_samples, espeak_EVENT *evt) { - if (wav_data == NULL) return 0; - CallbackData *cbdata = static_cast(evt->user_data); - if (cbdata->data_callback) { - ScopedGILAcquire sga(cbdata); - PyObject *ret = PyObject_CallFunction(cbdata->data_callback, "y#", wav_data, num_samples * 2); - if (!ret) { - PyErr_Fetch(&cbdata->err_type, &cbdata->err_value, &cbdata->err_traceback); - return 1; - } - int r = PyObject_IsTrue(ret) ? 1 : 0; - Py_DECREF(ret); - return r; - } - return 0; -} - - -static inline void -int_as_four_bytes(int32_t value, unsigned char *output) { - output[0] = value & 0xff; - output[1] = (value >> 8) & 0xff; - output[2] = (value >> 16) & 0xff; - output[3] = (value >> 24) & 0xff; -} - - -static PyObject* -create_recording_wav(PyObject *self, PyObject *args) { - int buflength = 0; - unsigned int flags = 0; - const char *text; - Py_ssize_t text_len; - CallbackData cbdata = {0}; - if (!PyArg_ParseTuple(args, "s#O|iI", &text, &text_len, &cbdata.data_callback, &buflength, &flags)) return NULL; - espeak_Cancel(); - int rate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, buflength, NULL, espeakINITIALIZE_DONT_EXIT); - if (rate == -1) return espeak_error("Initialization failed", EE_INTERNAL_ERROR); - espeak_SetSynthCallback(synth_callback); - unsigned char wave_hdr[44] = { - 'R', 'I', 'F', 'F', 0x24, 0xf0, 0xff, 0x7f, 'W', 'A', 'V', 'E', 'f', 'm', 't', ' ', - 0x10, 0, 0, 0, 1, 0, 1, 0, 9, 0x3d, 0, 0, 0x12, 0x7a, 0, 0, - 2, 0, 0x10, 0, 'd', 'a', 't', 'a', 0x00, 0xf0, 0xff, 0x7f - }; - int_as_four_bytes(rate, wave_hdr + 24); - int_as_four_bytes(rate * 2, wave_hdr + 28); - PyObject *ret = PyObject_CallFunction(cbdata.data_callback, "y#", wave_hdr, sizeof(wave_hdr)); - if (!ret) return NULL; - Py_DECREF(ret); - - espeak_ERROR err; - cbdata.thread_state = PyEval_SaveThread(); - err = espeak_Synth(text, text_len, 0, POS_CHARACTER, 0, flags | espeakCHARS_UTF8, NULL, &cbdata); - if (cbdata.thread_state) PyEval_RestoreThread(cbdata.thread_state); - if (cbdata.err_type) { - PyErr_Restore(cbdata.err_type, cbdata.err_value, cbdata.err_traceback); - return NULL; - } - if (err != EE_OK) return espeak_error("Failed to synthesize text", err); - Py_RETURN_NONE; -} - -// Boilerplate {{{ -#define M(name, args, doc) { #name, (PyCFunction)name, args, ""} -static PyMethodDef methods[] = { - M(info, METH_NOARGS, "version and path"), - M(terminate, METH_NOARGS, "terminate the library"), - M(cancel, METH_NOARGS, "cancel all ongoing speech activity"), - M(synchronize, METH_NOARGS, "synchronize all ongoing speech activity"), - M(is_playing, METH_NOARGS, "True iff speech is happening"), - M(set_parameter, METH_VARARGS, "set speech parameter"), - M(get_parameter, METH_VARARGS, "get speech parameter"), - M(create_recording_wav, METH_VARARGS, "save tts output as WAV"), - M(list_voices, METH_VARARGS | METH_KEYWORDS, "list available voices"), - M(set_voice_by_properties, METH_VARARGS | METH_KEYWORDS, "set voice by properties"), - {NULL, NULL, 0, NULL} -}; -#undef M - -static int -exec_module(PyObject *m) { -#define AI(name) if (PyModule_AddIntConstant(m, #name, espeak##name) != 0) { return -1; } - AI(RATE); AI(VOLUME); AI(PITCH); AI(RANGE); AI(PUNCTUATION); AI(CAPITALS); AI(WORDGAP); - AI(SSML); AI(PHONEMES); AI(ENDPAUSE); -#undef AI - EspeakError = PyErr_NewException("espeak.EspeakError", NULL, NULL); - if (EspeakError == NULL) return -1; - PyModule_AddObject(m, "EspeakError", EspeakError); - - int sample_rate = espeak_Initialize(AUDIO_OUTPUT_SYNCH_PLAYBACK, 0, NULL, espeakINITIALIZE_DONT_EXIT); - if (sample_rate == -1) { - PyErr_SetString(PyExc_OSError, "Failed to initialize espeak library, are the data files missing?"); - return 1; - } - initialize_called = true; - return 0; -} - -static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} }; - -static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT}; - -static void -finalize(void*) { terminate(NULL, NULL); } - -CALIBRE_MODINIT_FUNC PyInit_espeak(void) { - module_def.m_name = "espeak"; - module_def.m_doc = "espeak-ng wrapper"; - module_def.m_slots = slots; - module_def.m_free = finalize; - module_def.m_methods = methods; - return PyModuleDef_Init(&module_def); -} diff --git a/src/calibre/utils/tts/espeak.py b/src/calibre/utils/tts/espeak.py deleted file mode 100644 index 15104becb4..0000000000 --- a/src/calibre/utils/tts/espeak.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=utf-8 -# License: GPL v3 Copyright: 2020, Kovid Goyal - -def info(): - from calibre_extensions.espeak import info - return info() - - -def create_recording_wav(text, seekable_file_object_or_path, buflength=0, ssml=False, phonemes=False, endpause=False): - import struct - - from calibre_extensions.espeak import ( - ENDPAUSE, PHONEMES, SSML, create_recording_wav as doit - ) - flags = 0 - if ssml: - flags |= SSML - if phonemes: - flags |= PHONEMES - if endpause: - flags |= ENDPAUSE - if isinstance(seekable_file_object_or_path, str): - seekable_file_object = open(seekable_file_object_or_path, 'w+b') - else: - seekable_file_object = seekable_file_object_or_path - - w = seekable_file_object.write - - def write(data): - w(data) - return False - - doit(text, write, buflength, flags) - sz = seekable_file_object.tell() - seekable_file_object.seek(4) - seekable_file_object.write(struct.pack('