From 90214194408a4d73d2e22e419b54148a6c5f8c1d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 29 Jul 2025 21:56:01 +0530 Subject: [PATCH] Get the piper module working on windows --- bypy/sources.json | 6 +++--- bypy/windows/__main__.py | 13 ++++++++++--- setup/build_environment.py | 3 +++ src/calibre/utils/tts/piper.cpp | 30 ++++++++++++++++++++---------- src/calibre/utils/tts/piper.py | 3 +++ 5 files changed, 39 insertions(+), 16 deletions(-) diff --git a/bypy/sources.json b/bypy/sources.json index 12f7e0ab9d..c18d861ba5 100644 --- a/bypy/sources.json +++ b/bypy/sources.json @@ -18,9 +18,9 @@ "name": "cmake", "os": "macos", "unix": { - "filename": "cmake-3.27.6.tar.gz", - "hash": "sha256:ef3056df528569e0e8956f6cf38806879347ac6de6a4ff7e4105dc4578732cfb", - "urls": ["https://github.com/Kitware/CMake/releases/download/v3.27.6/{filename}"] + "filename": "cmake-3.31.8.tar.gz", + "hash": "sha256:e3cde3ca83dc2d3212105326b8f1b565116be808394384007e7ef1c253af6caa", + "urls": ["https://github.com/Kitware/CMake/releases/download/v3.31.8/{filename}"] } }, diff --git a/bypy/windows/__main__.py b/bypy/windows/__main__.py index 14a025d659..8ae57e86ec 100644 --- a/bypy/windows/__main__.py +++ b/bypy/windows/__main__.py @@ -18,7 +18,6 @@ import zipfile from bypy.constants import CL, LINK, MT, PREFIX, RC, SIGNTOOL, SW, build_dir, python_major_minor_version, worker_env from bypy.constants import SRC as CALIBRE_DIR from bypy.freeze import cleanup_site_packages, extract_extension_modules, freeze_python, path_to_freeze_dir -from bypy.pkgs.piper import copy_piper_dir from bypy.utils import mkdtemp, py_compile, run, walk iv = globals()['init_env'] @@ -96,6 +95,7 @@ class Env: self.lib_dir = j(self.app_base, 'Lib') self.pylib = j(self.app_base, 'pylib.zip') self.dll_dir = j(self.app_base, 'bin') + self.share_dir = j(self.app_base, 'share') self.portable_base = j(d(self.base), 'Calibre Portable') self.obj_dir = j(build_dir, 'launcher') self.installer_dir = j(build_dir, 'wix') @@ -105,6 +105,7 @@ class Env: def initbase(env): os.makedirs(env.app_base) os.mkdir(env.dll_dir) + os.mkdir(env.share_dir) try: shutil.rmtree(env.dist) except EnvironmentError as err: @@ -130,18 +131,24 @@ def freeze(env, ext_dir, incdir): shutil.copy2(x + '.manifest', dest) bindir = os.path.join(PREFIX, 'bin') + libdir = os.path.join(PREFIX, 'lib') for x in ('pdftohtml', 'pdfinfo', 'pdftoppm', 'pdftotext', 'jpegtran-calibre', 'cjpeg-calibre', 'optipng-calibre', 'cwebp-calibre', 'JXRDecApp-calibre'): copybin(os.path.join(bindir, x + '.exe')) + # piper + for x in ('espeak-ng-data',): + shutil.copytree(os.path.join(PREFIX, 'share', x), os.path.join(env.share_dir, x)) + copybin(os.path.join(libdir, "onnxruntime.dll")) + for f in glob.glob(os.path.join(bindir, '*.dll')): if re.search(r'(easylzma|icutest)', f.lower()) is None: copybin(f) + ossm = os.path.join(env.dll_dir, 'ossl-modules') os.mkdir(ossm) - for f in glob.glob(os.path.join(PREFIX, 'lib', 'ossl-modules', '*.dll')): + for f in glob.glob(os.path.join(libdir, 'ossl-modules', '*.dll')): copybin(f, ossm) for f in glob.glob(os.path.join(PREFIX, 'ffmpeg', 'bin', '*.dll')): copybin(f) - copy_piper_dir(PREFIX, env.dll_dir) copybin(os.path.join(env.python_base, 'python%s.dll' % env.py_ver.replace('.', ''))) copybin(os.path.join(env.python_base, 'python%s.dll' % env.py_ver[0])) diff --git a/setup/build_environment.py b/setup/build_environment.py index 4adda7f415..7ea56c685e 100644 --- a/setup/build_environment.py +++ b/setup/build_environment.py @@ -181,6 +181,9 @@ if iswindows: zlib_lib_dirs = [sw_lib_dir] podofo_inc = os.path.join(sw_inc_dir, 'podofo') podofo_lib = sw_lib_dir + piper_inc_dirs = [sw_inc_dir, os.path.join(sw_inc_dir, 'onnxruntime')] + piper_lib_dirs = [sw_lib_dir] + piper_libs = ['espeak-ng', 'onnxruntime'] elif ismacos: sw = os.environ.get('SW', os.path.expanduser('~/sw')) sw_inc_dir = os.path.join(sw, 'include') diff --git a/src/calibre/utils/tts/piper.cpp b/src/calibre/utils/tts/piper.cpp index f56045297e..18f98ec432 100644 --- a/src/calibre/utils/tts/piper.cpp +++ b/src/calibre/utils/tts/piper.cpp @@ -11,11 +11,14 @@ #include #include #include -#include #include #include #include #include +#ifdef _WIN32 +#define ORT_DLL_IMPORT +#endif +#include #define CLAUSE_INTONATION_FULL_STOP 0x00000000 #define CLAUSE_INTONATION_COMMA 0x00001000 @@ -137,8 +140,8 @@ phonemize(PyObject *self, PyObject *pytext) { static PyObject* set_voice(PyObject *self, PyObject *args) { - PyObject *cfg; const char *model_path; - if (!PyArg_ParseTuple(args, "Os", &cfg, &model_path)) return NULL; + PyObject *cfg; PyObject *pymp; + if (!PyArg_ParseTuple(args, "OU", &cfg, &pymp)) return NULL; PyObject *evn = PyObject_GetAttrString(cfg, "espeak_voice_name"); if (!evn) return NULL; @@ -155,10 +158,10 @@ set_voice(PyObject *self, PyObject *args) { } G(sample_rate, current_sample_rate, PyLong_AsLong); G(num_speakers, current_num_speakers, PyLong_AsLong); - G(length_scale, current_length_scale, PyFloat_AsDouble); - G(noise_scale, current_noise_scale, PyFloat_AsDouble); - G(noise_w, current_noise_w, PyFloat_AsDouble); - G(sentence_delay, current_sentence_delay, PyFloat_AsDouble); + G(length_scale, current_length_scale, (float)PyFloat_AsDouble); + G(noise_scale, current_noise_scale, (float)PyFloat_AsDouble); + G(noise_w, current_noise_w, (float)PyFloat_AsDouble); + G(sentence_delay, current_sentence_delay, (float)PyFloat_AsDouble); #undef G PyObject *map = PyObject_GetAttrString(cfg, "phoneme_id_map"); @@ -187,7 +190,14 @@ set_voice(PyObject *self, PyObject *args) { opts.DisableProfiling(); Ort::Env ort_env{ORT_LOGGING_LEVEL_WARNING, "piper"}; session.reset(); +#ifdef _WIN32 + wchar_t *model_path = PyUnicode_AsWideCharString(pymp, NULL); + if (!model_path) return NULL; session = std::make_unique(Ort::Session(ort_env, model_path, opts)); + PyMem_Free(model_path); +#else + session = std::make_unique(Ort::Session(ort_env, PyUnicode_AsUTF8(pymp), opts)); +#endif Py_END_ALLOW_THREADS; Py_RETURN_NONE; @@ -341,20 +351,20 @@ next(PyObject *self, PyObject *args) { int num_samples; const float *audio_tensor_data; Py_BEGIN_ALLOW_THREADS; auto audio_shape = output_tensors.front().GetTensorTypeAndShapeInfo().GetShape(); - num_samples = audio_shape[audio_shape.size() - 1]; + num_samples = (int)audio_shape[audio_shape.size() - 1]; audio_tensor_data = output_tensors.front().GetTensorData(); Py_END_ALLOW_THREADS; PyObject *ans = NULL, *data = NULL; int num_of_silence_samples = 0; - if (current_sentence_delay > 0) num_of_silence_samples = current_sample_rate * current_sentence_delay; + if (current_sentence_delay > 0) num_of_silence_samples = (int)(current_sample_rate * current_sentence_delay); if (as_16bit_samples) { data = PyBytes_FromStringAndSize(NULL, sizeof(int16_t) * (num_samples + num_of_silence_samples)); if (data) { Py_BEGIN_ALLOW_THREADS; int16_t *x = (int16_t*)PyBytes_AS_STRING(data); for (int i = 0; i < num_samples; i++) { - x[i] = std::max(-1.f, std::min(audio_tensor_data[i], 1.f)) * std::numeric_limits::max(); + x[i] = (int16_t)(std::max(-1.f, std::min(audio_tensor_data[i], 1.f)) * std::numeric_limits::max()); } memset(x + num_samples, 0, num_of_silence_samples * sizeof(int16_t)); Py_END_ALLOW_THREADS; diff --git a/src/calibre/utils/tts/piper.py b/src/calibre/utils/tts/piper.py index f653a3166d..60acdc6829 100644 --- a/src/calibre/utils/tts/piper.py +++ b/src/calibre/utils/tts/piper.py @@ -12,6 +12,7 @@ from threading import Lock, Thread from typing import Any, NamedTuple import calibre_extensions.piper as piper +from calibre.constants import iswindows DEFAULT_LENGTH_SCALE = 1.0 DEFAULT_NOISE_SCALE = 0.667 @@ -61,6 +62,8 @@ def load_voice_config(path: str) -> VoiceConfig: def espeak_data_dir() -> str: if not getattr(sys, 'frozen', False): return '' + if iswindows: + return os.path.join(os.path.dirname(sys.executables_location), 'share', 'espeak-ng-data') return os.path.join(sys.executables_location, 'share', 'espeak-ng-data')