From 90214194408a4d73d2e22e419b54148a6c5f8c1d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 29 Jul 2025 21:56:01 +0530
Subject: [PATCH] Get the piper module working on Windows

---
 bypy/sources.json               |  6 +++---
 bypy/windows/__main__.py        | 13 ++++++++++---
 setup/build_environment.py      |  3 +++
 src/calibre/utils/tts/piper.cpp | 30 ++++++++++++++++++++----------
 src/calibre/utils/tts/piper.py  |  3 +++
 5 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/bypy/sources.json b/bypy/sources.json
index 12f7e0ab9d..c18d861ba5 100644
--- a/bypy/sources.json
+++ b/bypy/sources.json
@@ -18,9 +18,9 @@
         "name": "cmake",
         "os": "macos",
         "unix": {
-            "filename": "cmake-3.27.6.tar.gz",
-            "hash": "sha256:ef3056df528569e0e8956f6cf38806879347ac6de6a4ff7e4105dc4578732cfb",
-            "urls": ["https://github.com/Kitware/CMake/releases/download/v3.27.6/{filename}"]
+            "filename": "cmake-3.31.8.tar.gz",
+            "hash": "sha256:e3cde3ca83dc2d3212105326b8f1b565116be808394384007e7ef1c253af6caa",
+            "urls": ["https://github.com/Kitware/CMake/releases/download/v3.31.8/{filename}"]
         }
     },
 
diff --git a/bypy/windows/__main__.py b/bypy/windows/__main__.py
index 14a025d659..8ae57e86ec 100644
--- a/bypy/windows/__main__.py
+++ b/bypy/windows/__main__.py
@@ -18,7 +18,6 @@ import zipfile
 from bypy.constants import CL, LINK, MT, PREFIX, RC, SIGNTOOL, SW, build_dir, python_major_minor_version, worker_env
 from bypy.constants import SRC as CALIBRE_DIR
 from bypy.freeze import cleanup_site_packages, extract_extension_modules, freeze_python, path_to_freeze_dir
-from bypy.pkgs.piper import copy_piper_dir
 from bypy.utils import mkdtemp, py_compile, run, walk
 
 iv = globals()['init_env']
@@ -96,6 +95,7 @@ class Env:
         self.lib_dir = j(self.app_base, 'Lib')
         self.pylib = j(self.app_base, 'pylib.zip')
         self.dll_dir = j(self.app_base, 'bin')
+        self.share_dir = j(self.app_base, 'share')
         self.portable_base = j(d(self.base), 'Calibre Portable')
         self.obj_dir = j(build_dir, 'launcher')
         self.installer_dir = j(build_dir, 'wix')
@@ -105,6 +105,7 @@ class Env:
 def initbase(env):
     os.makedirs(env.app_base)
     os.mkdir(env.dll_dir)
+    os.mkdir(env.share_dir)
     try:
         shutil.rmtree(env.dist)
     except EnvironmentError as err:
@@ -130,18 +131,24 @@ def freeze(env, ext_dir, incdir):
             shutil.copy2(x + '.manifest', dest)
 
     bindir = os.path.join(PREFIX, 'bin')
+    libdir = os.path.join(PREFIX, 'lib')
     for x in ('pdftohtml', 'pdfinfo', 'pdftoppm', 'pdftotext', 'jpegtran-calibre', 'cjpeg-calibre', 'optipng-calibre', 'cwebp-calibre', 'JXRDecApp-calibre'):
         copybin(os.path.join(bindir, x + '.exe'))
+    # piper needs the espeak-ng data files and the onnxruntime DLL
+    for x in ('espeak-ng-data',):
+        shutil.copytree(os.path.join(PREFIX, 'share', x), os.path.join(env.share_dir, x))
+    copybin(os.path.join(libdir, 'onnxruntime.dll'))
+
     for f in glob.glob(os.path.join(bindir, '*.dll')):
         if re.search(r'(easylzma|icutest)', f.lower()) is None:
             copybin(f)
+
     ossm = os.path.join(env.dll_dir, 'ossl-modules')
     os.mkdir(ossm)
-    for f in glob.glob(os.path.join(PREFIX, 'lib', 'ossl-modules', '*.dll')):
+    for f in glob.glob(os.path.join(libdir, 'ossl-modules', '*.dll')):
         copybin(f, ossm)
     for f in glob.glob(os.path.join(PREFIX, 'ffmpeg', 'bin', '*.dll')):
         copybin(f)
-    copy_piper_dir(PREFIX, env.dll_dir)
 
     copybin(os.path.join(env.python_base, 'python%s.dll' % env.py_ver.replace('.', '')))
     copybin(os.path.join(env.python_base, 'python%s.dll' % env.py_ver[0]))
diff --git a/setup/build_environment.py b/setup/build_environment.py
index 4adda7f415..7ea56c685e 100644
--- a/setup/build_environment.py
+++ b/setup/build_environment.py
@@ -181,6 +181,9 @@ if iswindows:
     zlib_lib_dirs = [sw_lib_dir]
     podofo_inc = os.path.join(sw_inc_dir, 'podofo')
     podofo_lib = sw_lib_dir
+    piper_inc_dirs = [sw_inc_dir, os.path.join(sw_inc_dir, 'onnxruntime')]
+    piper_lib_dirs = [sw_lib_dir]
+    piper_libs = ['espeak-ng', 'onnxruntime']
 elif ismacos:
     sw = os.environ.get('SW', os.path.expanduser('~/sw'))
     sw_inc_dir  = os.path.join(sw, 'include')
diff --git a/src/calibre/utils/tts/piper.cpp b/src/calibre/utils/tts/piper.cpp
index f56045297e..18f98ec432 100644
--- a/src/calibre/utils/tts/piper.cpp
+++ b/src/calibre/utils/tts/piper.cpp
@@ -11,11 +11,14 @@
 #include <vector>
 #include <map>
 #include <memory>
-#include <onnxruntime_cxx_api.h>
 #include <queue>
 #include <cstdint>
 #include <algorithm>
 #include <limits>
+#ifdef _WIN32
+#define ORT_DLL_IMPORT
+#endif
+#include <onnxruntime_cxx_api.h>
 
 #define CLAUSE_INTONATION_FULL_STOP 0x00000000
 #define CLAUSE_INTONATION_COMMA 0x00001000
@@ -137,8 +140,8 @@ phonemize(PyObject *self, PyObject *pytext) {
 
 static PyObject*
 set_voice(PyObject *self, PyObject *args) {
-    PyObject *cfg; const char *model_path;
-    if (!PyArg_ParseTuple(args, "Os", &cfg, &model_path)) return NULL;
+    PyObject *cfg; PyObject *pymp;
+    if (!PyArg_ParseTuple(args, "OU", &cfg, &pymp)) return NULL;
 
     PyObject *evn = PyObject_GetAttrString(cfg, "espeak_voice_name");
     if (!evn) return NULL;
@@ -155,10 +158,10 @@ set_voice(PyObject *self, PyObject *args) {
 }
     G(sample_rate, current_sample_rate, PyLong_AsLong);
     G(num_speakers, current_num_speakers, PyLong_AsLong);
-    G(length_scale, current_length_scale, PyFloat_AsDouble);
-    G(noise_scale, current_noise_scale, PyFloat_AsDouble);
-    G(noise_w, current_noise_w, PyFloat_AsDouble);
-    G(sentence_delay, current_sentence_delay, PyFloat_AsDouble);
+    G(length_scale, current_length_scale, (float)PyFloat_AsDouble);
+    G(noise_scale, current_noise_scale, (float)PyFloat_AsDouble);
+    G(noise_w, current_noise_w, (float)PyFloat_AsDouble);
+    G(sentence_delay, current_sentence_delay, (float)PyFloat_AsDouble);
 #undef G
 
     PyObject *map = PyObject_GetAttrString(cfg, "phoneme_id_map");
@@ -187,7 +190,23 @@ set_voice(PyObject *self, PyObject *args) {
     opts.DisableProfiling();
     Ort::Env ort_env{ORT_LOGGING_LEVEL_WARNING, "piper"};
     session.reset();
+    // The GIL is released in this section, so it must be re-acquired around
+    // every Python C API call, including the error return below.
+    Py_BLOCK_THREADS;
+#ifdef _WIN32
+    wchar_t *model_path = PyUnicode_AsWideCharString(pymp, NULL);
+#else
+    const char *model_path = PyUnicode_AsUTF8(pymp);
+#endif
+    if (!model_path) return NULL;
+    Py_UNBLOCK_THREADS;
     session = std::make_unique<Ort::Session>(Ort::Session(ort_env, model_path, opts));
+#ifdef _WIN32
+    // Buffer came from PyUnicode_AsWideCharString(); PyMem_Free() needs the GIL
+    Py_BLOCK_THREADS;
+    PyMem_Free(model_path);
+    Py_UNBLOCK_THREADS;
+#endif
     Py_END_ALLOW_THREADS;
 
     Py_RETURN_NONE;
@@ -341,20 +360,20 @@ next(PyObject *self, PyObject *args) {
     int num_samples; const float *audio_tensor_data;
     Py_BEGIN_ALLOW_THREADS;
     auto audio_shape = output_tensors.front().GetTensorTypeAndShapeInfo().GetShape();
-    num_samples = audio_shape[audio_shape.size() - 1];
+    num_samples = (int)audio_shape[audio_shape.size() - 1];
     audio_tensor_data = output_tensors.front().GetTensorData<float>();
     Py_END_ALLOW_THREADS;
 
     PyObject *ans = NULL, *data = NULL;
     int num_of_silence_samples = 0;
-    if (current_sentence_delay > 0) num_of_silence_samples = current_sample_rate * current_sentence_delay;
+    if (current_sentence_delay > 0) num_of_silence_samples = (int)(current_sample_rate * current_sentence_delay);
     if (as_16bit_samples) {
         data = PyBytes_FromStringAndSize(NULL, sizeof(int16_t) * (num_samples + num_of_silence_samples));
         if (data) {
             Py_BEGIN_ALLOW_THREADS;
             int16_t *x = (int16_t*)PyBytes_AS_STRING(data);
             for (int i = 0; i < num_samples; i++) {
-                x[i] = std::max(-1.f, std::min(audio_tensor_data[i], 1.f)) * std::numeric_limits<int16_t>::max();
+                x[i] = (int16_t)(std::max(-1.f, std::min(audio_tensor_data[i], 1.f)) * std::numeric_limits<int16_t>::max());
             }
             memset(x + num_samples, 0, num_of_silence_samples * sizeof(int16_t));
             Py_END_ALLOW_THREADS;
diff --git a/src/calibre/utils/tts/piper.py b/src/calibre/utils/tts/piper.py
index f653a3166d..60acdc6829 100644
--- a/src/calibre/utils/tts/piper.py
+++ b/src/calibre/utils/tts/piper.py
@@ -12,6 +12,7 @@ from threading import Lock, Thread
 from typing import Any, NamedTuple
 
 import calibre_extensions.piper as piper
+from calibre.constants import iswindows
 
 DEFAULT_LENGTH_SCALE = 1.0
 DEFAULT_NOISE_SCALE = 0.667
@@ -61,6 +62,8 @@ def load_voice_config(path: str) -> VoiceConfig:
 def espeak_data_dir() -> str:
     if not getattr(sys, 'frozen', False):
         return ''
+    if iswindows:
+        return os.path.join(os.path.dirname(sys.executables_location), 'share', 'espeak-ng-data')
     return os.path.join(sys.executables_location, 'share', 'espeak-ng-data')