Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-31 14:33:54 -04:00
Start work on using different execution providers for piper inferencing
This commit is contained in:
parent 650aeee96d
commit 504d1a4f1d
@@ -61,6 +61,49 @@ static struct {
     PyObject *func, *args;
 } normalize_data = {0};
 
+static const std::vector<std::string>& PRIORITY_ORDER = {
+#ifdef _WIN32
+    "DML", "DmlExecutionProvider", "DirectMLExecutionProvider",
+#endif
+#ifdef __APPLE__
+    "CoreML", "CoreMLExecutionProvider",
+#endif
+    "ROCMExecutionProvider",  // AMD GPU
+    "TensorRTExecutionProvider", "CUDAExecutionProvider",  // NVIDIA GPU
+    "OpenVINO", "OpenVINOExecutionProvider",  // Intel GPU and CPU
+    // The various CPU providers
+    "DnnlExecutionProvider",  // CPU with AVX 512
+    "CPUExecutionProvider",  // the default, always available provider
+};
+
+static void
+sort_providers_by_priority(std::vector<std::string>& providers, const std::vector<std::string>& priority_order) {
+    // Build a priority map: provider name -> order index
+    std::unordered_map<std::string, size_t> priority_map;
+    for (size_t i = 0; i < priority_order.size(); ++i) {
+        priority_map[priority_order[i]] = i;
+    }
+
+    // Stable sort so original order is preserved for equal priority
+    std::stable_sort(providers.begin(), providers.end(),
+        [&priority_map, &priority_order](const std::string& a, const std::string& b) {
+            auto it_a = priority_map.find(a);
+            auto it_b = priority_map.find(b);
+            size_t index_a = it_a != priority_map.end() ? it_a->second : priority_order.size();
+            size_t index_b = it_b != priority_map.end() ? it_b->second : priority_order.size();
+            return index_a < index_b;
+        });
+}
+
+static std::vector<std::string> available_providers;
+
+static void
+set_available_providers() {
+    if (!available_providers.empty()) return;
+    available_providers = Ort::GetAvailableProviders();
+    sort_providers_by_priority(available_providers, PRIORITY_ORDER);
+}
+
 static PyObject*
 initialize(PyObject *self, PyObject *args) {
     const char *path = "";
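The helper added above reorders whatever Ort::GetAvailableProviders() reports so that GPU/accelerator providers come before the generic CPU fallback, with providers missing from PRIORITY_ORDER sorted last. A minimal standalone sketch of the same technique (priority map plus std::stable_sort), using a hard-coded provider list for illustration rather than a real ONNX Runtime query:

// Standalone sketch of the priority sort used in the commit; the provider
// list below is hard-coded for illustration, not queried from ONNX Runtime.
#include <algorithm>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

void
sort_providers_by_priority(std::vector<std::string>& providers, const std::vector<std::string>& priority_order) {
    std::unordered_map<std::string, size_t> priority_map;
    for (size_t i = 0; i < priority_order.size(); ++i) priority_map[priority_order[i]] = i;
    // Unknown providers sort last; the stable sort keeps their relative order.
    std::stable_sort(providers.begin(), providers.end(),
        [&](const std::string& a, const std::string& b) {
            size_t ia = priority_map.count(a) ? priority_map.at(a) : priority_order.size();
            size_t ib = priority_map.count(b) ? priority_map.at(b) : priority_order.size();
            return ia < ib;
        });
}

int main() {
    std::vector<std::string> priority = {"CUDAExecutionProvider", "DnnlExecutionProvider", "CPUExecutionProvider"};
    std::vector<std::string> providers = {"CPUExecutionProvider", "SomeVendorProvider", "CUDAExecutionProvider"};
    sort_providers_by_priority(providers, priority);
    for (const auto& p : providers) printf("%s\n", p.c_str());
    // Prints: CUDAExecutionProvider, CPUExecutionProvider, SomeVendorProvider
    return 0;
}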
@@ -80,6 +123,7 @@ initialize(PyObject *self, PyObject *args) {
         if (!normalize_data.func) return NULL;
         normalize_data.args = Py_BuildValue("(ss)", "NFD", "");
         if (!normalize_data.args) return NULL;
+        set_available_providers();
     }
     Py_RETURN_NONE;
 }
@@ -184,7 +228,27 @@ set_voice(PyObject *self, PyObject *args) {
 
     // Load onnx model
     Py_BEGIN_ALLOW_THREADS;
-    Ort::SessionOptions opts;
+    static Ort::SessionOptions opts;
+
+    // for (const auto& p : available_providers) {
+    //     std::unordered_map<std::string, std::string> provider_options;
+    //     try {
+    //         opts.AppendExecutionProvider(p, provider_options);
+    //     } catch (const Ort::Exception& e) {
+    //         fprintf(stderr, "Failed to append execution provider: '%s' with error: %s\n", p.c_str(), e.what());
+    //     }
+    // }
+    // OrtDnnlProviderOptions* dnnl_options_ptr = nullptr;
+    // OrtStatus* status = Ort::GetApi().CreateDnnlProviderOptions(&dnnl_options_ptr);
+    // if (status == nullptr) {
+    //     printf("11111111111111111 %d\n", __LINE__);
+    //     try {
+    //         opts.AppendExecutionProvider_Dnnl(*dnnl_options_ptr);
+    //         printf("11111111111111111 %d\n", __LINE__);
+    //     } catch (const Ort::Exception& e) {
+    //         fprintf(stderr, "Failed to append execution provider with error: %s\n", e.what());
+    //     }}
+
     opts.DisableCpuMemArena();
     opts.DisableMemPattern();
     opts.DisableProfiling();
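The provider-selection experiment above is still commented out in this commit. A sketch of what enabling it might look like, assuming the names returned by Ort::GetAvailableProviders() are accepted by the generic Ort::SessionOptions::AppendExecutionProvider() overload (some providers, e.g. DNNL, may instead need their dedicated Append_* call, as the second commented block suggests); the helper name is hypothetical:

// Sketch only: try each available provider in priority order and skip the
// ones this build or hardware rejects, leaving CPUExecutionProvider as the
// implicit fallback. Uses the file-scope available_providers populated by
// set_available_providers() above.
static void
append_execution_providers(Ort::SessionOptions& opts) {  // hypothetical helper
    for (const auto& p : available_providers) {
        if (p == "CPUExecutionProvider") continue;  // always present as the fallback
        std::unordered_map<std::string, std::string> provider_options;
        try {
            opts.AppendExecutionProvider(p, provider_options);
        } catch (const Ort::Exception& e) {
            fprintf(stderr, "Failed to append execution provider: '%s' with error: %s\n", p.c_str(), e.what());
        }
    }
}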
@@ -200,6 +264,7 @@ set_voice(PyObject *self, PyObject *args) {
 #endif
     Py_END_ALLOW_THREADS;
 
+
     Py_RETURN_NONE;
 }
 
@@ -338,8 +403,9 @@ next(PyObject *self, PyObject *args) {
     std::array<const char *, 1> output_names = {"output"};
 
     // Infer
+    Ort::RunOptions ro;
     output_tensors = session->Run(
-        Ort::RunOptions{nullptr}, input_names.data(), input_tensors.data(),
+        ro, input_names.data(), input_tensors.data(),
         input_tensors.size(), output_names.data(), output_names.size());
     Py_END_ALLOW_THREADS;
 