Make normalization configurable

Still need to add an actual config UI for it. Will see if it's actually
needed.
This commit is contained in:
Kovid Goyal 2025-07-30 17:27:02 +05:30
parent 6f9820e4d9
commit 5a03ee2e85
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 3 additions and 0 deletions

View File

@ -57,6 +57,7 @@ static float current_length_scale = 1;
static float current_noise_scale = 1; static float current_noise_scale = 1;
static float current_noise_w = 1; static float current_noise_w = 1;
static float current_sentence_delay = 0; static float current_sentence_delay = 0;
static bool current_normalize_volume = true;
std::unique_ptr<Ort::Session> session; std::unique_ptr<Ort::Session> session;
std::queue<std::vector<PhonemeId>> phoneme_id_queue; std::queue<std::vector<PhonemeId>> phoneme_id_queue;
std::vector<float> chunk_samples; std::vector<float> chunk_samples;
@ -231,6 +232,7 @@ set_voice(PyObject *self, PyObject *args) {
G(noise_scale, current_noise_scale, (float)PyFloat_AsDouble); G(noise_scale, current_noise_scale, (float)PyFloat_AsDouble);
G(noise_w, current_noise_w, (float)PyFloat_AsDouble); G(noise_w, current_noise_w, (float)PyFloat_AsDouble);
G(sentence_delay, current_sentence_delay, (float)PyFloat_AsDouble); G(sentence_delay, current_sentence_delay, (float)PyFloat_AsDouble);
G(normalize_volume, current_normalize_volume, PyObject_IsTrue);
#undef G #undef G
PyObject *map = PyObject_GetAttrString(cfg, "phoneme_id_map"); PyObject *map = PyObject_GetAttrString(cfg, "phoneme_id_map");

View File

@ -28,6 +28,7 @@ class VoiceConfig(NamedTuple):
noise_w: float noise_w: float
num_speakers: int num_speakers: int
sentence_delay: float = 0 sentence_delay: float = 0
normalize_volume: bool = False
def translate_voice_config(x: Any) -> VoiceConfig: def translate_voice_config(x: Any) -> VoiceConfig: