Review notes (free text ahead of the first "diff --git" header; patch tools skip it).
  * What the patch does: wraps the generated piper voice list in {'version': 1, 'lang_map': ...};
    moves piper_cmdline() from gui2/tts2/piper.py to gui2/tts2/types.py; removes sentinel_path();
    adds Piper.available_voices, Quality.from_piper_quality and Voice.engine_data.
  * The patch had lost its line breaks. Line structure was rebuilt from the hunk headers; indentation,
    spacing before inline comments and the position of blank context lines were inferred from Python
    syntax and should be confirmed against the target tree before applying strictly.
  * Quality.from_piper_quality is a classmethod, so its first parameter is now named cls instead of self.
  * Trailing comments were added on a few "+" lines; no hunk line counts changed.
  * The post-image blob ids on the "index" lines predate these review edits.

diff --git a/setup/piper.py b/setup/piper.py
index 9158a0bba8..873b540c91 100644
--- a/setup/piper.py
+++ b/setup/piper.py
@@ -63,7 +63,7 @@ class PiperVoices(ReVendor):
         if not lang_map:
             raise SystemExit(f'Failed to read any piper voices from: {url}')
         with open(self.output_file_path, 'w') as f:
-            json.dump(lang_map, f, indent=2, sort_keys=False)
+            json.dump({'version': 1, 'lang_map': lang_map}, f, indent=2, sort_keys=False)
 
     def clean(self):
         with suppress(FileNotFoundError):
diff --git a/src/calibre/gui2/tts2/piper.py b/src/calibre/gui2/tts2/piper.py
index bb76c3a0db..f32e1ba69c 100644
--- a/src/calibre/gui2/tts2/piper.py
+++ b/src/calibre/gui2/tts2/piper.py
@@ -8,26 +8,16 @@ import re
 import sys
 from collections import deque
 from dataclasses import dataclass
-from functools import lru_cache
 from itertools import count
 from time import monotonic
 
 from qt.core import QApplication, QAudio, QAudioFormat, QAudioSink, QByteArray, QIODevice, QIODeviceBase, QObject, QProcess, Qt, QTextToSpeech, pyqtSignal, sip
 
-from calibre.constants import bundled_binaries_dir, get_windows_username, is_debugging, iswindows
-from calibre.gui2.tts2.types import TTSBackend
-from calibre.ptempfile import base_dir
+from calibre.constants import is_debugging
+from calibre.gui2.tts2.types import Quality, TTSBackend, Voice, piper_cmdline
 from calibre.spell.break_iterator import sentence_positions, split_into_words_and_positions
-
-
-@lru_cache(2)
-def sentinel_path() -> str:
-    fname = f'piper-sentinel-{os.getpid()}'
-    if iswindows:
-        fname += f'-{get_windows_username()}'
-    else:
-        fname += f'-{os.geteuid()}'
-    return os.path.join(base_dir(), fname)
+from calibre.utils.localization import canonicalize_lang
+from calibre.utils.resources import get_path as P
 
 
 def debug(*a, **kw):
@@ -38,19 +28,6 @@ def debug(*a, **kw):
         print(f'[{monotonic() - debug.first:.2f}]', *a, **kw)
 
 
-@lru_cache(2)
-def piper_cmdline() -> tuple[str, ...]:
-    ext = '.exe' if iswindows else ''
-    if bbd := bundled_binaries_dir():
-        # TODO: Add path to espeak-ng-data with --
-        return (os.path.join(bbd, 'piper' + ext),)
-    import shutil
-    exe = shutil.which('piper-tts')
-    if exe:
-        return (exe,)
-    return ()
-
-
 @dataclass
 class Utterance:
     id: int
@@ -201,6 +178,7 @@ class Piper(TTSBackend):
         self._utterances_being_spoken.saying.connect(self.saying)
         self._utterances_being_spoken.update_status.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection)
         self._state = QTextToSpeech.State.Ready
+        self._voices = None  # filled on first access to available_voices
         self._last_error = ''
         self._errors_from_piper: list[str] = []
         self._pending_stderr_data = b''
@@ -209,6 +187,29 @@ class Piper(TTSBackend):
         self._synthesis_done.connect(self._utterance_synthesized, type=Qt.ConnectionType.QueuedConnection)
         atexit.register(self.shutdown)
 
+    @property
+    def available_voices(self) -> dict[str, tuple[Voice, ...]]:
+        if self._voices is None:  # parse the bundled voice list only on first access
+            d = json.loads(P('piper-voices.json', data=True))
+            ans = []
+            for bcp_code, voice_map in d['lang_map'].items():
+                lang, sep, country = bcp_code.partition('_')
+                lang = canonicalize_lang(lang) or lang
+                for voice_name, qual_map in voice_map.items():
+                    best_qual = voice = None
+                    for qual, e in qual_map.items():
+                        q = Quality.from_piper_quality(qual)
+                        if best_qual is None or q.value < best_qual.value:  # smallest Quality value wins; presumably the best quality -- confirm enum order
+                            best_qual = q
+                            voice = Voice(voice_name, lang, country, quality=q, engine_data={
+                                'model_url': e['model'], 'config_url': e['config'],
+                                'model_filename': f'{bcp_code}-{voice_name}-{qual}.onnx',
+                            })
+                    if voice:
+                        ans.append(voice)  # one Voice per voice name
+            self._voices = tuple(ans)
+        return {'': self._voices}
+
     def say(self, text: str) -> None:
         if self._last_error:
             return
diff --git a/src/calibre/gui2/tts2/types.py b/src/calibre/gui2/tts2/types.py
index 1a83f0972a..181ac41b15 100644
--- a/src/calibre/gui2/tts2/types.py
+++ b/src/calibre/gui2/tts2/types.py
@@ -9,7 +9,7 @@ from typing import Literal, NamedTuple
 
 from qt.core import QApplication, QLocale, QObject, QTextToSpeech, QVoice, pyqtSignal
 
-from calibre.constants import islinux, ismacos, iswindows
+from calibre.constants import bundled_binaries_dir, islinux, ismacos, iswindows
 from calibre.utils.config import JSONConfig
 from calibre.utils.config_base import tweaks
 from calibre.utils.localization import canonicalize_lang
@@ -21,6 +21,20 @@ def load_config() -> JSONConfig:
     return JSONConfig(CONFIG_NAME)
 
 
+@lru_cache(2)
+def piper_cmdline() -> tuple[str, ...]:
+    ext = '.exe' if iswindows else ''
+    if bbd := bundled_binaries_dir():
+        # TODO: Add path to espeak-ng-data with --
+        return (os.path.join(bbd, 'piper' + ext),)
+    import shutil
+    exe = shutil.which('piper-tts')
+    if exe:
+        return (exe,)
+    return ()  # empty tuple: no bundled binary and no piper-tts on PATH
+
+
+
 class TrackingCapability(Enum):
     NoTracking: int = auto()
     WordByWord: int = auto()
@@ -44,6 +58,10 @@ class Quality(Enum):
     Low: int = auto()
     ExtraLow: int = auto()
 
+    @classmethod
+    def from_piper_quality(cls, x: str) -> 'Quality':  # raises KeyError if x is not one of the four names below
+        return {'x_low': Quality.ExtraLow, 'low': Quality.Low, 'medium': Quality.Medium, 'high': Quality.High}[x]
+
 
 class Voice(NamedTuple):
     name: str = ''
@@ -51,11 +69,13 @@ class Voice(NamedTuple):
 
     country_code: str = ''
     human_name: str = ''
-    notes: str = ''  # variant from speechd voices, or notes from piper voices
+    notes: str = ''  # variant from speechd voices
     gender: QVoice.Gender = QVoice.Gender.Unknown
     age: QVoice.Age = QVoice.Age.Other
     quality: Quality = Quality.High
 
+    engine_data: dict[str, str] | None = None
+
     @property
     def short_text(self) -> str:
         return self.human_name or self.name or _('System default voice')
@@ -64,6 +84,7 @@ class Voice(NamedTuple):
         return (self.quality, self.short_text.lower())
 
 
+
 def qvoice_to_voice(v: QVoice) -> QVoice:
     lang = canonicalize_lang(QLocale.languageToCode(v.language())) or 'und'
     country = QLocale.territoryToString(v.locale().territory())