Code to load available Piper voices

This commit is contained in:
Kovid Goyal 2024-09-02 11:59:00 +05:30
parent 4b565c124b
commit ec768af6db
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 52 additions and 30 deletions

View File

@ -63,7 +63,7 @@ class PiperVoices(ReVendor):
if not lang_map: if not lang_map:
raise SystemExit(f'Failed to read any piper voices from: {url}') raise SystemExit(f'Failed to read any piper voices from: {url}')
with open(self.output_file_path, 'w') as f: with open(self.output_file_path, 'w') as f:
json.dump(lang_map, f, indent=2, sort_keys=False) json.dump({'version': 1, 'lang_map': lang_map}, f, indent=2, sort_keys=False)
def clean(self): def clean(self):
with suppress(FileNotFoundError): with suppress(FileNotFoundError):

View File

@ -8,26 +8,16 @@ import re
import sys import sys
from collections import deque from collections import deque
from dataclasses import dataclass from dataclasses import dataclass
from functools import lru_cache
from itertools import count from itertools import count
from time import monotonic from time import monotonic
from qt.core import QApplication, QAudio, QAudioFormat, QAudioSink, QByteArray, QIODevice, QIODeviceBase, QObject, QProcess, Qt, QTextToSpeech, pyqtSignal, sip from qt.core import QApplication, QAudio, QAudioFormat, QAudioSink, QByteArray, QIODevice, QIODeviceBase, QObject, QProcess, Qt, QTextToSpeech, pyqtSignal, sip
from calibre.constants import bundled_binaries_dir, get_windows_username, is_debugging, iswindows from calibre.constants import is_debugging
from calibre.gui2.tts2.types import TTSBackend from calibre.gui2.tts2.types import Quality, TTSBackend, Voice, piper_cmdline
from calibre.ptempfile import base_dir
from calibre.spell.break_iterator import sentence_positions, split_into_words_and_positions from calibre.spell.break_iterator import sentence_positions, split_into_words_and_positions
from calibre.utils.localization import canonicalize_lang
from calibre.utils.resources import get_path as P
@lru_cache(2)
def sentinel_path() -> str:
    '''
    Return the path of the piper sentinel file for this process.

    The file name is made of the current process id followed by a user
    identifier: the value of get_windows_username() on Windows, the effective
    user id everywhere else. The file lives in the directory returned by
    base_dir(). The result is memoized by lru_cache.
    '''
    pid = os.getpid()
    # Pick the per-user suffix appropriate for the platform
    user_tag = get_windows_username() if iswindows else os.geteuid()
    return os.path.join(base_dir(), f'piper-sentinel-{pid}-{user_tag}')
def debug(*a, **kw): def debug(*a, **kw):
@ -38,19 +28,6 @@ def debug(*a, **kw):
print(f'[{monotonic() - debug.first:.2f}]', *a, **kw) print(f'[{monotonic() - debug.first:.2f}]', *a, **kw)
@lru_cache(2)
def piper_cmdline() -> tuple[str, ...]:
    '''
    Return the command line prefix used to run piper.

    When bundled_binaries_dir() yields a directory, the piper executable inside
    it is used (with an .exe extension on Windows). Otherwise PATH is searched
    for an executable named piper-tts. An empty tuple is returned when neither
    lookup succeeds. The result is memoized by lru_cache.
    '''
    bundled = bundled_binaries_dir()
    if bundled:
        # TODO: Add path to espeak-ng-data with --
        suffix = '.exe' if iswindows else ''
        return (os.path.join(bundled, f'piper{suffix}'),)
    # Imported lazily, only needed when there is no bundled binary
    from shutil import which
    found = which('piper-tts')
    return (found,) if found else ()
@dataclass @dataclass
class Utterance: class Utterance:
id: int id: int
@ -201,6 +178,7 @@ class Piper(TTSBackend):
self._utterances_being_spoken.saying.connect(self.saying) self._utterances_being_spoken.saying.connect(self.saying)
self._utterances_being_spoken.update_status.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection) self._utterances_being_spoken.update_status.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection)
self._state = QTextToSpeech.State.Ready self._state = QTextToSpeech.State.Ready
self._voices = None
self._last_error = '' self._last_error = ''
self._errors_from_piper: list[str] = [] self._errors_from_piper: list[str] = []
self._pending_stderr_data = b'' self._pending_stderr_data = b''
@ -209,6 +187,29 @@ class Piper(TTSBackend):
self._synthesis_done.connect(self._utterance_synthesized, type=Qt.ConnectionType.QueuedConnection) self._synthesis_done.connect(self._utterance_synthesized, type=Qt.ConnectionType.QueuedConnection)
atexit.register(self.shutdown) atexit.register(self.shutdown)
@property
def available_voices(self) -> dict[str, tuple[Voice, ...]]:
    '''
    The voices listed in the piper-voices.json resource.

    The resource is parsed on first access and the resulting tuple is cached
    in self._voices. For every voice name a single Voice is kept: the one
    whose Quality member has the smallest value (presumably the best quality,
    given the order the enum members are declared in -- confirm). The tuple is
    returned in a dict under the empty string key.
    '''
    if self._voices is None:
        catalog = json.loads(P('piper-voices.json', data=True))
        collected = []
        for bcp_code, voices_for_locale in catalog['lang_map'].items():
            # Codes are split at the first underscore into language and country
            pieces = bcp_code.partition('_')
            raw_lang, country = pieces[0], pieces[2]
            lang = canonicalize_lang(raw_lang) or raw_lang
            for voice_name, qualities in voices_for_locale.items():
                chosen = None
                for piper_quality, entry in qualities.items():
                    quality = Quality.from_piper_quality(piper_quality)
                    # Only a strictly smaller quality value replaces the current pick
                    if chosen is not None and quality.value >= chosen.quality.value:
                        continue
                    chosen = Voice(voice_name, lang, country, quality=quality, engine_data={
                        'model_url': entry['model'], 'config_url': entry['config'],
                        'model_filename': f'{bcp_code}-{voice_name}-{piper_quality}.onnx',
                    })
                if chosen is not None:
                    collected.append(chosen)
        self._voices = tuple(collected)
    return {'': self._voices}
def say(self, text: str) -> None: def say(self, text: str) -> None:
if self._last_error: if self._last_error:
return return

View File

@ -9,7 +9,7 @@ from typing import Literal, NamedTuple
from qt.core import QApplication, QLocale, QObject, QTextToSpeech, QVoice, pyqtSignal from qt.core import QApplication, QLocale, QObject, QTextToSpeech, QVoice, pyqtSignal
from calibre.constants import islinux, ismacos, iswindows from calibre.constants import bundled_binaries_dir, islinux, ismacos, iswindows
from calibre.utils.config import JSONConfig from calibre.utils.config import JSONConfig
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
@ -21,6 +21,20 @@ def load_config() -> JSONConfig:
return JSONConfig(CONFIG_NAME) return JSONConfig(CONFIG_NAME)
@lru_cache(2)
def piper_cmdline() -> tuple[str, ...]:
    '''
    Return the command line prefix used to run piper.

    When bundled_binaries_dir() yields a directory, the piper executable inside
    it is used (with an .exe extension on Windows). Otherwise PATH is searched
    for an executable named piper-tts. An empty tuple is returned when neither
    lookup succeeds. The result is memoized by lru_cache.
    '''
    bundled = bundled_binaries_dir()
    if bundled:
        # TODO: Add path to espeak-ng-data with --
        suffix = '.exe' if iswindows else ''
        return (os.path.join(bundled, f'piper{suffix}'),)
    # Imported lazily, only needed when there is no bundled binary
    from shutil import which
    found = which('piper-tts')
    return (found,) if found else ()
class TrackingCapability(Enum): class TrackingCapability(Enum):
NoTracking: int = auto() NoTracking: int = auto()
WordByWord: int = auto() WordByWord: int = auto()
@ -44,6 +58,10 @@ class Quality(Enum):
Low: int = auto() Low: int = auto()
ExtraLow: int = auto() ExtraLow: int = auto()
@classmethod
def from_piper_quality(cls, x: str) -> 'Quality':
    '''
    Convert a piper quality name into the corresponding member of this enum.

    :param x: One of 'x_low', 'low', 'medium' or 'high'.
    :return: The matching enum member.
    :raises KeyError: If x is not one of the names listed above.
    '''
    # The first argument of a classmethod is the class itself, so it is named
    # cls and the members are looked up through it.
    return {'x_low': cls.ExtraLow, 'low': cls.Low, 'medium': cls.Medium, 'high': cls.High}[x]
class Voice(NamedTuple): class Voice(NamedTuple):
name: str = '' name: str = ''
@ -51,11 +69,13 @@ class Voice(NamedTuple):
country_code: str = '' country_code: str = ''
human_name: str = '' human_name: str = ''
notes: str = '' # variant from speechd voices, or notes from piper voices notes: str = '' # variant from speechd voices
gender: QVoice.Gender = QVoice.Gender.Unknown gender: QVoice.Gender = QVoice.Gender.Unknown
age: QVoice.Age = QVoice.Age.Other age: QVoice.Age = QVoice.Age.Other
quality: Quality = Quality.High quality: Quality = Quality.High
engine_data: dict[str, str] | None = None
@property @property
def short_text(self) -> str: def short_text(self) -> str:
return self.human_name or self.name or _('System default voice') return self.human_name or self.name or _('System default voice')
@ -64,6 +84,7 @@ class Voice(NamedTuple):
return (self.quality, self.short_text.lower()) return (self.quality, self.short_text.lower())
def qvoice_to_voice(v: QVoice) -> QVoice: def qvoice_to_voice(v: QVoice) -> QVoice:
lang = canonicalize_lang(QLocale.languageToCode(v.language())) or 'und' lang = canonicalize_lang(QLocale.languageToCode(v.language())) or 'und'
country = QLocale.territoryToString(v.locale().territory()) country = QLocale.territoryToString(v.locale().territory())