mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-15 11:34:29 -04:00
Code to load available Piper voices
This commit is contained in:
parent
4b565c124b
commit
ec768af6db
@ -63,7 +63,7 @@ class PiperVoices(ReVendor):
|
|||||||
if not lang_map:
|
if not lang_map:
|
||||||
raise SystemExit(f'Failed to read any piper voices from: {url}')
|
raise SystemExit(f'Failed to read any piper voices from: {url}')
|
||||||
with open(self.output_file_path, 'w') as f:
|
with open(self.output_file_path, 'w') as f:
|
||||||
json.dump(lang_map, f, indent=2, sort_keys=False)
|
json.dump({'version': 1, 'lang_map': lang_map}, f, indent=2, sort_keys=False)
|
||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
with suppress(FileNotFoundError):
|
with suppress(FileNotFoundError):
|
||||||
|
@ -8,26 +8,16 @@ import re
|
|||||||
import sys
|
import sys
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from functools import lru_cache
|
|
||||||
from itertools import count
|
from itertools import count
|
||||||
from time import monotonic
|
from time import monotonic
|
||||||
|
|
||||||
from qt.core import QApplication, QAudio, QAudioFormat, QAudioSink, QByteArray, QIODevice, QIODeviceBase, QObject, QProcess, Qt, QTextToSpeech, pyqtSignal, sip
|
from qt.core import QApplication, QAudio, QAudioFormat, QAudioSink, QByteArray, QIODevice, QIODeviceBase, QObject, QProcess, Qt, QTextToSpeech, pyqtSignal, sip
|
||||||
|
|
||||||
from calibre.constants import bundled_binaries_dir, get_windows_username, is_debugging, iswindows
|
from calibre.constants import is_debugging
|
||||||
from calibre.gui2.tts2.types import TTSBackend
|
from calibre.gui2.tts2.types import Quality, TTSBackend, Voice, piper_cmdline
|
||||||
from calibre.ptempfile import base_dir
|
|
||||||
from calibre.spell.break_iterator import sentence_positions, split_into_words_and_positions
|
from calibre.spell.break_iterator import sentence_positions, split_into_words_and_positions
|
||||||
|
from calibre.utils.localization import canonicalize_lang
|
||||||
|
from calibre.utils.resources import get_path as P
|
||||||
@lru_cache(2)
def sentinel_path() -> str:
    """Return the path of this process's piper sentinel file.

    The file name combines the current process id with an identifier for the
    current user (the Windows user name on Windows, the effective uid
    elsewhere) and is placed inside ``base_dir()``. The result is memoized by
    ``lru_cache``.
    """
    pid = os.getpid()
    owner = get_windows_username() if iswindows else os.geteuid()
    return os.path.join(base_dir(), f'piper-sentinel-{pid}-{owner}')
|
|
||||||
|
|
||||||
|
|
||||||
def debug(*a, **kw):
|
def debug(*a, **kw):
|
||||||
@ -38,19 +28,6 @@ def debug(*a, **kw):
|
|||||||
print(f'[{monotonic() - debug.first:.2f}]', *a, **kw)
|
print(f'[{monotonic() - debug.first:.2f}]', *a, **kw)
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(2)
def piper_cmdline() -> tuple[str, ...]:
    """Return the command line used to launch piper.

    Returns a one-element tuple holding the bundled ``piper`` binary when
    ``bundled_binaries_dir()`` yields a directory, otherwise a one-element
    tuple holding the ``piper-tts`` executable found on PATH, otherwise an
    empty tuple. The result is memoized by ``lru_cache``.
    """
    bundled = bundled_binaries_dir()
    if bundled:
        suffix = '.exe' if iswindows else ''
        # TODO: Add path to espeak-ng-data with --
        return (os.path.join(bundled, f'piper{suffix}'),)
    # Imported lazily: only needed when there is no bundled binary.
    import shutil
    located = shutil.which('piper-tts')
    return (located,) if located else ()
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Utterance:
|
class Utterance:
|
||||||
id: int
|
id: int
|
||||||
@ -201,6 +178,7 @@ class Piper(TTSBackend):
|
|||||||
self._utterances_being_spoken.saying.connect(self.saying)
|
self._utterances_being_spoken.saying.connect(self.saying)
|
||||||
self._utterances_being_spoken.update_status.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection)
|
self._utterances_being_spoken.update_status.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection)
|
||||||
self._state = QTextToSpeech.State.Ready
|
self._state = QTextToSpeech.State.Ready
|
||||||
|
self._voices = None
|
||||||
self._last_error = ''
|
self._last_error = ''
|
||||||
self._errors_from_piper: list[str] = []
|
self._errors_from_piper: list[str] = []
|
||||||
self._pending_stderr_data = b''
|
self._pending_stderr_data = b''
|
||||||
@ -209,6 +187,29 @@ class Piper(TTSBackend):
|
|||||||
self._synthesis_done.connect(self._utterance_synthesized, type=Qt.ConnectionType.QueuedConnection)
|
self._synthesis_done.connect(self._utterance_synthesized, type=Qt.ConnectionType.QueuedConnection)
|
||||||
atexit.register(self.shutdown)
|
atexit.register(self.shutdown)
|
||||||
|
|
||||||
|
@property
def available_voices(self) -> dict[str, tuple[Voice, ...]]:
    """Return the Piper voices described by the ``piper-voices.json`` resource.

    The resource is parsed on first access and the resulting tuple is cached
    in ``self._voices``. For every voice name exactly one ``Voice`` is kept:
    the one whose ``Quality`` member has the smallest ``value``
    (presumably the highest quality -- confirm against the declaration order
    of ``Quality``). On ties the first entry encountered wins.

    Returns a dict with the single key ``''`` mapping to the tuple of all
    voices.
    """
    if self._voices is not None:
        return {'': self._voices}

    catalog = json.loads(P('piper-voices.json', data=True))
    collected = []
    for bcp_code, voice_map in catalog['lang_map'].items():
        # Keys look like "<lang>_<country>"; country is '' when there is no '_'.
        lang, _sep, country = bcp_code.partition('_')
        lang = canonicalize_lang(lang) or lang
        for voice_name, qual_map in voice_map.items():
            chosen = None
            for qual, entry in qual_map.items():
                quality = Quality.from_piper_quality(qual)
                if chosen is not None and quality.value >= chosen.quality.value:
                    # Not strictly better than what we already have.
                    continue
                chosen = Voice(voice_name, lang, country, quality=quality, engine_data={
                    'model_url': entry['model'], 'config_url': entry['config'],
                    'model_filename': f'{bcp_code}-{voice_name}-{qual}.onnx',
                })
            if chosen:
                collected.append(chosen)
    self._voices = tuple(collected)
    return {'': self._voices}
|
||||||
|
|
||||||
def say(self, text: str) -> None:
|
def say(self, text: str) -> None:
|
||||||
if self._last_error:
|
if self._last_error:
|
||||||
return
|
return
|
||||||
|
@ -9,7 +9,7 @@ from typing import Literal, NamedTuple
|
|||||||
|
|
||||||
from qt.core import QApplication, QLocale, QObject, QTextToSpeech, QVoice, pyqtSignal
|
from qt.core import QApplication, QLocale, QObject, QTextToSpeech, QVoice, pyqtSignal
|
||||||
|
|
||||||
from calibre.constants import islinux, ismacos, iswindows
|
from calibre.constants import bundled_binaries_dir, islinux, ismacos, iswindows
|
||||||
from calibre.utils.config import JSONConfig
|
from calibre.utils.config import JSONConfig
|
||||||
from calibre.utils.config_base import tweaks
|
from calibre.utils.config_base import tweaks
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
@ -21,6 +21,20 @@ def load_config() -> JSONConfig:
|
|||||||
return JSONConfig(CONFIG_NAME)
|
return JSONConfig(CONFIG_NAME)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(2)
def piper_cmdline() -> tuple[str, ...]:
    """Locate the piper executable and return it as a command line tuple.

    Lookup order:

    1. ``piper`` (``piper.exe`` on Windows) inside ``bundled_binaries_dir()``,
       when that function returns a truthy directory.
    2. ``piper-tts`` as resolved by ``shutil.which``.

    Returns an empty tuple when neither is available. The result is memoized
    by ``lru_cache``.
    """
    binaries_dir = bundled_binaries_dir()
    if binaries_dir:
        exe_name = 'piper' + ('.exe' if iswindows else '')
        # TODO: Add path to espeak-ng-data with --
        return (os.path.join(binaries_dir, exe_name),)

    # Deferred import: shutil is only needed for the PATH fallback.
    import shutil
    system_exe = shutil.which('piper-tts')
    if not system_exe:
        return ()
    return (system_exe,)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class TrackingCapability(Enum):
|
class TrackingCapability(Enum):
|
||||||
NoTracking: int = auto()
|
NoTracking: int = auto()
|
||||||
WordByWord: int = auto()
|
WordByWord: int = auto()
|
||||||
@ -44,6 +58,10 @@ class Quality(Enum):
|
|||||||
Low: int = auto()
|
Low: int = auto()
|
||||||
ExtraLow: int = auto()
|
ExtraLow: int = auto()
|
||||||
|
|
||||||
|
@classmethod
def from_piper_quality(cls, x: str) -> 'Quality':
    """Map a Piper quality name onto the corresponding enum member.

    :param x: One of ``'x_low'``, ``'low'``, ``'medium'`` or ``'high'``.
    :return: The matching member of this enum.
    :raises KeyError: If ``x`` is not one of the names listed above.
    """
    # The first argument of a classmethod is the class itself, so it is
    # named ``cls`` (PEP 8) and used for member lookup instead of the
    # hard-coded class name.
    return {'x_low': cls.ExtraLow, 'low': cls.Low, 'medium': cls.Medium, 'high': cls.High}[x]
|
||||||
|
|
||||||
|
|
||||||
class Voice(NamedTuple):
|
class Voice(NamedTuple):
|
||||||
name: str = ''
|
name: str = ''
|
||||||
@ -51,11 +69,13 @@ class Voice(NamedTuple):
|
|||||||
|
|
||||||
country_code: str = ''
|
country_code: str = ''
|
||||||
human_name: str = ''
|
human_name: str = ''
|
||||||
notes: str = '' # variant from speechd voices, or notes from piper voices
|
notes: str = '' # variant from speechd voices
|
||||||
gender: QVoice.Gender = QVoice.Gender.Unknown
|
gender: QVoice.Gender = QVoice.Gender.Unknown
|
||||||
age: QVoice.Age = QVoice.Age.Other
|
age: QVoice.Age = QVoice.Age.Other
|
||||||
quality: Quality = Quality.High
|
quality: Quality = Quality.High
|
||||||
|
|
||||||
|
engine_data: dict[str, str] | None = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def short_text(self) -> str:
|
def short_text(self) -> str:
|
||||||
return self.human_name or self.name or _('System default voice')
|
return self.human_name or self.name or _('System default voice')
|
||||||
@ -64,6 +84,7 @@ class Voice(NamedTuple):
|
|||||||
return (self.quality, self.short_text.lower())
|
return (self.quality, self.short_text.lower())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def qvoice_to_voice(v: QVoice) -> QVoice:
|
def qvoice_to_voice(v: QVoice) -> QVoice:
|
||||||
lang = canonicalize_lang(QLocale.languageToCode(v.language())) or 'und'
|
lang = canonicalize_lang(QLocale.languageToCode(v.language())) or 'und'
|
||||||
country = QLocale.territoryToString(v.locale().territory())
|
country = QLocale.territoryToString(v.locale().territory())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user