mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Wire up the piper backend fully
This commit is contained in:
parent
83e877b9bc
commit
c4e4661e21
@ -132,8 +132,9 @@ class Voices(QTreeWidget):
|
||||
current_item = None
|
||||
def qv(parent, voice):
|
||||
nonlocal current_item
|
||||
ans = QTreeWidgetItem(parent, [voice.short_text])
|
||||
ans = QTreeWidgetItem(parent, [voice.short_text(engine_metadata)])
|
||||
ans.setData(0, Qt.ItemDataRole.UserRole, voice)
|
||||
ans.setToolTip(0, voice.tooltip(engine_metadata))
|
||||
if current_voice == voice.name:
|
||||
current_item = ans
|
||||
return ans
|
||||
|
@ -19,6 +19,7 @@ from qt.core import (
|
||||
QDialog,
|
||||
QIODevice,
|
||||
QIODeviceBase,
|
||||
QMediaDevices,
|
||||
QObject,
|
||||
QProcess,
|
||||
Qt,
|
||||
@ -188,6 +189,7 @@ class Piper(TTSBackend):
|
||||
self._process: QProcess | None = None
|
||||
self._audio_sink: QAudioSink | None = None
|
||||
|
||||
self._current_voice: Voice | None = None
|
||||
self._utterances_being_synthesized: deque[Utterance] = deque()
|
||||
self._utterance_counter = count(start=1)
|
||||
self._utterances_being_spoken = UtteranceAudioQueue()
|
||||
@ -219,7 +221,10 @@ class Piper(TTSBackend):
|
||||
else:
|
||||
self._set_error(f'Failed to start piper process: {cmdline}')
|
||||
return
|
||||
self._utterances_being_synthesized.extend(split_into_utterances(text, self._utterance_counter)) # TODO: Use voice language
|
||||
lang = 'en'
|
||||
if self._current_voice and self._current_voice.language_code:
|
||||
lang = self._current_voice.language_code
|
||||
self._utterances_being_synthesized.extend(split_into_utterances(text, self._utterance_counter, lang))
|
||||
self._write_current_utterance()
|
||||
|
||||
def pause(self) -> None:
|
||||
@ -284,9 +289,11 @@ class Piper(TTSBackend):
|
||||
raise Exception(str(e)) from e
|
||||
if not model_path:
|
||||
raise Exception('Could not download voice data')
|
||||
with open(config_path) as f:
|
||||
voice_metadata = json.load(f)
|
||||
audio_rate = voice_metadata['audio']['sample_rate']
|
||||
if 'metadata' not in voice.engine_data:
|
||||
with open(config_path) as f:
|
||||
voice.engine_data['metadata'] = json.load(f)
|
||||
audio_rate = voice.engine_data['metadata']['audio']['sample_rate']
|
||||
self._current_voice = voice
|
||||
self._utterances_being_spoken.clear()
|
||||
self._utterances_being_synthesized.clear()
|
||||
self._errors_from_piper.clear()
|
||||
@ -309,7 +316,18 @@ class Piper(TTSBackend):
|
||||
fmt.setSampleFormat(QAudioFormat.SampleFormat.Int16)
|
||||
fmt.setSampleRate(audio_rate)
|
||||
fmt.setChannelConfig(QAudioFormat.ChannelConfig.ChannelConfigMono)
|
||||
self._audio_sink = QAudioSink(fmt, self) # TODO: Make audio device configurable
|
||||
dev = None
|
||||
if s.audio_device_id:
|
||||
for q in QMediaDevices.audioOutputs():
|
||||
if bytes(q.id()) == s.audio_device_id.id:
|
||||
dev = q
|
||||
break
|
||||
if dev:
|
||||
self._audio_sink = QAudioSink(dev, fmt, self)
|
||||
else:
|
||||
self._audio_sink = QAudioSink(fmt, self)
|
||||
if s.volume is not None:
|
||||
self._audio_sink.setVolume(s.volume)
|
||||
self._audio_sink.stateChanged.connect(self._utterances_being_spoken.audio_state_changed)
|
||||
self._process.start()
|
||||
self._audio_sink.start(self._utterances_being_spoken)
|
||||
|
@ -62,6 +62,16 @@ class Quality(Enum):
|
||||
def from_piper_quality(self, x: str) -> 'Quality':
    '''
    Look up the Quality member that corresponds to a piper quality label.

    :param x: One of 'x_low', 'low', 'medium' or 'high'.
    :return: The Quality member registered for that label.
    :raises KeyError: If x is not one of the four labels above.
    '''
    by_label = {
        'x_low': Quality.ExtraLow,
        'low': Quality.Low,
        'medium': Quality.Medium,
        'high': Quality.High,
    }
    return by_label[x]
|
||||
|
||||
@property
def localized_name(self) -> str:
    '''
    Display label for this quality level, passed through ``_`` before
    being returned.
    '''
    if self is Quality.ExtraLow:
        label = _('Extra low quality')
    elif self is Quality.Low:
        label = _('Low quality')
    elif self is Quality.Medium:
        label = _('Medium quality')
    else:
        # Anything that is not one of the three members above is reported
        # as high quality.
        label = _('High quality')
    return label
|
||||
|
||||
|
||||
class Voice(NamedTuple):
|
||||
name: str = ''
|
||||
@ -77,11 +87,30 @@ class Voice(NamedTuple):
|
||||
engine_data: dict[str, str] | None = None
|
||||
|
||||
@property
|
||||
def short_text(self) -> str:
|
||||
def basic_name(self) -> str:
|
||||
return self.human_name or self.name or _('System default voice')
|
||||
|
||||
def short_text(self, m: EngineMetadata) -> str:
    '''
    Build the one-line label shown for this voice.

    The label starts with ``basic_name``. When ``country_code`` is set, the
    territory name Qt reports for that code is appended in parentheses.
    When the engine declares ``voices_have_quality_metadata``, the
    localized quality name is appended in square brackets.

    :param m: Metadata of the engine this voice belongs to.
    :return: The assembled label.
    '''
    pieces = [self.basic_name]
    if self.country_code:
        territory_name = QLocale.territoryToString(QLocale.codeToTerritory(self.country_code))
        pieces.append(f' ({territory_name})')
    if m.voices_have_quality_metadata:
        pieces.append(f' [{self.quality.localized_name}]')
    return ''.join(pieces)
|
||||
|
||||
def tooltip(self, m: EngineMetadata) -> str:
    '''
    Build the multi-line tooltip for this voice.

    Lines appear in this order, each only when applicable: the voice
    notes, the age (skipped for ``QVoice.Age.Other``) and the gender
    (skipped for ``QVoice.Gender.Unknown``).

    :param m: Engine metadata; accepted but not consulted here.
    :return: The lines joined with newlines; empty when nothing applies.
    '''
    lines = [self.notes] if self.notes else []
    if self.age is not QVoice.Age.Other:
        age_name = QVoice.ageName(self.age)
        lines.append(_('Age: {}').format(age_name))
    if self.gender is not QVoice.Gender.Unknown:
        gender_name = QVoice.genderName(self.gender)
        lines.append(_('Gender: {}').format(gender_name))
    return '\n'.join(lines)
|
||||
|
||||
def sort_key(self) -> tuple[int, str]:
    '''
    Key used to order voices: quality first, then name ignoring case.

    The first element is ``self.quality.value`` rather than the Quality
    member itself, and the second is the lower-cased ``basic_name``. The
    return annotation reflects that; it previously still advertised a
    Quality member as the first element.

    NOTE(review): the ``int`` element type assumes Quality members carry
    integer values -- confirm against the enum definition.

    :return: A ``(quality value, lower-cased basic name)`` tuple.
    '''
    return (self.quality.value, self.basic_name.lower())
|
||||
|
||||
|
||||
|
||||
@ -190,10 +219,13 @@ def available_engines() -> dict[str, EngineMetadata]:
|
||||
elif x == 'speechd':
|
||||
continue
|
||||
if islinux:
|
||||
if piper_cmdline():
|
||||
ans['piper'] = EngineMetadata('piper', TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True)
|
||||
from speechd.paths import SPD_SPAWN_CMD
|
||||
cmd = os.getenv("SPEECHD_CMD", SPD_SPAWN_CMD)
|
||||
if cmd and os.access(cmd, os.X_OK) and os.path.isfile(cmd):
|
||||
ans['speechd'] = EngineMetadata('speechd', TrackingCapability.WordByWord, allows_choosing_audio_device=False, has_multiple_output_modules=True)
|
||||
|
||||
return ans
|
||||
|
||||
|
||||
@ -202,6 +234,8 @@ def default_engine_name() -> str:
|
||||
return 'sapi' if tweaks.get('prefer_winsapi') else 'winrt'
|
||||
if ismacos:
|
||||
return 'darwin'
|
||||
if 'piper' in available_engines():
|
||||
return 'piper'
|
||||
if 'speechd' in available_engines():
|
||||
return 'speechd'
|
||||
return 'flite'
|
||||
@ -256,7 +290,12 @@ def create_tts_backend(force_engine: str | None = None) -> TTSBackend:
|
||||
engine_name = engine_name or default_engine_name()
|
||||
if engine_name not in available_engines():
|
||||
engine_name = default_engine_name()
|
||||
if engine_name == 'speechd':
|
||||
if engine_name == 'piper':
|
||||
if engine_name not in engine_instances:
|
||||
from calibre.gui2.tts2.piper import Piper
|
||||
engine_instances[engine_name] = Piper(engine_name, QApplication.instance())
|
||||
ans = engine_instances[engine_name]
|
||||
elif engine_name == 'speechd':
|
||||
if engine_name not in engine_instances:
|
||||
from calibre.gui2.tts2.speechd import SpeechdTTSBackend
|
||||
engine_instances[engine_name] = SpeechdTTSBackend(engine_name, QApplication.instance())
|
||||
|
Loading…
x
Reference in New Issue
Block a user