diff --git a/src/calibre/gui2/tts2/config.py b/src/calibre/gui2/tts2/config.py
index 9f1a70e8b6..445a1717dc 100644
--- a/src/calibre/gui2/tts2/config.py
+++ b/src/calibre/gui2/tts2/config.py
@@ -132,8 +132,9 @@ class Voices(QTreeWidget):
         current_item = None
         def qv(parent, voice):
             nonlocal current_item
-            ans = QTreeWidgetItem(parent, [voice.short_text])
+            ans = QTreeWidgetItem(parent, [voice.short_text(engine_metadata)])
             ans.setData(0, Qt.ItemDataRole.UserRole, voice)
+            ans.setToolTip(0, voice.tooltip(engine_metadata))
             if current_voice == voice.name:
                 current_item = ans
             return ans
diff --git a/src/calibre/gui2/tts2/piper.py b/src/calibre/gui2/tts2/piper.py
index 0ebc27d008..281125943c 100644
--- a/src/calibre/gui2/tts2/piper.py
+++ b/src/calibre/gui2/tts2/piper.py
@@ -19,6 +19,7 @@ from qt.core import (
     QDialog,
     QIODevice,
     QIODeviceBase,
+    QMediaDevices,
     QObject,
     QProcess,
     Qt,
@@ -188,6 +189,7 @@ class Piper(TTSBackend):
 
         self._process: QProcess | None = None
         self._audio_sink: QAudioSink | None = None
+        self._current_voice: Voice | None = None
         self._utterances_being_synthesized: deque[Utterance] = deque()
         self._utterance_counter = count(start=1)
         self._utterances_being_spoken = UtteranceAudioQueue()
@@ -219,7 +221,10 @@ class Piper(TTSBackend):
             else:
                 self._set_error(f'Failed to start piper process: {cmdline}')
             return
-        self._utterances_being_synthesized.extend(split_into_utterances(text, self._utterance_counter))  # TODO: Use voice language
+        lang = 'en'
+        if self._current_voice and self._current_voice.language_code:
+            lang = self._current_voice.language_code
+        self._utterances_being_synthesized.extend(split_into_utterances(text, self._utterance_counter, lang))
         self._write_current_utterance()
 
     def pause(self) -> None:
@@ -284,9 +289,12 @@ class Piper(TTSBackend):
             raise Exception(str(e)) from e
         if not model_path:
             raise Exception('Could not download voice data')
-        with open(config_path) as f:
-            voice_metadata = json.load(f)
-        audio_rate = voice_metadata['audio']['sample_rate']
+        if 'metadata' not in voice.engine_data:
+            # Read as bytes so json detects the UTF encoding itself instead of using the locale default
+            with open(config_path, 'rb') as f:
+                voice.engine_data['metadata'] = json.load(f)
+        audio_rate = voice.engine_data['metadata']['audio']['sample_rate']
+        self._current_voice = voice
         self._utterances_being_spoken.clear()
         self._utterances_being_synthesized.clear()
         self._errors_from_piper.clear()
@@ -309,7 +317,19 @@ class Piper(TTSBackend):
         fmt.setSampleFormat(QAudioFormat.SampleFormat.Int16)
         fmt.setSampleRate(audio_rate)
         fmt.setChannelConfig(QAudioFormat.ChannelConfig.ChannelConfigMono)
-        self._audio_sink = QAudioSink(fmt, self)  # TODO: Make audio device configurable
+        # Look for the configured output device; dev stays None when none is configured or no id matches
+        dev = None
+        if s.audio_device_id:
+            for q in QMediaDevices.audioOutputs():
+                if bytes(q.id()) == s.audio_device_id.id:
+                    dev = q
+                    break
+        if dev:
+            self._audio_sink = QAudioSink(dev, fmt, self)
+        else:
+            self._audio_sink = QAudioSink(fmt, self)
+        if s.volume is not None:
+            self._audio_sink.setVolume(s.volume)
         self._audio_sink.stateChanged.connect(self._utterances_being_spoken.audio_state_changed)
         self._process.start()
         self._audio_sink.start(self._utterances_being_spoken)
diff --git a/src/calibre/gui2/tts2/types.py b/src/calibre/gui2/tts2/types.py
index 6743bcd934..0c1925a64e 100644
--- a/src/calibre/gui2/tts2/types.py
+++ b/src/calibre/gui2/tts2/types.py
@@ -62,6 +62,17 @@ class Quality(Enum):
     def from_piper_quality(self, x: str) -> 'Quality':
         return {'x_low': Quality.ExtraLow, 'low': Quality.Low, 'medium': Quality.Medium, 'high': Quality.High}[x]
 
+    @property
+    def localized_name(self) -> str:
+        """Label for this quality level, passed through _() for display."""
+        if self is Quality.Medium:
+            return _('Medium quality')
+        if self is Quality.Low:
+            return _('Low quality')
+        if self is Quality.ExtraLow:
+            return _('Extra low quality')
+        return _('High quality')
+
 
 class Voice(NamedTuple):
     name: str = ''
@@ -77,11 +88,32 @@ class Voice(NamedTuple):
     engine_data: dict[str, str] | None = None
 
     @property
-    def short_text(self) -> str:
+    def basic_name(self) -> str:
         return self.human_name or self.name or _('System default voice')
 
+    def short_text(self, m: EngineMetadata) -> str:
+        """Display name: basic_name, plus the territory when country_code is set and the quality when m reports it."""
+        ans = self.basic_name
+        if self.country_code:
+            territory = QLocale.codeToTerritory(self.country_code)
+            ans += f' ({QLocale.territoryToString(territory)})'
+        if m.voices_have_quality_metadata:
+            ans += f' [{self.quality.localized_name}]'
+        return ans
+
+    def tooltip(self, m: EngineMetadata) -> str:
+        """Newline separated notes, age and gender; empty notes, Age.Other and Gender.Unknown are left out."""
+        ans = []
+        if self.notes:
+            ans.append(self.notes)
+        if self.age is not QVoice.Age.Other:
+            ans.append(_('Age: {}').format(QVoice.ageName(self.age)))
+        if self.gender is not QVoice.Gender.Unknown:
+            ans.append(_('Gender: {}').format(QVoice.genderName(self.gender)))
+        return '\n'.join(ans)
+
     def sort_key(self) -> tuple[Quality, str]:
-        return (self.quality, self.short_text.lower())
+        return (self.quality.value, self.basic_name.lower())
 
 
 
@@ -190,10 +222,13 @@ def available_engines() -> dict[str, EngineMetadata]:
         elif x == 'speechd':
             continue
     if islinux:
+        if piper_cmdline():
+            ans['piper'] = EngineMetadata('piper', TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True)
         from speechd.paths import SPD_SPAWN_CMD
         cmd = os.getenv("SPEECHD_CMD", SPD_SPAWN_CMD)
         if cmd and os.access(cmd, os.X_OK) and os.path.isfile(cmd):
             ans['speechd'] = EngineMetadata('speechd', TrackingCapability.WordByWord, allows_choosing_audio_device=False, has_multiple_output_modules=True)
+
     return ans
 
 
@@ -202,6 +237,8 @@ def default_engine_name() -> str:
         return 'sapi' if tweaks.get('prefer_winsapi') else 'winrt'
     if ismacos:
         return 'darwin'
+    if 'piper' in available_engines():
+        return 'piper'
     if 'speechd' in available_engines():
         return 'speechd'
     return 'flite'
@@ -256,7 +293,12 @@ def create_tts_backend(force_engine: str | None = None) -> TTSBackend:
     engine_name = engine_name or default_engine_name()
     if engine_name not in available_engines():
         engine_name = default_engine_name()
-    if engine_name == 'speechd':
+    if engine_name == 'piper':
+        if engine_name not in engine_instances:
+            from calibre.gui2.tts2.piper import Piper
+            engine_instances[engine_name] = Piper(engine_name, QApplication.instance())
+        ans = engine_instances[engine_name]
+    elif engine_name == 'speechd':
         if engine_name not in engine_instances:
             from calibre.gui2.tts2.speechd import SpeechdTTSBackend
             engine_instances[engine_name] = SpeechdTTSBackend(engine_name, QApplication.instance())