diff --git a/src/calibre/gui2/tts2/config.py b/src/calibre/gui2/tts2/config.py index 445a1717dc..c058f8b800 100644 --- a/src/calibre/gui2/tts2/config.py +++ b/src/calibre/gui2/tts2/config.py @@ -8,7 +8,6 @@ from calibre.gui2.tts2.types import ( AudioDeviceId, EngineMetadata, EngineSpecificSettings, - TrackingCapability, Voice, available_engines, create_tts_backend, @@ -28,9 +27,10 @@ class EngineChoice(QWidget): self.engine_choice = ec = QComboBox(self) l.addRow(_('Text-to-Speech &engine:'), ec) configured_engine_name = load_config().get('engine', '') - ec.addItem(_('Automatically select (currently {})').format(default_engine_name()), '') - for engine_name in available_engines(): - ec.addItem(engine_name, engine_name) + am = available_engines() + ec.addItem(_('Automatically select (currently {})').format(am[default_engine_name()].human_name), '') + for engine_name, metadata in am.items(): + ec.addItem(metadata.human_name, engine_name) idx = ec.findData(configured_engine_name) if idx > -1: ec.setCurrentIndex(idx) @@ -51,13 +51,7 @@ class EngineChoice(QWidget): def update_description(self): engine = self.value or default_engine_name() metadata = available_engines()[engine] - if metadata.tracking_capability is TrackingCapability.NoTracking: - text = _('The {} engine does not highlight words on the screen as they are spoken') - elif metadata.tracking_capability is TrackingCapability.WordByWord: - text = _('The {} engine highlights words on the screen as they are spoken') - else: - text = _('The {} engine highlights sentences on the screen as they are spoken') - self.engine_description.setText(text.format(engine)) + self.engine_description.setText(metadata.description) class FloatSlider(QSlider): diff --git a/src/calibre/gui2/tts2/types.py b/src/calibre/gui2/tts2/types.py index 0c1925a64e..842df8cbd4 100644 --- a/src/calibre/gui2/tts2/types.py +++ b/src/calibre/gui2/tts2/types.py @@ -42,7 +42,9 @@ class TrackingCapability(Enum): class EngineMetadata(NamedTuple): - name: 
Literal['winrt', 'darwin', 'sapi', 'flite', 'speechd'] + name: Literal['winrt', 'darwin', 'sapi', 'flite', 'speechd', 'piper'] + human_name: str + description: str tracking_capability: TrackingCapability = TrackingCapability.NoTracking allows_choosing_audio_device: bool = True has_multiple_output_modules: bool = False @@ -195,10 +197,10 @@ def available_engines() -> dict[str, EngineMetadata]: ans = {} e = QTextToSpeech() - def qt_engine_metadata(name: str, allows_choosing_audio_device: bool = False) -> EngineMetadata: + def qt_engine_metadata(name: str, human_name: str, desc: str, allows_choosing_audio_device: bool = False) -> EngineMetadata: e.setEngine(name) cap = int(e.engineCapabilities().value) - return EngineMetadata(name, + return EngineMetadata(name, human_name, desc, tracking_capability=TrackingCapability.WordByWord if cap & int( QTextToSpeech.Capability.WordByWordProgress.value) else TrackingCapability.NoTracking, allows_choosing_audio_device=allows_choosing_audio_device, @@ -206,25 +208,39 @@ for x in QTextToSpeech.availableEngines(): if x == 'winrt': - ans[x] = qt_engine_metadata(x, True) + ans[x] = qt_engine_metadata(x, _('Modern Windows Speech Engine'), _( + 'The "winrt" engine can track the currently spoken word on screen. Additional voices for it are available from Microsoft.' + ), True) elif x == 'darwin': - ans[x] = qt_engine_metadata(x) + ans[x] = qt_engine_metadata(x, _('macOS Speech Engine'), _( + 'The "darwin" engine can track the currently spoken word on screen. Additional voices for it are available from Apple.' + )) elif x == 'sapi': - ans[x] = qt_engine_metadata(x) + ans[x] = qt_engine_metadata(x, _('Legacy Windows Speech Engine'), _( + 'The "sapi" engine can track the currently spoken word on screen. It is no longer supported by Microsoft.' 
+ )) elif x == 'macos': # this is slated for removal in Qt 6.8 so skip it continue elif x == 'flite': - ans[x] = qt_engine_metadata(x, True) + ans[x] = qt_engine_metadata(x, _('The "flite" Speech Engine'), _( + 'The "flite" engine can track the currently spoken word on screen.' + ), True) elif x == 'speechd': continue if islinux: if piper_cmdline(): - ans['piper'] = EngineMetadata('piper', TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True) + ans['piper'] = EngineMetadata('piper', _('The Piper Neural Speech Engine'), _( + 'The "piper" engine can track the currently spoken sentence on screen. It uses a neural network ' + 'for natural sounding voices. The neural network is run locally on your computer, it is fairly resource intensive to run.' + ), TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True) from speechd.paths import SPD_SPAWN_CMD cmd = os.getenv("SPEECHD_CMD", SPD_SPAWN_CMD) if cmd and os.access(cmd, os.X_OK) and os.path.isfile(cmd): - ans['speechd'] = EngineMetadata('speechd', TrackingCapability.WordByWord, allows_choosing_audio_device=False, has_multiple_output_modules=True) + ans['speechd'] = EngineMetadata('speechd', _('The Speech Dispatcher Speech Engine'), _( + 'The "speechd" engine can usually track the currently spoken word on screen, however, it depends on the' + ' underlying output module used. The default espeak output module does support it.' + ), TrackingCapability.WordByWord, allows_choosing_audio_device=False, has_multiple_output_modules=True) return ans