Nicer engine names/descriptions

2025-08-11 09:13:57 -04:00 · 2024-09-03 11:20:38 +05:30 · 2024-09-03 11:20:38 +05:30 · a23944c5ca
commit a23944c5ca
parent 16120d8b39
2 changed files with 30 additions and 20 deletions
--- a/src/calibre/gui2/tts2/config.py
+++ b/src/calibre/gui2/tts2/config.py
@ -8,7 +8,6 @@ from calibre.gui2.tts2.types import (
    AudioDeviceId,
    EngineMetadata,
    EngineSpecificSettings,
-    TrackingCapability,
    Voice,
    available_engines,
    create_tts_backend,
@ -28,9 +27,10 @@ class EngineChoice(QWidget):
        self.engine_choice = ec = QComboBox(self)
        l.addRow(_('Text-to-Speech &engine:'), ec)
        configured_engine_name = load_config().get('engine', '')
-        ec.addItem(_('Automatically select (currently {})').format(default_engine_name()), '')
-        for engine_name in available_engines():
-            ec.addItem(engine_name, engine_name)
+        am = available_engines()
+        ec.addItem(_('Automatically select (currently {})').format(am[default_engine_name()].human_name), '')
+        for engine_name, metadata in am.items():
+            ec.addItem(metadata.human_name, engine_name)
        idx = ec.findData(configured_engine_name)
        if idx > -1:
            ec.setCurrentIndex(idx)
@ -51,13 +51,7 @@ class EngineChoice(QWidget):
    def update_description(self):
        engine = self.value or default_engine_name()
        metadata = available_engines()[engine]
-        if metadata.tracking_capability is TrackingCapability.NoTracking:
-            text = _('The {} engine does not highlight words on the screen as they are spoken')
-        elif metadata.tracking_capability is TrackingCapability.WordByWord:
-            text = _('The {} engine highlights words on the screen as they are spoken')
-        else:
-            text = _('The {} engine highlights sentences on the screen as they are spoken')
-        self.engine_description.setText(text.format(engine))
+        self.engine_description.setText(metadata.description)


 class FloatSlider(QSlider):
--- a/src/calibre/gui2/tts2/types.py
+++ b/src/calibre/gui2/tts2/types.py
@ -42,7 +42,9 @@ class TrackingCapability(Enum):


 class EngineMetadata(NamedTuple):
-    name: Literal['winrt', 'darwin', 'sapi', 'flite', 'speechd']
+    name: Literal['winrt', 'darwin', 'sapi', 'flite', 'speechd', 'piper']
+    human_name: str
+    description: str
    tracking_capability: TrackingCapability = TrackingCapability.NoTracking
    allows_choosing_audio_device: bool = True
    has_multiple_output_modules: bool = False
@ -195,10 +197,10 @@ def available_engines() -> dict[str, EngineMetadata]:
    ans = {}
    e = QTextToSpeech()

-    def qt_engine_metadata(name: str, allows_choosing_audio_device: bool = False) -> EngineMetadata:
+    def qt_engine_metadata(name: str, human_name: str, desc: str, allows_choosing_audio_device: bool = False) -> EngineMetadata:
        e.setEngine(name)
        cap = int(e.engineCapabilities().value)
-        return EngineMetadata(name,
+        return EngineMetadata(name, human_name, desc,
            tracking_capability=TrackingCapability.WordByWord if cap & int(
                QTextToSpeech.Capability.WordByWordProgress.value) else TrackingCapability.NoTracking,
            allows_choosing_audio_device=allows_choosing_audio_device,
@ -206,25 +208,39 @@ def available_engines() -> dict[str, EngineMetadata]:

    for x in QTextToSpeech.availableEngines():
        if x == 'winrt':
-            ans[x] = qt_engine_metadata(x, True)
+            ans[x] = qt_engine_metadata(x, _('Modern Windows Speech Engine'), _(
+                'The "winrt" engine can track the currently spoken word on screen. Additional voices for it are available from Microsoft.'
+                ), True)
        elif x == 'darwin':
-            ans[x] = qt_engine_metadata(x)
+            ans[x] = qt_engine_metadata(x, _('macOS Speech Engine'), _(
+                'The "darwin" engine can track the currently spoken word on screen. Additional voices for it are available from Apple.'
+            ))
        elif x == 'sapi':
-            ans[x] = qt_engine_metadata(x)
+            ans[x] = qt_engine_metadata(x, _('Legacy Windows Speech Engine'), _(
+                'The "sapi" engine can track the currently spoken word on screen. It is no longer supported by Microsoft.'
+            ))
        elif x == 'macos':
            # this is slated for removal in Qt 6.8 so skip it
            continue
        elif x == 'flite':
-            ans[x] = qt_engine_metadata(x, True)
+            ans[x] = qt_engine_metadata(x, _('The "flite" Speech engine'), _(
+                'The "flite" engine can track the currently spoken word on screen.'
+            ), True)
        elif x == 'speechd':
            continue
    if islinux:
        if piper_cmdline():
-            ans['piper'] = EngineMetadata('piper', TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True)
+            ans['piper'] = EngineMetadata('piper', _('The Piper Neural Speech Engine'), _(
+                'The "piper" engine can track the currently spoken sentence on screen. It uses a neural network '
+                'for natural sounding voices. The neural network is run locally on your computer, it is fairly resource intensive to run.'
+            ), TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True)
        from speechd.paths import SPD_SPAWN_CMD
        cmd = os.getenv("SPEECHD_CMD", SPD_SPAWN_CMD)
        if cmd and os.access(cmd, os.X_OK) and os.path.isfile(cmd):
-            ans['speechd'] = EngineMetadata('speechd', TrackingCapability.WordByWord, allows_choosing_audio_device=False, has_multiple_output_modules=True)
+            ans['speechd'] = EngineMetadata('speechd', _('The Speech Dispatcher Speech Engine'), _(
+                'The "speechd" engine can usually track the currently spoken word on screen, however, it depends on the'
+                ' underlying output module. The default espeak output module does support it.'
+            ), TrackingCapability.WordByWord, allows_choosing_audio_device=False, has_multiple_output_modules=True)

    return ans