Basic functionality implemented in speechd backend

2025-11-23 06:53:02 -05:00 · 2024-08-27 15:32:04 +05:30 · 2024-08-27 15:32:04 +05:30 · 16f7ddb416
commit 16f7ddb416
parent fd0c64bbc6
2 changed files with 166 additions and 44 deletions
--- a/src/calibre/gui2/tts2/speechd.py
+++ b/src/calibre/gui2/tts2/speechd.py
@ -1,43 +1,67 @@
 #!/usr/bin/env python
 # License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
-from qt.core import QObject, QTextToSpeech, pyqtSignal
+from contextlib import suppress
 from speechd.client import DataMode, Priority, SpawnError, SSIPClient, SSIPCommunicationError
 from qt.core import QObject, Qt, QTextToSpeech, pyqtSignal
 from speechd.client import CallbackType, DataMode, Priority, SpawnError, SSIPClient, SSIPCommunicationError
 from calibre import prepare_string_for_xml
 from calibre.gui2.tts2.types import EngineSpecificSettings, Voice
 from calibre.utils.localization import canonicalize_lang
 MARK_TEMPLATE = '<mark name="{}"/>'
 def add_markup(text_parts, mark_template=MARK_TEMPLATE, escape_marked_text=prepare_string_for_xml, chunk_size=0):
    """Yield marked-up text built from *text_parts*, optionally split into chunks.

    Integer parts are rendered with ``mark_template.format(part)``; every other
    part is passed through ``escape_marked_text``.

    :param text_parts: iterable of ints (mark ids) and text pieces.
    :param mark_template: format string used to render an int part.
    :param escape_marked_text: callable applied to each non-int part.
    :param chunk_size: when non-zero, the pending chunk is flushed before an
        item that would push its length past this value. With 0 everything
        is yielded as a single chunk.
    :return: generator of whitespace-stripped strings.
    """
    buf = []
    size = 0
    for part in text_parts:
        if isinstance(part, int):
            item = mark_template.format(part)
        else:
            item = escape_marked_text(part)
        sz = len(item)
        # Flush only when something has been accumulated: an oversized first
        # item must not produce an empty chunk ahead of it. A single item
        # longer than chunk_size is still emitted whole.
        if chunk_size and size and size + sz > chunk_size:
            yield ''.join(buf).strip()
            size = 0
            buf = []
        size += sz
        buf.append(item)
    if size:
        yield ''.join(buf).strip()
 def wrap_in_ssml(text):
    """Return *text* wrapped in an SSML 1.0 ``<speak>`` document with one ``<s>`` element.

    *text* is inserted verbatim; any escaping must already have been done.
    """
    header = '<?xml version="1.0"?>\n<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"><s>'
    footer = '</s></speak>'
    return header + text + footer
 class SpeechdTTSBackend(QObject):
    saying = pyqtSignal(int, int)
    state_changed = pyqtSignal(QTextToSpeech.State)
    _event_signal = pyqtSignal(object, object)
    def __init__(self, engine_name: str = '', settings: EngineSpecificSettings = EngineSpecificSettings(), parent: QObject|None = None):
        """Set idle defaults, wire the internal event signal, then call apply_settings(engine_name, settings)."""
        super().__init__(parent)
        self._last_error = ''
        self._state = QTextToSpeech.State.Ready
        # Cache for available_voices; None means "not fetched yet".
        self._voices = None
        # Filled in by _ensure_state() the first time a client is available.
        self._system_default_output_module = None
-        self.ssip_client: SSIPClient | None = None
+        self._current_settings = EngineSpecificSettings()
        self._status = {'synthesizing': False, 'paused': False}
        # NOTE(review): _next_cancel_is_for_pause is not initialised here; it is first assigned in stop()
        # and read in _update_status() — confirm a CANCEL event cannot arrive before stop() has run.
        self._next_begin_is_for_resume = False
        self._ssip_client: SSIPClient | None = None
        # _speak_callback emits _event_signal; the queued connection hands the event to _update_status.
        self._event_signal.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection)
        self._current_marked_text = self._last_mark = None
        self.apply_settings(engine_name, settings)
    @property
    def available_voices(self) -> dict[str, tuple[Voice, ...]]:
       # Voices grouped by output module name. The result is cached in self._voices once fetched;
       # any exception during the lookup is reported through _set_error() and {} is returned.
       if self._voices is None:
            def v(x) -> Voice:
                name, langcode, variant = x
                return Voice(name, canonicalize_lang(langcode) or 'und', human_name=name, notes=variant)
            if self._ensure_state():
                ans = {}
            try:
-                    om = self.ssip_client.get_output_module()
+                self._voices = self._get_all_voices_for_all_output_modules()
                    for omq in self.ssip_client.list_output_modules():
                        self.ssip_client.set_output_module(omq)
                        ans[omq] = tuple(map(v, self.ssip_client.list_synthesis_voices()))
                    self.ssip_client.set_output_module(om)
                    self._voices = ans
            except Exception as e:
                self._set_error(str(e))
       return self._voices or {}
@ -48,14 +72,60 @@ class SpeechdTTSBackend(QObject):
        except Exception as err:
            self._set_error(str(err))
    def change_rate(self, steps: int = 1) -> bool:
        """Nudge the speaking rate by *steps* increments of 0.2.

        The rate is kept within [-1, 1]. Returns False when the clamped rate
        equals the current one or when the client rejects the change (the
        failure is reported through _set_error()); True once the new rate has
        been sent and stored in the current settings.
        """
        old_rate = self._current_settings.rate
        candidate = max(-1, min(old_rate + 0.2 * steps, 1))
        if candidate == old_rate:
            return False
        # The client is given the rate scaled by 100.
        scaled = int(candidate * 100)
        try:
            self._ssip_client.set_rate(scaled)
        except Exception as err:
            self._set_error(str(err))
            return False
        self._current_settings = self._current_settings._replace(rate=candidate)
        return True
    def stop(self) -> None:
        """Forget any tracked text/mark, clear the pause/resume flags and stop the client.

        Does nothing to the client when none exists. A failure of the client's
        stop() call is reported through _set_error() rather than raised.
        """
        self._current_marked_text = None
        self._last_mark = None
        self._next_cancel_is_for_pause = False
        self._next_begin_is_for_resume = False
        client = self._ssip_client
        if client is None:
            return
        try:
            client.stop()
        except Exception as err:
            self._set_error(str(err))
    def speak_simple_text(self, text: str) -> None:
        """Stop any current speech, then speak *text* with no mark tracking.

        The text is passed through prepare_string_for_xml() before being handed to _speak().
        """
        self.stop()
        # Simple text carries no marks, so nothing is tracked.
        self._current_marked_text = None
        self._last_mark = None
        escaped = prepare_string_for_xml(text)
        self._speak(escaped)
    def speak_marked_text(self, marked_text: list[str | int]) -> None:
        """Stop any current speech, then speak *marked_text* with its marks embedded.

        The parts are rendered by add_markup() and joined into one string, which
        is remembered as the current marked text; the last reached mark is reset.
        """
        self.stop()
        rendered = ''.join(add_markup(marked_text))
        self._current_marked_text, self._last_mark = rendered, None
        self._speak(rendered)
    def __del__(self):
        """Cancel pending speech, close the client connection and drop the reference.

        Also exposed as ``shutdown``. Errors from cancel() are ignored; an error
        from close() still propagates, but the client reference is cleared
        either way so a broken client is never reused.
        """
        # getattr: __del__ can run on an instance whose __init__ failed before
        # _ssip_client was assigned.
        client = getattr(self, '_ssip_client', None)
        if client is None:
            return
        try:
            with suppress(Exception):
                client.cancel()
            client.close()
        finally:
            self._ssip_client = None
    shutdown = __del__
    def _set_state(self, s: QTextToSpeech.State) -> None:
        """Store *s* as the current state and announce it via the state_changed signal."""
        self._state = s
        # The signal is emitted on every call, with the value just stored.
        self.state_changed.emit(self._state)
    def _set_error(self, msg: str) -> None:
        """Remember *msg* as the last error and switch the backend to the Error state."""
        self._last_error = msg
-        self._set_state(QTextToSpeech.Error)
+        self._set_state(QTextToSpeech.State.Error)
    def _create_ssip_client(self) -> bool:
        try:
-            self.ssip_client = SSIPClient('calibre')
+            self._ssip_client = SSIPClient('calibre')
-            self.ssip_client.set_priority(Priority.TEXT)
+            self._ssip_client.set_priority(Priority.TEXT)
            return True
        except SSIPCommunicationError as err:
            ex = err.additional_exception()
@ -70,38 +140,85 @@ class SpeechdTTSBackend(QObject):
        return False
    def _ensure_state(self) -> bool:
        """Make sure a client exists, a default output module is known and SSML mode is on.

        Returns False when the client cannot be created, when no output module
        is available, or when enabling SSML fails.
        """
-        if self.ssip_client is None:
+        if self._ssip_client is None:
-            if not self.create_ssip_client():
+            if not self._create_ssip_client():
                return False
        # The system default module is looked up only once and then cached.
        if self._system_default_output_module is None:
-            self._system_default_output_module = self.ssip_client.get_output_module()
+            self._system_default_output_module = self._ssip_client.get_output_module()
            # '(null)' is treated as "no default": fall back to the first listed module.
            if self._system_default_output_module == '(null)':
-                mods = self.ssip_client.list_output_modules()
+                mods = self._ssip_client.list_output_modules()
                if not mods:
-                    self._last_error = _('Speech dispatcher on this system is not configured with any available voices. Install some voices first.')
+                    self._set_error(_(
                        'Speech dispatcher on this system is not configured with any available output modules. Install some output modules first.'))
                    return False
                self._system_default_output_module = mods[0]
-        self._set_use_ssml(True)
+        return self._set_use_ssml(True)
    def _set_use_ssml(self, on: bool) -> bool:
        """Switch the client's data mode to SSML (on=True) or plain text (on=False).

        Returns True on success. On SSIPCommunicationError the client is closed
        and dropped, the error is recorded and False is returned.
        """
        mode = DataMode.SSML if on else DataMode.TEXT
        try:
-            self.ssip_client.set_data_mode(mode)
+            self._ssip_client.set_data_mode(mode)
            return True
        except SSIPCommunicationError:
-            self.ssip_client.close()
+            self._ssip_client.close()
-            self.ssip_client = None
+            self._ssip_client = None
            self._set_error(_('Failed to set support for SSML to: {}').format(on))
        return False
    def _apply_settings(self, settings: EngineSpecificSettings) -> bool:
        """Send pitch, rate, volume, output module and voice from *settings* to the client.

        Returns False if _ensure_state() fails; otherwise stores *settings* as
        the current settings and returns True. Pitch and rate are clamped to
        [-1, 1] and scaled by 100 before being sent.
        """
        if not self._ensure_state():
            return False
        # NOTE(review): pitch is sent through set_pitch_range() — confirm this is intended rather than set_pitch().
-        self.ssip_client.set_pitch_range(int(max(-1, min(settings.pitch, 1)) * 100))
+        self._ssip_client.set_pitch_range(int(max(-1, min(settings.pitch, 1)) * 100))
-        self.ssip_client.set_rate(int(max(-1, min(settings.rate, 1)) * 100))
+        self._ssip_client.set_rate(int(max(-1, min(settings.rate, 1)) * 100))
        # Volume is only sent when set; it is clamped to [0, 1] and mapped onto -100..100.
        if settings.volume is not None:
-            self.ssip_client.set_volume(-100 + int(max(0, min(settings.volume, 1)) * 200))
+            self._ssip_client.set_volume(-100 + int(max(0, min(settings.volume, 1)) * 200))
        # An empty output_module means "use the system default".
        om = settings.output_module or self._system_default_output_module
-        self.ssip_client.set_output_module(om)
+        self._ssip_client.set_output_module(om)
        if settings.voice_name:
-            self.ssip_client.set_synthesis_voice(settings.voice_name)
+            self._ssip_client.set_synthesis_voice(settings.voice_name)
        self._current_settings = settings
        return True
    def _get_all_voices_for_all_output_modules(self) -> dict[str, tuple[Voice, ...]]:
        """Return the synthesis voices of every output module, keyed by module name.

        Each module is selected in turn to list its voices, then the module
        that was selected on entry is selected again. Returns an empty dict
        when _ensure_state() fails. Exceptions raised by the client while
        listing propagate to the caller.
        """
        ans = {}

        def v(x) -> Voice:
            name, langcode, variant = x
            # Language codes that cannot be canonicalized are reported as 'und'.
            return Voice(name, canonicalize_lang(langcode) or 'und', human_name=name, notes=variant)

        if self._ensure_state():
            client = self._ssip_client
            om = client.get_output_module()
            try:
                for omq in client.list_output_modules():
                    client.set_output_module(omq)
                    ans[omq] = tuple(map(v, client.list_synthesis_voices()))
            except Exception:
                # Best-effort restore so a failure midway does not leave some
                # other module selected; the original error is the one raised.
                with suppress(Exception):
                    client.set_output_module(om)
                raise
            client.set_output_module(om)
        return ans
    def _update_status(self, callback_type, index_mark=None):
        """Turn a speech callback event into backend status, state and signals.

        INDEX_MARK records the mark and emits ``saying``; BEGIN switches to
        Speaking; END switches to Ready; CANCEL switches to Paused when the
        cancel was flagged as a pause, otherwise to Ready. Other callback
        types are ignored. Always returns None.
        """
        if callback_type is CallbackType.INDEX_MARK:
            reached = int(index_mark)
            self._last_mark = reached
            self.saying.emit(reached, reached)
            return None
        if callback_type is CallbackType.BEGIN:
            self._status = {'synthesizing': True, 'paused': False}
            self._set_state(QTextToSpeech.State.Speaking)
            self._next_begin_is_for_resume = False
            return None
        if callback_type is CallbackType.END:
            self._status = {'synthesizing': False, 'paused': False}
            self._set_state(QTextToSpeech.State.Ready)
            return None
        if callback_type is CallbackType.CANCEL:
            # A cancel flagged as a pause keeps the utterance alive; any other cancel ends it.
            paused = bool(self._next_cancel_is_for_pause)
            self._status = {'synthesizing': paused, 'paused': paused}
            self._set_state(QTextToSpeech.State.Paused if paused else QTextToSpeech.State.Ready)
            # The flag is one-shot: it is cleared after the state change has been announced.
            self._next_cancel_is_for_pause = False
        return None
    def _speak_callback(self, callback_type: CallbackType, index_mark=None):
        """Callback passed to the client's speak(); re-emits the event on _event_signal.

        The signal is connected to _update_status() with a queued connection in
        __init__, so the actual handling happens there, not in this call.
        """
        forward = self._event_signal.emit
        forward(callback_type, index_mark)
    def _speak(self, text: str) -> None:
        """Wrap *text* in SSML and send it to the client, registering _speak_callback for events.

        Silently does nothing when _ensure_state() fails (that method reports its own errors).
        """
        if not self._ensure_state():
            return
        ssml = wrap_in_ssml(text)
        self._ssip_client.speak(ssml, self._speak_callback)
--- a/src/calibre/gui2/tts2/types.py
+++ b/src/calibre/gui2/tts2/types.py
@ -8,7 +8,8 @@ from typing import Literal, NamedTuple
 from qt.core import QLocale, QObject, QTextToSpeech, QVoice
-from calibre.constants import islinux
+from calibre.constants import islinux, iswindows
 from calibre.utils.config_base import tweaks
 from calibre.utils.localization import canonicalize_lang
@ -74,10 +75,10 @@ def available_engines() -> dict[str, EngineMetadata]:
    def qt_engine_metadata(name: str, allows_choosing_audio_device: bool = False) -> EngineMetadata:
        e.setEngine(name)
-        cap = e.engineCapabilities()
+        cap = int(e.engineCapabilities().value)
-        return EngineMetadata(
+        return EngineMetadata(name,
-            name, TrackingCapability.WordByWord if cap & QTextToSpeech.Capability.WordByWordProgress else TrackingCapability.NoTracking,
+            TrackingCapability.WordByWord if cap & int(QTextToSpeech.Capability.WordByWordProgress.value) else TrackingCapability.NoTracking,
-            allows_choosing_audio_device, cap & QTextToSpeech.Capability.Synthesize)
+            allows_choosing_audio_device, bool(cap & int(QTextToSpeech.Capability.Synthesize.value)))
    for x in QTextToSpeech.availableEngines():
        if x == 'winrt':
@ -102,7 +103,10 @@ def available_engines() -> dict[str, EngineMetadata]:
 def create_tts_backend(engine_name: str = '', settings: EngineSpecificSettings = EngineSpecificSettings(), parent: QObject|None = None):
-    if engine_name == '' and islinux:
+    if engine_name == '':
        if iswindows and tweaks.get('prefer_winsapi'):
            engine_name = 'sapi'
        elif islinux:
            engine_name = 'speechd'
    if engine_name not in available_engines():
        engine_name = ''
@ -147,6 +151,7 @@ def develop(engine_name=''):
    def state_changed(state):
        nonlocal speech_started
        print('State changed:', state)
        if state == QTextToSpeech.State.Speaking:
            speech_started = True
        elif state == QTextToSpeech.State.Error: