Get the speechd backend working

This commit is contained in:
Kovid Goyal 2024-08-31 15:19:54 +05:30
parent 75d3714b41
commit 2ecab362ad
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 83 additions and 70 deletions

View File

@ -255,7 +255,7 @@ class EngineSpecificConfig(QWidget):
metadata = available_engines()[self.engine_name] metadata = available_engines()[self.engine_name]
output_module = self.output_module.currentData() or '' output_module = self.output_module.currentData() or ''
if metadata.has_multiple_output_modules: if metadata.has_multiple_output_modules:
output_module = output_module or self.default_output_modules[self.engine_name].default_output_module output_module = output_module or self.default_output_modules[self.engine_name]
all_voices = self.voice_data[self.engine_name][output_module] all_voices = self.voice_data[self.engine_name][output_module]
self.voices.set_voices(all_voices, s.voice_name, metadata) self.voices.set_voices(all_voices, s.voice_name, metadata)

View File

@ -1,34 +1,33 @@
#!/usr/bin/env python #!/usr/bin/env python
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net> # License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
from contextlib import suppress import atexit
from qt.core import QObject, Qt, QTextToSpeech, pyqtSignal from qt.core import QObject, Qt, QTextToSpeech, pyqtSignal
from speechd.client import CallbackType, DataMode, Priority, SpawnError, SSIPClient, SSIPCommunicationError from speechd.client import CallbackType, DataMode, Priority, SpawnError, SSIPClient, SSIPCommunicationError
from calibre import prepare_string_for_xml from calibre import prepare_string_for_xml
from calibre.gui2.tts2.types import EngineSpecificSettings, Voice from calibre.gui2.tts2.types import EngineSpecificSettings, TTSBackend, Voice
from calibre.spell.break_iterator import split_into_words_and_positions
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
MARK_TEMPLATE = '<mark name="{}"/>' MARK_TEMPLATE = '<mark name="{}"/>'
def add_markup(text_parts, mark_template=MARK_TEMPLATE, escape_marked_text=prepare_string_for_xml, chunk_size=0):
buf = [] def mark_words(text: str, lang: str) -> str:
size = 0 ans = []
for x in text_parts: pos = 0
if isinstance(x, int):
item = mark_template.format(x) def a(x):
else: ans.append(prepare_string_for_xml(x))
item = escape_marked_text(x)
sz = len(item) for offset, sz in split_into_words_and_positions(text, lang):
if chunk_size and size + sz > chunk_size: if offset > pos:
yield ''.join(buf).strip() a(text[pos:offset])
size = 0 ans.append(MARK_TEMPLATE.format(f'{offset}:{sz}'))
buf = [] a(text[offset:offset+sz])
size += sz pos = offset + sz
buf.append(item) return ''.join(ans)
if size:
yield ''.join(buf).strip()
def wrap_in_ssml(text): def wrap_in_ssml(text):
@ -36,10 +35,11 @@ def wrap_in_ssml(text):
text + '</s></speak>') text + '</s></speak>')
class SpeechdTTSBackend(QObject): class SpeechdTTSBackend(TTSBackend):
saying = pyqtSignal(int, int) saying = pyqtSignal(int, int)
state_changed = pyqtSignal(QTextToSpeech.State) state_changed = pyqtSignal(QTextToSpeech.State)
engine_name = 'speechd'
_event_signal = pyqtSignal(object, object) _event_signal = pyqtSignal(object, object)
@ -49,13 +49,14 @@ class SpeechdTTSBackend(QObject):
self._state = QTextToSpeech.State.Ready self._state = QTextToSpeech.State.Ready
self._voices = None self._voices = None
self._system_default_output_module = None self._system_default_output_module = None
self._current_settings = EngineSpecificSettings()
self._status = {'synthesizing': False, 'paused': False} self._status = {'synthesizing': False, 'paused': False}
self._next_begin_is_for_resume = False
self._ssip_client: SSIPClient | None = None self._ssip_client: SSIPClient | None = None
self._voice_lang = 'en'
self._last_mark = self._last_text = ''
self._next_cancel_is_for_pause = False
self._event_signal.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection) self._event_signal.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection)
self._current_marked_text = self._last_mark = None self._apply_settings(EngineSpecificSettings.create_from_config(self.engine_name))
self._apply_settings(EngineSpecificSettings.create_from_config(engine_name)) atexit.register(self.shutdown)
@property @property
def default_output_module(self) -> str: def default_output_module(self) -> str:
@ -72,52 +73,51 @@ class SpeechdTTSBackend(QObject):
self._set_error(str(e)) self._set_error(str(e))
return self._voices or {} return self._voices or {}
@property
def engine_name(self) -> str:
return 'speechd'
def change_rate(self, steps: int = 1) -> bool:
current = self._current_settings.rate
new_rate = max(-1, min(current + 0.2 * steps, 1))
if current == new_rate:
return False
try:
self._ssip_client.set_rate(int(max(-1, min(new_rate, 1)) * 100))
except Exception as e:
self._set_error(str(e))
return False
self._current_settings = self._current_settings._replace(rate=new_rate)
self._current_settings.save_to_config()
return True
def stop(self) -> None: def stop(self) -> None:
self._current_marked_text = self._last_mark = None self._last_mark = self._last_text = ''
self._next_cancel_is_for_pause = self._next_begin_is_for_resume = False
if self._ssip_client is not None: if self._ssip_client is not None:
try: try:
self._ssip_client.stop() self._ssip_client.stop()
except Exception as e: except Exception as e:
self._set_error(str(e)) self._set_error(str(e))
def speak_simple_text(self, text: str) -> None: def say(self, text: str) -> None:
self.stop() self.stop()
self._current_marked_text = self._last_mark = None self._speak(mark_words(text, self._voice_lang))
self._speak(prepare_string_for_xml(text))
def speak_marked_text(self, marked_text: list[str | int]) -> None: def error_message(self) -> str:
self.stop() return self._last_error
text = ''.join(add_markup(marked_text))
self._current_marked_text = text def pause(self) -> None:
self._last_mark = None if self._ssip_client is not None and self._status['synthesizing'] and not self._status['paused']:
try:
self._ssip_client.stop()
self._next_cancel_is_for_pause = True
except Exception as e:
self._set_error(str(e))
def resume(self) -> None:
if self._ssip_client is not None and self._status['synthesizing'] and self._status['paused']:
text = self._last_text
idx = text.find(self._last_mark)
if idx > -1:
text = text[idx:]
self._speak(text) self._speak(text)
def __del__(self): def reload_after_configure(self) -> None:
self._apply_settings(EngineSpecificSettings.create_from_config(self.engine_name))
def shutdown(self):
if self._ssip_client is not None: if self._ssip_client is not None:
with suppress(Exception): try:
self._ssip_client.cancel() self._ssip_client.cancel()
except Exception:
pass
try:
self._ssip_client.close() self._ssip_client.close()
except Exception:
pass
self._ssip_client = None self._ssip_client = None
shutdown = __del__
def _set_state(self, s: QTextToSpeech.State) -> None: def _set_state(self, s: QTextToSpeech.State) -> None:
self._state = s self._state = s
@ -174,15 +174,20 @@ class SpeechdTTSBackend(QObject):
if not self._ensure_state(): if not self._ensure_state():
return False return False
try: try:
om = settings.output_module or self._system_default_output_module
self._ssip_client.set_output_module(om)
if settings.voice_name:
for v in self.available_voices[om]:
if v.name == settings.voice_name:
self._voice_lang = v.language_code
break
self._ssip_client.set_synthesis_voice(settings.voice_name)
else:
self._voice_lang = self.available_voices[om][0].language_code
self._ssip_client.set_pitch_range(int(max(-1, min(settings.pitch, 1)) * 100)) self._ssip_client.set_pitch_range(int(max(-1, min(settings.pitch, 1)) * 100))
self._ssip_client.set_rate(int(max(-1, min(settings.rate, 1)) * 100)) self._ssip_client.set_rate(int(max(-1, min(settings.rate, 1)) * 100))
if settings.volume is not None: if settings.volume is not None:
self._ssip_client.set_volume(-100 + int(max(0, min(settings.volume, 1)) * 200)) self._ssip_client.set_volume(-100 + int(max(0, min(settings.volume, 1)) * 200))
om = settings.output_module or self._system_default_output_module
self._ssip_client.set_output_module(om)
if settings.voice_name:
self._ssip_client.set_synthesis_voice(settings.voice_name)
self._current_settings = settings
return True return True
except Exception as e: except Exception as e:
self._set_error(str(e)) self._set_error(str(e))
@ -205,13 +210,12 @@ class SpeechdTTSBackend(QObject):
def _update_status(self, callback_type, index_mark=None): def _update_status(self, callback_type, index_mark=None):
event = None event = None
if callback_type is CallbackType.INDEX_MARK: if callback_type is CallbackType.INDEX_MARK:
mark = int(index_mark) pos, sep, length = index_mark.partition(':')
self._last_mark = mark self._last_mark = MARK_TEMPLATE.format(index_mark)
self.saying.emit(mark, mark) self.saying.emit(int(pos), int(length))
elif callback_type is CallbackType.BEGIN: elif callback_type is CallbackType.BEGIN:
self._status = {'synthesizing': True, 'paused': False} self._status = {'synthesizing': True, 'paused': False}
self._set_state(QTextToSpeech.State.Speaking) self._set_state(QTextToSpeech.State.Speaking)
self._next_begin_is_for_resume = False
elif callback_type is CallbackType.END: elif callback_type is CallbackType.END:
self._status = {'synthesizing': False, 'paused': False} self._status = {'synthesizing': False, 'paused': False}
self._set_state(QTextToSpeech.State.Ready) self._set_state(QTextToSpeech.State.Ready)
@ -219,10 +223,10 @@ class SpeechdTTSBackend(QObject):
if self._next_cancel_is_for_pause: if self._next_cancel_is_for_pause:
self._status = {'synthesizing': True, 'paused': True} self._status = {'synthesizing': True, 'paused': True}
self._set_state(QTextToSpeech.State.Paused) self._set_state(QTextToSpeech.State.Paused)
self._next_cancel_is_for_pause = False
else: else:
self._status = {'synthesizing': False, 'paused': False} self._status = {'synthesizing': False, 'paused': False}
self._set_state(QTextToSpeech.State.Ready) self._set_state(QTextToSpeech.State.Ready)
self._next_cancel_is_for_pause = False
return event return event
def _speak_callback(self, callback_type: CallbackType, index_mark=None): def _speak_callback(self, callback_type: CallbackType, index_mark=None):
@ -230,4 +234,9 @@ class SpeechdTTSBackend(QObject):
def _speak(self, text: str) -> None: def _speak(self, text: str) -> None:
if self._ensure_state(): if self._ensure_state():
self._last_text = text
self._last_mark = ''
try:
self._ssip_client.speak(wrap_in_ssml(text), self._speak_callback) self._ssip_client.speak(wrap_in_ssml(text), self._speak_callback)
except Exception as e:
self._set_error(str(e))

View File

@ -180,6 +180,8 @@ def default_engine_name() -> str:
return 'sapi' if tweaks.get('prefer_winsapi') else 'winrt' return 'sapi' if tweaks.get('prefer_winsapi') else 'winrt'
if ismacos: if ismacos:
return 'darwin' return 'darwin'
if 'flite' in available_engines():
return 'flite'
return 'speechd' return 'speechd'
@ -208,7 +210,7 @@ class TTSBackend(QObject):
def error_message(self) -> str: def error_message(self) -> str:
raise NotImplementedError() raise NotImplementedError()
def reload_after_configure(self) -> str: def reload_after_configure(self) -> None:
raise NotImplementedError() raise NotImplementedError()
@ -216,6 +218,8 @@ engine_instances: dict[str, TTSBackend] = {}
def create_tts_backend(force_engine: str | None = None) -> TTSBackend: def create_tts_backend(force_engine: str | None = None) -> TTSBackend:
if not available_engines():
raise OSError('There are no available TTS engines. Install a TTS engine before trying to use Read Aloud, such as flite or speech-dispatcher')
prefs = load_config() prefs = load_config()
engine_name = prefs.get('engine', '') if force_engine is None else force_engine engine_name = prefs.get('engine', '') if force_engine is None else force_engine
engine_name = engine_name or default_engine_name() engine_name = engine_name or default_engine_name()