mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Basic functionality implemented in speechd backend
This commit is contained in:
parent
fd0c64bbc6
commit
16f7ddb416
@ -1,43 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
from qt.core import QObject, QTextToSpeech, pyqtSignal
|
||||
from speechd.client import DataMode, Priority, SpawnError, SSIPClient, SSIPCommunicationError
|
||||
from contextlib import suppress
|
||||
|
||||
from qt.core import QObject, Qt, QTextToSpeech, pyqtSignal
|
||||
from speechd.client import CallbackType, DataMode, Priority, SpawnError, SSIPClient, SSIPCommunicationError
|
||||
|
||||
from calibre import prepare_string_for_xml
|
||||
from calibre.gui2.tts2.types import EngineSpecificSettings, Voice
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
MARK_TEMPLATE = '<mark name="{}"/>'
|
||||
|
||||
def add_markup(text_parts, mark_template=MARK_TEMPLATE, escape_marked_text=prepare_string_for_xml, chunk_size=0):
    '''
    Convert a sequence of text fragments and integer marks into marked-up text.

    Integer items are rendered through ``mark_template``; every other item is
    passed through ``escape_marked_text``. The rendered items are concatenated
    and yielded as whitespace-stripped strings.

    :param text_parts: iterable of ``str`` (text) and ``int`` (mark) items
    :param mark_template: format string used to render an integer mark
    :param escape_marked_text: callable used to escape text items
    :param chunk_size: if non-zero, start a new chunk whenever adding the next
        item would make the current chunk longer than this many characters.
        A single item longer than ``chunk_size`` is still emitted whole.
    :return: generator of chunks; a single chunk when ``chunk_size`` is 0
    '''
    buf = []
    size = 0
    for x in text_parts:
        if isinstance(x, int):
            item = mark_template.format(x)
        else:
            item = escape_marked_text(x)
        sz = len(item)
        # Flush only when something has actually been buffered, otherwise an
        # oversized first item would cause an empty chunk to be emitted.
        if chunk_size and size and size + sz > chunk_size:
            yield ''.join(buf).strip()
            size = 0
            buf = []
        size += sz
        buf.append(item)
    if size:
        yield ''.join(buf).strip()
|
||||
|
||||
|
||||
def wrap_in_ssml(text):
    '''Return ``text`` wrapped as one sentence inside a complete SSML 1.0 document.'''
    prologue = '<?xml version="1.0"?>\n<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"><s>'
    epilogue = '</s></speak>'
    return ''.join((prologue, text, epilogue))
|
||||
|
||||
|
||||
class SpeechdTTSBackend(QObject):
|
||||
|
||||
saying = pyqtSignal(int, int)
|
||||
state_changed = pyqtSignal(QTextToSpeech.State)
|
||||
|
||||
_event_signal = pyqtSignal(object, object)
|
||||
|
||||
def __init__(self, engine_name: str = '', settings: EngineSpecificSettings = EngineSpecificSettings(), parent: QObject|None = None):
|
||||
super().__init__(parent)
|
||||
self._last_error = ''
|
||||
self._state = QTextToSpeech.State.Ready
|
||||
self._voices = None
|
||||
self._system_default_output_module = None
|
||||
self.ssip_client: SSIPClient | None = None
|
||||
self._current_settings = EngineSpecificSettings()
|
||||
self._status = {'synthesizing': False, 'paused': False}
|
||||
self._next_begin_is_for_resume = False
|
||||
self._ssip_client: SSIPClient | None = None
|
||||
self._event_signal.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection)
|
||||
self._current_marked_text = self._last_mark = None
|
||||
self.apply_settings(engine_name, settings)
|
||||
|
||||
@property
|
||||
def available_voices(self) -> dict[str, tuple[Voice, ...]]:
|
||||
if self._voices is None:
|
||||
def v(x) -> Voice:
|
||||
name, langcode, variant = x
|
||||
return Voice(name, canonicalize_lang(langcode) or 'und', human_name=name, notes=variant)
|
||||
|
||||
if self._ensure_state():
|
||||
ans = {}
|
||||
try:
|
||||
om = self.ssip_client.get_output_module()
|
||||
for omq in self.ssip_client.list_output_modules():
|
||||
self.ssip_client.set_output_module(omq)
|
||||
ans[omq] = tuple(map(v, self.ssip_client.list_synthesis_voices()))
|
||||
self.ssip_client.set_output_module(om)
|
||||
self._voices = ans
|
||||
self._voices = self._get_all_voices_for_all_output_modules()
|
||||
except Exception as e:
|
||||
self._set_error(str(e))
|
||||
return self._voices or {}
|
||||
@ -48,14 +72,60 @@ class SpeechdTTSBackend(QObject):
|
||||
except Exception as err:
|
||||
self._set_error(str(err))
|
||||
|
||||
def change_rate(self, steps: int = 1) -> bool:
    '''
    Change the speech rate by ``steps`` increments of 0.2, clamped to [-1, 1].

    :param steps: number of 0.2 increments to apply; negative values slow down
    :return: True if the rate was changed. False if the clamped rate equals the
        current one, or if setting the rate on the client raised, in which case
        the error is recorded via ``_set_error``.
    '''
    current = self._current_settings.rate
    new_rate = max(-1, min(current + 0.2 * steps, 1))
    if current == new_rate:
        return False
    try:
        # new_rate is already clamped above. Use round() rather than int() so
        # that accumulated floating point error (for example
        # -0.19999999999999996) maps to -20 instead of being truncated to -19.
        self._ssip_client.set_rate(round(new_rate * 100))
    except Exception as e:
        self._set_error(str(e))
        return False
    self._current_settings = self._current_settings._replace(rate=new_rate)
    return True
|
||||
|
||||
def stop(self) -> None:
    '''
    Forget the current marked text and pending pause/resume flags, then ask
    the SSIP client (if one exists) to stop. A failure to stop is recorded via
    ``_set_error`` rather than raised.
    '''
    self._current_marked_text = None
    self._last_mark = None
    self._next_cancel_is_for_pause = False
    self._next_begin_is_for_resume = False
    client = self._ssip_client
    if client is None:
        return
    try:
        client.stop()
    except Exception as err:
        self._set_error(str(err))
|
||||
|
||||
def speak_simple_text(self, text: str) -> None:
    '''Stop any current speech and speak ``text`` (XML-escaped) with no marks.'''
    self.stop()
    self._current_marked_text = None
    self._last_mark = None
    escaped = prepare_string_for_xml(text)
    self._speak(escaped)
|
||||
|
||||
def speak_marked_text(self, marked_text: list[str | int]) -> None:
    '''
    Stop any current speech and speak ``marked_text``, a list of text
    fragments interleaved with integer marks. The marked-up text is
    remembered in ``_current_marked_text`` and the last seen mark is reset.
    '''
    self.stop()
    markup = ''.join(add_markup(marked_text))
    self._last_mark = None
    self._current_marked_text = markup
    self._speak(markup)
|
||||
|
||||
def __del__(self):
    '''
    Cancel any pending speech and close the SSIP client connection, if any.

    Also available as ``shutdown()``. Safe to call more than once; errors
    from the client are ignored because this is best-effort cleanup.
    '''
    # getattr: __del__ also runs for objects whose __init__ raised before the
    # attribute was assigned.
    client = getattr(self, '_ssip_client', None)
    if client is not None:
        self._ssip_client = None
        # Suppress separately so that a failing cancel() cannot prevent the
        # connection from being closed.
        with suppress(Exception):
            client.cancel()
        with suppress(Exception):
            client.close()
shutdown = __del__
|
||||
|
||||
def _set_state(self, s: QTextToSpeech.State) -> None:
    '''Store ``s`` as the current state and emit ``state_changed`` with it.'''
    self._state = new_state = s
    self.state_changed.emit(new_state)
|
||||
|
||||
def _set_error(self, msg: str) -> None:
|
||||
self._last_error = msg
|
||||
self._set_state(QTextToSpeech.Error)
|
||||
self._set_state(QTextToSpeech.State.Error)
|
||||
|
||||
def _create_ssip_client(self) -> bool:
|
||||
try:
|
||||
self.ssip_client = SSIPClient('calibre')
|
||||
self.ssip_client.set_priority(Priority.TEXT)
|
||||
self._ssip_client = SSIPClient('calibre')
|
||||
self._ssip_client.set_priority(Priority.TEXT)
|
||||
return True
|
||||
except SSIPCommunicationError as err:
|
||||
ex = err.additional_exception()
|
||||
@ -70,38 +140,85 @@ class SpeechdTTSBackend(QObject):
|
||||
return False
|
||||
|
||||
def _ensure_state(self) -> bool:
|
||||
if self.ssip_client is None:
|
||||
if not self.create_ssip_client():
|
||||
if self._ssip_client is None:
|
||||
if not self._create_ssip_client():
|
||||
return False
|
||||
if self._system_default_output_module is None:
|
||||
self._system_default_output_module = self.ssip_client.get_output_module()
|
||||
self._system_default_output_module = self._ssip_client.get_output_module()
|
||||
if self._system_default_output_module == '(null)':
|
||||
mods = self.ssip_client.list_output_modules()
|
||||
mods = self._ssip_client.list_output_modules()
|
||||
if not mods:
|
||||
self._last_error = _('Speech dispatcher on this system is not configured with any available voices. Install some voices first.')
|
||||
self._set_error(_(
|
||||
'Speech dispatcher on this system is not configured with any available output modules. Install some output modules first.'))
|
||||
return False
|
||||
self._system_default_output_module = mods[0]
|
||||
self._set_use_ssml(True)
|
||||
return self._set_use_ssml(True)
|
||||
|
||||
def _set_use_ssml(self, on: bool) -> bool:
|
||||
mode = DataMode.SSML if on else DataMode.TEXT
|
||||
try:
|
||||
self.ssip_client.set_data_mode(mode)
|
||||
self._ssip_client.set_data_mode(mode)
|
||||
return True
|
||||
except SSIPCommunicationError:
|
||||
self.ssip_client.close()
|
||||
self.ssip_client = None
|
||||
self._ssip_client.close()
|
||||
self._ssip_client = None
|
||||
self._set_error(_('Failed to set support for SSML to: {}').format(on))
|
||||
return False
|
||||
|
||||
def _apply_settings(self, settings: EngineSpecificSettings) -> bool:
|
||||
if not self._ensure_state():
|
||||
return False
|
||||
self.ssip_client.set_pitch_range(int(max(-1, min(settings.pitch, 1)) * 100))
|
||||
self.ssip_client.set_rate(int(max(-1, min(settings.rate, 1)) * 100))
|
||||
self._ssip_client.set_pitch_range(int(max(-1, min(settings.pitch, 1)) * 100))
|
||||
self._ssip_client.set_rate(int(max(-1, min(settings.rate, 1)) * 100))
|
||||
if settings.volume is not None:
|
||||
self.ssip_client.set_volume(-100 + int(max(0, min(settings.volume, 1)) * 200))
|
||||
self._ssip_client.set_volume(-100 + int(max(0, min(settings.volume, 1)) * 200))
|
||||
om = settings.output_module or self._system_default_output_module
|
||||
self.ssip_client.set_output_module(om)
|
||||
self._ssip_client.set_output_module(om)
|
||||
if settings.voice_name:
|
||||
self.ssip_client.set_synthesis_voice(settings.voice_name)
|
||||
self._ssip_client.set_synthesis_voice(settings.voice_name)
|
||||
self._current_settings = settings
|
||||
return True
|
||||
|
||||
def _get_all_voices_for_all_output_modules(self) -> dict[str, tuple[Voice, ...]]:
    '''
    Query the SSIP client for the voices of every output module.

    Each output module is selected in turn to list its synthesis voices; the
    output module that was active beforehand is re-selected afterwards, even
    if listing fails part way through.

    :return: mapping of output module name to a tuple of its voices; empty if
        ``_ensure_state()`` fails
    '''
    ans = {}

    def v(x) -> Voice:
        name, langcode, variant = x
        return Voice(name, canonicalize_lang(langcode) or 'und', human_name=name, notes=variant)

    if self._ensure_state():
        om = self._ssip_client.get_output_module()
        try:
            for omq in self._ssip_client.list_output_modules():
                self._ssip_client.set_output_module(omq)
                ans[omq] = tuple(map(v, self._ssip_client.list_synthesis_voices()))
        finally:
            # Do not leave the client pointing at whichever module was being
            # probed when an error occurred.
            self._ssip_client.set_output_module(om)
    return ans
|
||||
|
||||
def _update_status(self, callback_type, index_mark=None):
    '''
    Handle a speech event delivered through ``_event_signal``.

    :param callback_type: the ``CallbackType`` of the event
    :param index_mark: name of the mark reached, for ``INDEX_MARK`` events

    INDEX_MARK records the mark and emits ``saying``; BEGIN switches to the
    Speaking state; END switches to Ready; CANCEL switches to Paused when the
    cancel was flagged as being for a pause, otherwise to Ready.
    '''
    if callback_type is CallbackType.INDEX_MARK:
        mark = int(index_mark)
        self._last_mark = mark
        self.saying.emit(mark, mark)
    elif callback_type is CallbackType.BEGIN:
        self._status = {'synthesizing': True, 'paused': False}
        self._set_state(QTextToSpeech.State.Speaking)
        self._next_begin_is_for_resume = False
    elif callback_type is CallbackType.END:
        self._status = {'synthesizing': False, 'paused': False}
        self._set_state(QTextToSpeech.State.Ready)
    elif callback_type is CallbackType.CANCEL:
        # getattr: the flag may not have been assigned yet if a cancel event
        # arrives before stop() has ever run.
        if getattr(self, '_next_cancel_is_for_pause', False):
            self._status = {'synthesizing': True, 'paused': True}
            self._set_state(QTextToSpeech.State.Paused)
        else:
            self._status = {'synthesizing': False, 'paused': False}
            self._set_state(QTextToSpeech.State.Ready)
        # The flag applies to a single cancel event only.
        self._next_cancel_is_for_pause = False
|
||||
|
||||
def _speak_callback(self, callback_type: CallbackType, index_mark=None):
    '''
    Callback handed to the SSIP client's ``speak()``. Re-emits the event on
    ``_event_signal`` rather than handling it directly.
    '''
    # NOTE(review): presumably called from the SSIP client's own thread,
    # hence the hop through a signal - confirm against speechd.client.
    forward = self._event_signal.emit
    forward(callback_type, index_mark)
|
||||
|
||||
def _speak(self, text: str) -> None:
    '''
    Wrap ``text`` in SSML and send it to the SSIP client, with
    ``_speak_callback`` as the event callback. Does nothing if
    ``_ensure_state()`` fails.
    '''
    if not self._ensure_state():
        return
    ssml = wrap_in_ssml(text)
    self._ssip_client.speak(ssml, self._speak_callback)
|
||||
|
@ -8,7 +8,8 @@ from typing import Literal, NamedTuple
|
||||
|
||||
from qt.core import QLocale, QObject, QTextToSpeech, QVoice
|
||||
|
||||
from calibre.constants import islinux
|
||||
from calibre.constants import islinux, iswindows
|
||||
from calibre.utils.config_base import tweaks
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
|
||||
@ -74,10 +75,10 @@ def available_engines() -> dict[str, EngineMetadata]:
|
||||
|
||||
def qt_engine_metadata(name: str, allows_choosing_audio_device: bool = False) -> EngineMetadata:
|
||||
e.setEngine(name)
|
||||
cap = e.engineCapabilities()
|
||||
return EngineMetadata(
|
||||
name, TrackingCapability.WordByWord if cap & QTextToSpeech.Capability.WordByWordProgress else TrackingCapability.NoTracking,
|
||||
allows_choosing_audio_device, cap & QTextToSpeech.Capability.Synthesize)
|
||||
cap = int(e.engineCapabilities().value)
|
||||
return EngineMetadata(name,
|
||||
TrackingCapability.WordByWord if cap & int(QTextToSpeech.Capability.WordByWordProgress.value) else TrackingCapability.NoTracking,
|
||||
allows_choosing_audio_device, bool(cap & int(QTextToSpeech.Capability.Synthesize.value)))
|
||||
|
||||
for x in QTextToSpeech.availableEngines():
|
||||
if x == 'winrt':
|
||||
@ -102,7 +103,10 @@ def available_engines() -> dict[str, EngineMetadata]:
|
||||
|
||||
|
||||
def create_tts_backend(engine_name: str = '', settings: EngineSpecificSettings = EngineSpecificSettings(), parent: QObject|None = None):
|
||||
if engine_name == '' and islinux:
|
||||
if engine_name == '':
|
||||
if iswindows and tweaks.get('prefer_winsapi'):
|
||||
engine_name = 'sapi'
|
||||
elif islinux:
|
||||
engine_name = 'speechd'
|
||||
if engine_name not in available_engines():
|
||||
engine_name = ''
|
||||
@ -147,6 +151,7 @@ def develop(engine_name=''):
|
||||
|
||||
def state_changed(state):
|
||||
nonlocal speech_started
|
||||
print('State changed:', state)
|
||||
if state == QTextToSpeech.State.Speaking:
|
||||
speech_started = True
|
||||
elif state == QTextToSpeech.State.Error:
|
||||
|
Loading…
x
Reference in New Issue
Block a user