mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Get the speechd backend working
This commit is contained in:
parent
75d3714b41
commit
2ecab362ad
@ -255,7 +255,7 @@ class EngineSpecificConfig(QWidget):
|
|||||||
metadata = available_engines()[self.engine_name]
|
metadata = available_engines()[self.engine_name]
|
||||||
output_module = self.output_module.currentData() or ''
|
output_module = self.output_module.currentData() or ''
|
||||||
if metadata.has_multiple_output_modules:
|
if metadata.has_multiple_output_modules:
|
||||||
output_module = output_module or self.default_output_modules[self.engine_name].default_output_module
|
output_module = output_module or self.default_output_modules[self.engine_name]
|
||||||
all_voices = self.voice_data[self.engine_name][output_module]
|
all_voices = self.voice_data[self.engine_name][output_module]
|
||||||
self.voices.set_voices(all_voices, s.voice_name, metadata)
|
self.voices.set_voices(all_voices, s.voice_name, metadata)
|
||||||
|
|
||||||
|
@ -1,34 +1,33 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
from contextlib import suppress
|
import atexit
|
||||||
|
|
||||||
from qt.core import QObject, Qt, QTextToSpeech, pyqtSignal
|
from qt.core import QObject, Qt, QTextToSpeech, pyqtSignal
|
||||||
from speechd.client import CallbackType, DataMode, Priority, SpawnError, SSIPClient, SSIPCommunicationError
|
from speechd.client import CallbackType, DataMode, Priority, SpawnError, SSIPClient, SSIPCommunicationError
|
||||||
|
|
||||||
from calibre import prepare_string_for_xml
|
from calibre import prepare_string_for_xml
|
||||||
from calibre.gui2.tts2.types import EngineSpecificSettings, Voice
|
from calibre.gui2.tts2.types import EngineSpecificSettings, TTSBackend, Voice
|
||||||
|
from calibre.spell.break_iterator import split_into_words_and_positions
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
|
|
||||||
MARK_TEMPLATE = '<mark name="{}"/>'
|
MARK_TEMPLATE = '<mark name="{}"/>'
|
||||||
|
|
||||||
def add_markup(text_parts, mark_template=MARK_TEMPLATE, escape_marked_text=prepare_string_for_xml, chunk_size=0):
|
|
||||||
buf = []
|
def mark_words(text: str, lang: str) -> str:
|
||||||
size = 0
|
ans = []
|
||||||
for x in text_parts:
|
pos = 0
|
||||||
if isinstance(x, int):
|
|
||||||
item = mark_template.format(x)
|
def a(x):
|
||||||
else:
|
ans.append(prepare_string_for_xml(x))
|
||||||
item = escape_marked_text(x)
|
|
||||||
sz = len(item)
|
for offset, sz in split_into_words_and_positions(text, lang):
|
||||||
if chunk_size and size + sz > chunk_size:
|
if offset > pos:
|
||||||
yield ''.join(buf).strip()
|
a(text[pos:offset])
|
||||||
size = 0
|
ans.append(MARK_TEMPLATE.format(f'{offset}:{sz}'))
|
||||||
buf = []
|
a(text[offset:offset+sz])
|
||||||
size += sz
|
pos = offset + sz
|
||||||
buf.append(item)
|
return ''.join(ans)
|
||||||
if size:
|
|
||||||
yield ''.join(buf).strip()
|
|
||||||
|
|
||||||
|
|
||||||
def wrap_in_ssml(text):
|
def wrap_in_ssml(text):
|
||||||
@ -36,10 +35,11 @@ def wrap_in_ssml(text):
|
|||||||
text + '</s></speak>')
|
text + '</s></speak>')
|
||||||
|
|
||||||
|
|
||||||
class SpeechdTTSBackend(QObject):
|
class SpeechdTTSBackend(TTSBackend):
|
||||||
|
|
||||||
saying = pyqtSignal(int, int)
|
saying = pyqtSignal(int, int)
|
||||||
state_changed = pyqtSignal(QTextToSpeech.State)
|
state_changed = pyqtSignal(QTextToSpeech.State)
|
||||||
|
engine_name = 'speechd'
|
||||||
|
|
||||||
_event_signal = pyqtSignal(object, object)
|
_event_signal = pyqtSignal(object, object)
|
||||||
|
|
||||||
@ -49,13 +49,14 @@ class SpeechdTTSBackend(QObject):
|
|||||||
self._state = QTextToSpeech.State.Ready
|
self._state = QTextToSpeech.State.Ready
|
||||||
self._voices = None
|
self._voices = None
|
||||||
self._system_default_output_module = None
|
self._system_default_output_module = None
|
||||||
self._current_settings = EngineSpecificSettings()
|
|
||||||
self._status = {'synthesizing': False, 'paused': False}
|
self._status = {'synthesizing': False, 'paused': False}
|
||||||
self._next_begin_is_for_resume = False
|
|
||||||
self._ssip_client: SSIPClient | None = None
|
self._ssip_client: SSIPClient | None = None
|
||||||
|
self._voice_lang = 'en'
|
||||||
|
self._last_mark = self._last_text = ''
|
||||||
|
self._next_cancel_is_for_pause = False
|
||||||
self._event_signal.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection)
|
self._event_signal.connect(self._update_status, type=Qt.ConnectionType.QueuedConnection)
|
||||||
self._current_marked_text = self._last_mark = None
|
self._apply_settings(EngineSpecificSettings.create_from_config(self.engine_name))
|
||||||
self._apply_settings(EngineSpecificSettings.create_from_config(engine_name))
|
atexit.register(self.shutdown)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def default_output_module(self) -> str:
|
def default_output_module(self) -> str:
|
||||||
@ -72,52 +73,51 @@ class SpeechdTTSBackend(QObject):
|
|||||||
self._set_error(str(e))
|
self._set_error(str(e))
|
||||||
return self._voices or {}
|
return self._voices or {}
|
||||||
|
|
||||||
@property
|
|
||||||
def engine_name(self) -> str:
|
|
||||||
return 'speechd'
|
|
||||||
|
|
||||||
def change_rate(self, steps: int = 1) -> bool:
|
|
||||||
current = self._current_settings.rate
|
|
||||||
new_rate = max(-1, min(current + 0.2 * steps, 1))
|
|
||||||
if current == new_rate:
|
|
||||||
return False
|
|
||||||
try:
|
|
||||||
self._ssip_client.set_rate(int(max(-1, min(new_rate, 1)) * 100))
|
|
||||||
except Exception as e:
|
|
||||||
self._set_error(str(e))
|
|
||||||
return False
|
|
||||||
self._current_settings = self._current_settings._replace(rate=new_rate)
|
|
||||||
self._current_settings.save_to_config()
|
|
||||||
return True
|
|
||||||
|
|
||||||
def stop(self) -> None:
|
def stop(self) -> None:
|
||||||
self._current_marked_text = self._last_mark = None
|
self._last_mark = self._last_text = ''
|
||||||
self._next_cancel_is_for_pause = self._next_begin_is_for_resume = False
|
|
||||||
if self._ssip_client is not None:
|
if self._ssip_client is not None:
|
||||||
try:
|
try:
|
||||||
self._ssip_client.stop()
|
self._ssip_client.stop()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._set_error(str(e))
|
self._set_error(str(e))
|
||||||
|
|
||||||
def speak_simple_text(self, text: str) -> None:
|
def say(self, text: str) -> None:
|
||||||
self.stop()
|
self.stop()
|
||||||
self._current_marked_text = self._last_mark = None
|
self._speak(mark_words(text, self._voice_lang))
|
||||||
self._speak(prepare_string_for_xml(text))
|
|
||||||
|
|
||||||
def speak_marked_text(self, marked_text: list[str | int]) -> None:
|
def error_message(self) -> str:
|
||||||
self.stop()
|
return self._last_error
|
||||||
text = ''.join(add_markup(marked_text))
|
|
||||||
self._current_marked_text = text
|
def pause(self) -> None:
|
||||||
self._last_mark = None
|
if self._ssip_client is not None and self._status['synthesizing'] and not self._status['paused']:
|
||||||
|
try:
|
||||||
|
self._ssip_client.stop()
|
||||||
|
self._next_cancel_is_for_pause = True
|
||||||
|
except Exception as e:
|
||||||
|
self._set_error(str(e))
|
||||||
|
|
||||||
|
def resume(self) -> None:
|
||||||
|
if self._ssip_client is not None and self._status['synthesizing'] and self._status['paused']:
|
||||||
|
text = self._last_text
|
||||||
|
idx = text.find(self._last_mark)
|
||||||
|
if idx > -1:
|
||||||
|
text = text[idx:]
|
||||||
self._speak(text)
|
self._speak(text)
|
||||||
|
|
||||||
def __del__(self):
|
def reload_after_configure(self) -> None:
|
||||||
|
self._apply_settings(EngineSpecificSettings.create_from_config(self.engine_name))
|
||||||
|
|
||||||
|
def shutdown(self):
|
||||||
if self._ssip_client is not None:
|
if self._ssip_client is not None:
|
||||||
with suppress(Exception):
|
try:
|
||||||
self._ssip_client.cancel()
|
self._ssip_client.cancel()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
self._ssip_client.close()
|
self._ssip_client.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
self._ssip_client = None
|
self._ssip_client = None
|
||||||
shutdown = __del__
|
|
||||||
|
|
||||||
def _set_state(self, s: QTextToSpeech.State) -> None:
|
def _set_state(self, s: QTextToSpeech.State) -> None:
|
||||||
self._state = s
|
self._state = s
|
||||||
@ -174,15 +174,20 @@ class SpeechdTTSBackend(QObject):
|
|||||||
if not self._ensure_state():
|
if not self._ensure_state():
|
||||||
return False
|
return False
|
||||||
try:
|
try:
|
||||||
|
om = settings.output_module or self._system_default_output_module
|
||||||
|
self._ssip_client.set_output_module(om)
|
||||||
|
if settings.voice_name:
|
||||||
|
for v in self.available_voices[om]:
|
||||||
|
if v.name == settings.voice_name:
|
||||||
|
self._voice_lang = v.language_code
|
||||||
|
break
|
||||||
|
self._ssip_client.set_synthesis_voice(settings.voice_name)
|
||||||
|
else:
|
||||||
|
self._voice_lang = self.available_voices[om][0].language_code
|
||||||
self._ssip_client.set_pitch_range(int(max(-1, min(settings.pitch, 1)) * 100))
|
self._ssip_client.set_pitch_range(int(max(-1, min(settings.pitch, 1)) * 100))
|
||||||
self._ssip_client.set_rate(int(max(-1, min(settings.rate, 1)) * 100))
|
self._ssip_client.set_rate(int(max(-1, min(settings.rate, 1)) * 100))
|
||||||
if settings.volume is not None:
|
if settings.volume is not None:
|
||||||
self._ssip_client.set_volume(-100 + int(max(0, min(settings.volume, 1)) * 200))
|
self._ssip_client.set_volume(-100 + int(max(0, min(settings.volume, 1)) * 200))
|
||||||
om = settings.output_module or self._system_default_output_module
|
|
||||||
self._ssip_client.set_output_module(om)
|
|
||||||
if settings.voice_name:
|
|
||||||
self._ssip_client.set_synthesis_voice(settings.voice_name)
|
|
||||||
self._current_settings = settings
|
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._set_error(str(e))
|
self._set_error(str(e))
|
||||||
@ -205,13 +210,12 @@ class SpeechdTTSBackend(QObject):
|
|||||||
def _update_status(self, callback_type, index_mark=None):
|
def _update_status(self, callback_type, index_mark=None):
|
||||||
event = None
|
event = None
|
||||||
if callback_type is CallbackType.INDEX_MARK:
|
if callback_type is CallbackType.INDEX_MARK:
|
||||||
mark = int(index_mark)
|
pos, sep, length = index_mark.partition(':')
|
||||||
self._last_mark = mark
|
self._last_mark = MARK_TEMPLATE.format(index_mark)
|
||||||
self.saying.emit(mark, mark)
|
self.saying.emit(int(pos), int(length))
|
||||||
elif callback_type is CallbackType.BEGIN:
|
elif callback_type is CallbackType.BEGIN:
|
||||||
self._status = {'synthesizing': True, 'paused': False}
|
self._status = {'synthesizing': True, 'paused': False}
|
||||||
self._set_state(QTextToSpeech.State.Speaking)
|
self._set_state(QTextToSpeech.State.Speaking)
|
||||||
self._next_begin_is_for_resume = False
|
|
||||||
elif callback_type is CallbackType.END:
|
elif callback_type is CallbackType.END:
|
||||||
self._status = {'synthesizing': False, 'paused': False}
|
self._status = {'synthesizing': False, 'paused': False}
|
||||||
self._set_state(QTextToSpeech.State.Ready)
|
self._set_state(QTextToSpeech.State.Ready)
|
||||||
@ -219,10 +223,10 @@ class SpeechdTTSBackend(QObject):
|
|||||||
if self._next_cancel_is_for_pause:
|
if self._next_cancel_is_for_pause:
|
||||||
self._status = {'synthesizing': True, 'paused': True}
|
self._status = {'synthesizing': True, 'paused': True}
|
||||||
self._set_state(QTextToSpeech.State.Paused)
|
self._set_state(QTextToSpeech.State.Paused)
|
||||||
|
self._next_cancel_is_for_pause = False
|
||||||
else:
|
else:
|
||||||
self._status = {'synthesizing': False, 'paused': False}
|
self._status = {'synthesizing': False, 'paused': False}
|
||||||
self._set_state(QTextToSpeech.State.Ready)
|
self._set_state(QTextToSpeech.State.Ready)
|
||||||
self._next_cancel_is_for_pause = False
|
|
||||||
return event
|
return event
|
||||||
|
|
||||||
def _speak_callback(self, callback_type: CallbackType, index_mark=None):
|
def _speak_callback(self, callback_type: CallbackType, index_mark=None):
|
||||||
@ -230,4 +234,9 @@ class SpeechdTTSBackend(QObject):
|
|||||||
|
|
||||||
def _speak(self, text: str) -> None:
|
def _speak(self, text: str) -> None:
|
||||||
if self._ensure_state():
|
if self._ensure_state():
|
||||||
|
self._last_text = text
|
||||||
|
self._last_mark = ''
|
||||||
|
try:
|
||||||
self._ssip_client.speak(wrap_in_ssml(text), self._speak_callback)
|
self._ssip_client.speak(wrap_in_ssml(text), self._speak_callback)
|
||||||
|
except Exception as e:
|
||||||
|
self._set_error(str(e))
|
||||||
|
@ -180,6 +180,8 @@ def default_engine_name() -> str:
|
|||||||
return 'sapi' if tweaks.get('prefer_winsapi') else 'winrt'
|
return 'sapi' if tweaks.get('prefer_winsapi') else 'winrt'
|
||||||
if ismacos:
|
if ismacos:
|
||||||
return 'darwin'
|
return 'darwin'
|
||||||
|
if 'flite' in available_engines():
|
||||||
|
return 'flite'
|
||||||
return 'speechd'
|
return 'speechd'
|
||||||
|
|
||||||
|
|
||||||
@ -208,7 +210,7 @@ class TTSBackend(QObject):
|
|||||||
def error_message(self) -> str:
|
def error_message(self) -> str:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def reload_after_configure(self) -> str:
|
def reload_after_configure(self) -> None:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
@ -216,6 +218,8 @@ engine_instances: dict[str, TTSBackend] = {}
|
|||||||
|
|
||||||
|
|
||||||
def create_tts_backend(force_engine: str | None = None) -> TTSBackend:
|
def create_tts_backend(force_engine: str | None = None) -> TTSBackend:
|
||||||
|
if not available_engines():
|
||||||
|
raise OSError('There are no available TTS engines. Install a TTS engine before trying to use Read Aloud, such as flite or speech-dispatcher')
|
||||||
prefs = load_config()
|
prefs = load_config()
|
||||||
engine_name = prefs.get('engine', '') if force_engine is None else force_engine
|
engine_name = prefs.get('engine', '') if force_engine is None else force_engine
|
||||||
engine_name = engine_name or default_engine_name()
|
engine_name = engine_name or default_engine_name()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user