From 640193a52fa05cdefc667dd6be41f06e2eb3a10f Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 25 Aug 2024 21:35:51 +0530
Subject: [PATCH] Start work on speechd backend

---
Review notes (this section is ignored when the patch is applied):
  * speechd.py: call _create_ssip_client() by its real name, make
    _ensure_state() and _set_use_ssml() return a bool on every path, add the
    missing _set_state() and error_message(), use QTextToSpeech.State.Error.
  * SpeechdTTSBackend does not implement speak_marked_text() yet.
  * Hunk sizes and the diffstat are recomputed; the blob ids on the index
    lines are the ones from the original patch.

 src/calibre/gui2/tts2/qt.py      |  62 +++------------
 src/calibre/gui2/tts2/speechd.py | 130 +++++++++++++++++++++++++++++++
 src/calibre/gui2/tts2/types.py   |  76 ++++++++++++++++--
 3 files changed, 211 insertions(+), 57 deletions(-)
 create mode 100644 src/calibre/gui2/tts2/speechd.py

diff --git a/src/calibre/gui2/tts2/qt.py b/src/calibre/gui2/tts2/qt.py
index 3a41c99081..6f55a0e91d 100644
--- a/src/calibre/gui2/tts2/qt.py
+++ b/src/calibre/gui2/tts2/qt.py
@@ -1,13 +1,11 @@
 #!/usr/bin/env python
 # License: GPLv3 Copyright: 2024, Kovid Goyal
 
-import sys
 from typing import NamedTuple
 
 from qt.core import QMediaDevices, QObject, QTextToSpeech, pyqtSignal
 
-from calibre.constants import islinux
-from calibre.gui2.tts2.types import EngineSpecificSettings
+from calibre.gui2.tts2.types import EngineSpecificSettings, Voice, qvoice_to_voice
 
 
 class Pos(NamedTuple):
@@ -58,8 +56,16 @@ class QtTTSBackend(QObject):
     def __init__(self, engine_name: str = '', settings: EngineSpecificSettings = EngineSpecificSettings(), parent: QObject|None = None):
         super().__init__(parent)
         self.tracker = Tracker()
+        self._voices = None
         self.apply_settings(engine_name, settings)
 
+    @property
+    def available_voices(self) -> dict[str, tuple[Voice, ...]]:
+        '''Voices of the current engine under the single key '', queried once and then cached.'''
+        if self._voices is None:
+            self._voices = tuple(map(qvoice_to_voice, self.tts.availableVoices()))
+        return {'': self._voices}
+
     def apply_settings(self, engine_name: str, settings: EngineSpecificSettings) -> None:
         s = {}
         if settings.audio_device_id:
@@ -108,53 +114,3 @@ class QtTTSBackend(QObject):
         x = self.tracker.mark_word(start, length)
         if x is not None:
             self.saying.emit(x[0], x[1])
-
-
-def develop():
-    # {{{
-    marked_text = [2, 'Demonstration', ' ', 16, 'of', ' ', 19, 'DOCX', ' ', 24, 'support', ' ', 32, 'in', ' ', 35, 'calibre', '\n\t', 44, 'This', ' ', 49, 'document', ' ', 58, 'demonstrates', ' ', 71, 'the', ' ', 75, 'ability', ' ', 83, 'of', ' ', 86, 'the', ' ', 90, 'calibre', ' ', 98, 'DOCX', ' ', 103, 'Input', ' ', 109, 'plugin', ' ', 116, 'to', ' ', 119, 'convert', ' ', 127, 'the', ' ', 131, 'various', ' ', 139, 'typographic', ' ', 151, 'features', ' ', 160, 'in', ' ', 163, 'a', ' ', 165, 'Microsoft', ' ', 175, 'Word', ' ', 180, '(2007', ' ', 186, 'and', ' ', 190, 'newer)', ' ', 197, 'document.', ' ', 207, 'Convert', ' ', 215, 'this', ' ', 220, 'document', ' ', 229, 'to', ' ', 232, 'a', ' ', 234, 'modern', ' ', 241, 'ebook', ' ', 247, 'format,', ' ', 255, 'such', ' ', 260, 'as', ' ', 263, 'AZW3', ' ', 268, 'for', ' ', 272, 'Kindles', ' ', 280, 'or', ' ', 283, 'EPUB', ' ', 288, 'for', ' ', 292, 'other', ' ', 298, 'ebook', ' ', 304, 'readers,', ' ', 313, 'to', ' ', 316, 'see', ' ', 320, 'it', ' ', 323, 'in', ' ', 326, 'action.', '\n\t', 335, 'There', ' ', 341, 'is', ' ', 344, 'support', ' ', 352, 'for', ' ', 356, 'images,', ' ', 364, 'tables,', ' ', 372, 'lists,', ' ', 379, 'footnotes,', ' ', 390, 'endnotes,', ' ', 400, 'links,', ' ', 407, 'dropcaps', ' ', 416, 'and', ' ', 420, 'various', ' ', 428, 'types', ' ', 434, 'of', ' ', 437, 'text', ' ', 442, 'and', ' ', 446, 'paragraph', ' ', 456, 'level', ' ', 462, 'formatting.', '\n\t', 475, 'To', ' ', 478, 'see', ' ', 482, 'the', ' ', 486, 'DOCX', ' ', 491, 'conversion', ' ', 502, 'in', ' ', 505, 'action,', ' ', 513, 'simply', ' ', 520, 'add', ' ', 524, 'this', ' ', 529, 'file', ' ', 534, 'to', ' ', 537, 'calibre', ' ', 545, 'using', ' ', 551, 'the', ' ', 555, '“Add', ' ', 560, 'Books”', ' ', 567, 'button', ' ', 574, 'and', ' ', 578, 'then', ' ', 583, 'click', ' ', 589, '“Convert”.', ' ', 601, 'Set', ' ', 605, 'the', ' ', 609, 'output', ' ', 616, 'format', ' ', 623, 'in', ' ', 626, 'the', ' ', 630, 'top', ' ', 634, 'right', ' ', 640, 'corner', ' ', 647, 'of', ' ', 650, 'the', ' ', 654, 'conversion', ' ', 665, 'dialog', ' ', 672, 'to', ' ', 675, 'EPUB', ' ', 680, 'or', ' ', 683, 'AZW3', ' ', 688, 'and', ' ', 692, 'click', ' ', 698, '“OK”.', '\n\t\xa0\n\t']  # noqa }}}
-
-    from calibre.gui2 import Application
-    app = Application([])
-    app.shutdown_signal_received.connect(lambda: app.exit(1))
-    engine_name = ''
-    if islinux:
-        engine_name = 'flite'
-    tts = QtTTSBackend(engine_name=engine_name)
-    speech_started = False
-
-    def print_saying(s, e):
-        bits = []
-        in_region = False
-        for x in marked_text:
-            if isinstance(x, int):
-                if in_region:
-                    if x >= e:
-                        break
-                else:
-                    if x == s:
-                        in_region = True
-                    elif x > e:
-                        break
-            elif in_region:
-                bits.append(x)
-        print('Saying:', repr(''.join(bits)))
-
-    def state_changed(state):
-        nonlocal speech_started
-        if state == QTextToSpeech.State.Speaking:
-            speech_started = True
-        elif state == QTextToSpeech.State.Error:
-            print(tts.error_message(), file=sys.stderr)
-            app.exit(1)
-        elif state == QTextToSpeech.State.Ready:
-            if speech_started:
-                app.quit()
-    tts.saying.connect(print_saying)
-    tts.state_changed.connect(state_changed)
-    tts.speak_marked_text(marked_text)
-    app.exec()
-
-
-if __name__ == '__main__':
-    develop()
diff --git a/src/calibre/gui2/tts2/speechd.py b/src/calibre/gui2/tts2/speechd.py
new file mode 100644
index 0000000000..706579bbfd
--- /dev/null
+++ b/src/calibre/gui2/tts2/speechd.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+# License: GPLv3 Copyright: 2024, Kovid Goyal
+
+from qt.core import QObject, QTextToSpeech, pyqtSignal
+from speechd.client import DataMode, Priority, SpawnError, SSIPClient, SSIPCommunicationError
+
+from calibre.gui2.tts2.types import EngineSpecificSettings, Voice
+from calibre.utils.localization import canonicalize_lang
+
+
+class SpeechdTTSBackend(QObject):
+    '''Text-to-speech backend that drives speech-dispatcher through an SSIP client.'''
+
+    saying = pyqtSignal(int, int)
+    state_changed = pyqtSignal(QTextToSpeech.State)
+
+    def __init__(self, engine_name: str = '', settings: EngineSpecificSettings = EngineSpecificSettings(), parent: QObject|None = None):
+        super().__init__(parent)
+        self._last_error = ''
+        self._state = QTextToSpeech.State.Ready
+        self._voices = None
+        self._system_default_output_module = None
+        self.ssip_client: SSIPClient | None = None
+        self.apply_settings(engine_name, settings)
+
+    @property
+    def available_voices(self) -> dict[str, tuple[Voice, ...]]:
+        '''Voices keyed by output module name, empty when they cannot be queried.'''
+        if self._voices is None:
+            def v(x) -> Voice:
+                name, langcode, variant = x
+                return Voice(name, canonicalize_lang(langcode) or 'und', human_name=name, notes=variant)
+
+            if self._ensure_state():
+                ans = {}
+                try:
+                    om = self.ssip_client.get_output_module()
+                    # Select each module in turn to list its voices, then restore the original one
+                    for omq in self.ssip_client.list_output_modules():
+                        self.ssip_client.set_output_module(omq)
+                        ans[omq] = tuple(map(v, self.ssip_client.list_synthesis_voices()))
+                    self.ssip_client.set_output_module(om)
+                    self._voices = ans
+                except Exception as e:
+                    self._set_error(str(e))
+        return self._voices or {}
+
+    def apply_settings(self, engine_name: str, settings: EngineSpecificSettings) -> None:
+        '''Apply settings, turning any failure into the Error state instead of raising.'''
+        try:
+            self._apply_settings(settings)
+        except Exception as err:
+            self._set_error(str(err))
+
+    def error_message(self) -> str:
+        '''The message recorded by the most recent failure, empty if there was none.'''
+        return self._last_error
+
+    def _set_state(self, state: QTextToSpeech.State) -> None:
+        '''Record the new state and notify listeners via state_changed.'''
+        self._state = state
+        self.state_changed.emit(state)
+
+    def _set_error(self, msg: str) -> None:
+        '''Remember msg and move to the Error state.'''
+        self._last_error = msg
+        # Use the scoped enum member, matching QTextToSpeech.State.Ready in __init__
+        self._set_state(QTextToSpeech.State.Error)
+
+    def _create_ssip_client(self) -> bool:
+        '''Connect to speech-dispatcher. Returns False, with the error recorded, on failure.'''
+        try:
+            self.ssip_client = SSIPClient('calibre')
+            self.ssip_client.set_priority(Priority.TEXT)
+            return True
+        except SSIPCommunicationError as err:
+            ex = err.additional_exception()
+            if isinstance(ex, SpawnError):
+                self._set_error(_('Could not find speech-dispatcher on your system. Please install it.'))
+            else:
+                self._set_error(str(err))
+        except SpawnError:
+            self._set_error(_('Could not find speech-dispatcher on your system. Please install it.'))
+        except Exception as err:
+            self._set_error(str(err))
+        return False
+
+    def _ensure_state(self) -> bool:
+        '''Make sure there is a connected client, a default output module and SSML mode.'''
+        if self.ssip_client is None:
+            if not self._create_ssip_client():
+                return False
+        if self._system_default_output_module is None:
+            om = self.ssip_client.get_output_module()
+            if om == '(null)':
+                mods = self.ssip_client.list_output_modules()
+                if not mods:
+                    self._set_error(_('Speech dispatcher on this system is not configured with any available voices. Install some voices first.'))
+                    return False
+                om = mods[0]
+            # Only cache a usable module so that a failed lookup is retried on the next call
+            self._system_default_output_module = om
+        return self._set_use_ssml(True)
+
+    def _set_use_ssml(self, on: bool) -> bool:
+        '''Select SSML or plain text input. On failure the client is closed and False returned.'''
+        mode = DataMode.SSML if on else DataMode.TEXT
+        try:
+            self.ssip_client.set_data_mode(mode)
+            return True
+        except SSIPCommunicationError:
+            self.ssip_client.close()
+            self.ssip_client = None
+            self._set_error(_('Failed to set support for SSML to: {}').format(on))
+        return False
+
+    def _apply_settings(self, settings: EngineSpecificSettings) -> bool:
+        '''Send pitch, rate, volume, output module and voice to the client.'''
+        if not self._ensure_state():
+            return False
+        # Pitch and rate settings are -1..1 and volume is 0..1, all are scaled to -100..100
+        self.ssip_client.set_pitch_range(int(max(-1, min(settings.pitch, 1)) * 100))
+        self.ssip_client.set_rate(int(max(-1, min(settings.rate, 1)) * 100))
+        if settings.volume is not None:
+            self.ssip_client.set_volume(-100 + int(max(0, min(settings.volume, 1)) * 200))
+        om = settings.output_module or self._system_default_output_module
+        self.ssip_client.set_output_module(om)
+        if settings.voice_name:
+            self.ssip_client.set_synthesis_voice(settings.voice_name)
+        return True
diff --git a/src/calibre/gui2/tts2/types.py b/src/calibre/gui2/tts2/types.py
index 741eaeafe2..904a77b41a 100644
--- a/src/calibre/gui2/tts2/types.py
+++ b/src/calibre/gui2/tts2/types.py
@@ -5,8 +5,9 @@ from enum import Enum, auto
 from functools import lru_cache
 from typing import Literal, NamedTuple
 
-from qt.core import QLocale, QTextToSpeech, QVoice
+from qt.core import QLocale, QObject, QTextToSpeech, QVoice
 
+from calibre.constants import islinux
 from calibre.utils.localization import canonicalize_lang
 
 
@@ -21,6 +22,10 @@ class EngineMetadata(NamedTuple):
     tracking_capability: TrackingCapability = TrackingCapability.NoTracking
     allows_choosing_audio_device: bool = True
     can_synthesize_audio_data: bool = True
+    has_multiple_output_modules: bool = False
+    can_change_rate: bool = True
+    can_change_pitch: bool = True
+    can_change_volume: bool = True
 
 
 class Quality(Enum):
@@ -32,8 +37,8 @@ class Quality(Enum):
 class Voice(NamedTuple):
     name: str
     language_code: str
-    country_code: str
 
+    country_code: str = ''
     human_name: str = ''
     notes: str = ''
     gender: QVoice.Gender = QVoice.Gender.Unknown
@@ -58,6 +63,7 @@ class EngineSpecificSettings(NamedTuple):
     rate: float = 0  # -1 to 1 0 is normal speech
     pitch: float = 0  # -1 to 1 0 is normal speech
     volume: float | None = None  # 0 to 1, None is platform default volume
+    output_module: str = ''
 
 
 
@@ -86,6 +92,68 @@ def available_engines() -> dict[str, EngineMetadata]:
         elif x == 'flite':
             ans[x] = qt_engine_metadata(x, True)
         elif x == 'speechd':
-            # TODO: Replace this with our own speechd client that supports word tracking
-            ans[x] = qt_engine_metadata(x)
+            ans[x] = EngineMetadata(x, TrackingCapability.WordByWord, allows_choosing_audio_device=False, has_multiple_output_modules=True)
     return ans
+
+
+def create_tts_backend(engine_name: str = '', settings: EngineSpecificSettings = EngineSpecificSettings(), parent: QObject|None = None):
+    '''Create the backend for engine_name: speechd by default on Linux, the Qt default engine for unknown names.'''
+    if engine_name == '' and islinux:
+        engine_name = 'speechd'
+    if engine_name not in available_engines():
+        engine_name = ''
+    if engine_name == 'speechd':
+        from calibre.gui2.tts2.speechd import SpeechdTTSBackend
+        return SpeechdTTSBackend(engine_name, settings, parent)
+    from calibre.gui2.tts2.qt import QtTTSBackend
+    return QtTTSBackend(engine_name, settings, parent)
+
+
+def develop(engine_name=''):
+    '''Manual test: speak a sample text with the chosen engine and print each range reported as being said.'''
+    # {{{
+    marked_text = [2, 'Demonstration', ' ', 16, 'of', ' ', 19, 'DOCX', ' ', 24, 'support', ' ', 32, 'in', ' ', 35, 'calibre', '\n\t', 44, 'This', ' ', 49, 'document', ' ', 58, 'demonstrates', ' ', 71, 'the', ' ', 75, 'ability', ' ', 83, 'of', ' ', 86, 'the', ' ', 90, 'calibre', ' ', 98, 'DOCX', ' ', 103, 'Input', ' ', 109, 'plugin', ' ', 116, 'to', ' ', 119, 'convert', ' ', 127, 'the', ' ', 131, 'various', ' ', 139, 'typographic', ' ', 151, 'features', ' ', 160, 'in', ' ', 163, 'a', ' ', 165, 'Microsoft', ' ', 175, 'Word', ' ', 180, '(2007', ' ', 186, 'and', ' ', 190, 'newer)', ' ', 197, 'document.', ' ', 207, 'Convert', ' ', 215, 'this', ' ', 220, 'document', ' ', 229, 'to', ' ', 232, 'a', ' ', 234, 'modern', ' ', 241, 'ebook', ' ', 247, 'format,', ' ', 255, 'such', ' ', 260, 'as', ' ', 263, 'AZW3', ' ', 268, 'for', ' ', 272, 'Kindles', ' ', 280, 'or', ' ', 283, 'EPUB', ' ', 288, 'for', ' ', 292, 'other', ' ', 298, 'ebook', ' ', 304, 'readers,', ' ', 313, 'to', ' ', 316, 'see', ' ', 320, 'it', ' ', 323, 'in', ' ', 326, 'action.', '\n\t', 335, 'There', ' ', 341, 'is', ' ', 344, 'support', ' ', 352, 'for', ' ', 356, 'images,', ' ', 364, 'tables,', ' ', 372, 'lists,', ' ', 379, 'footnotes,', ' ', 390, 'endnotes,', ' ', 400, 'links,', ' ', 407, 'dropcaps', ' ', 416, 'and', ' ', 420, 'various', ' ', 428, 'types', ' ', 434, 'of', ' ', 437, 'text', ' ', 442, 'and', ' ', 446, 'paragraph', ' ', 456, 'level', ' ', 462, 'formatting.', '\n\t', 475, 'To', ' ', 478, 'see', ' ', 482, 'the', ' ', 486, 'DOCX', ' ', 491, 'conversion', ' ', 502, 'in', ' ', 505, 'action,', ' ', 513, 'simply', ' ', 520, 'add', ' ', 524, 'this', ' ', 529, 'file', ' ', 534, 'to', ' ', 537, 'calibre', ' ', 545, 'using', ' ', 551, 'the', ' ', 555, '“Add', ' ', 560, 'Books”', ' ', 567, 'button', ' ', 574, 'and', ' ', 578, 'then', ' ', 583, 'click', ' ', 589, '“Convert”.', ' ', 601, 'Set', ' ', 605, 'the', ' ', 609, 'output', ' ', 616, 'format', ' ', 623, 'in', ' ', 626, 'the', ' ', 630, 'top', ' ', 634, 'right', ' ', 640, 'corner', ' ', 647, 'of', ' ', 650, 'the', ' ', 654, 'conversion', ' ', 665, 'dialog', ' ', 672, 'to', ' ', 675, 'EPUB', ' ', 680, 'or', ' ', 683, 'AZW3', ' ', 688, 'and', ' ', 692, 'click', ' ', 698, '“OK”.', '\n\t\xa0\n\t']  # noqa }}}
+
+    import sys
+
+    from calibre.gui2 import Application
+    app = Application([])
+    app.shutdown_signal_received.connect(lambda: app.exit(1))
+    tts = create_tts_backend(engine_name=engine_name)
+    speech_started = False
+
+    def print_saying(s, e):
+        bits = []
+        in_region = False
+        for x in marked_text:
+            if isinstance(x, int):
+                if in_region:
+                    if x >= e:
+                        break
+                else:
+                    if x == s:
+                        in_region = True
+                    elif x > e:
+                        break
+            elif in_region:
+                bits.append(x)
+        print('Saying:', repr(''.join(bits)))
+
+    def state_changed(state):
+        nonlocal speech_started
+        if state == QTextToSpeech.State.Speaking:
+            speech_started = True
+        elif state == QTextToSpeech.State.Error:
+            print(tts.error_message(), file=sys.stderr)
+            app.exit(1)
+        elif state == QTextToSpeech.State.Ready:
+            if speech_started:
+                app.quit()
+    tts.saying.connect(print_saying)
+    tts.state_changed.connect(state_changed)
+    tts.speak_marked_text(marked_text)
+    app.exec()
+
+
+if __name__ == '__main__':
+    develop()