diff --git a/src/calibre/gui2/tts2/config.py b/src/calibre/gui2/tts2/config.py
new file mode 100644
index 0000000000..239a44a485
--- /dev/null
+++ b/src/calibre/gui2/tts2/config.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python
+# License: GPLv3 Copyright: 2024, Kovid Goyal
+
+
+from qt.core import QCheckBox, QFormLayout, QLabel, QLocale, QSize, QSlider, Qt, QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, pyqtSignal
+
+from calibre.gui2.tts2.types import (
+    EngineMetadata,
+    EngineSpecificSettings,
+    TrackingCapability,
+    Voice,
+    available_engines,
+    create_tts_backend,
+    default_engine_name,
+    load_config,
+)
+from calibre.gui2.widgets2 import Dialog, QComboBox
+
+
+class EngineChoice(QWidget):
+    '''Combo box for choosing the Text-to-Speech engine, with a description label below it.'''
+
+    # Emitted with the newly selected engine name, '' meaning automatic selection
+    changed = pyqtSignal(str)
+
+    def __init__(self, parent):
+        super().__init__(parent)
+        self.l = l = QFormLayout(self)
+        self.engine_choice = ec = QComboBox(self)
+        l.addRow(_('Text-to-Speech &engine:'), ec)
+        configured_engine_name = load_config().get('engine', '')
+        # The entries belong in the combo box, not the layout. The engine name
+        # is stored as item data so that findData() and currentData() work.
+        ec.addItem(_('Automatically select (currently {})').format(default_engine_name()), '')
+        for engine_name in available_engines():
+            ec.addItem(engine_name, engine_name)
+        idx = ec.findData(configured_engine_name)
+        if idx > -1:
+            ec.setCurrentIndex(idx)
+        self.engine_description = la = QLabel(self)
+        la.setWordWrap(True)
+        l.addWidget(la)
+        ec.currentIndexChanged.connect(self.current_changed)
+        self.update_description()
+
+    @property
+    def value(self) -> str:
+        '''The selected engine name, or an empty string for automatic selection.'''
+        return self.engine_choice.currentData() or ''
+
+    def current_changed(self):
+        '''Emit changed with the new selection and refresh the description label.'''
+        self.changed.emit(self.value)
+        self.update_description()
+
+    def update_description(self):
+        '''Show whether the selected engine highlights words, sentences or nothing.'''
+        engine = self.value or default_engine_name()
+        metadata = available_engines()[engine]
+        if metadata.tracking_capability is TrackingCapability.NoTracking:
+            text = _('The {} engine does not highlight words on the screen as they are spoken')
+        elif metadata.tracking_capability is TrackingCapability.WordByWord:
+            text = _('The {} engine highlights words on the screen as they are spoken')
+        else:
+            text = _('The {} engine highlights sentences on the screen as they are spoken')
+        # The templates have a {} placeholder for the engine name
+        self.engine_description.setText(text.format(engine))
+
+
+class FloatSlider(QSlider):
+    '''A slider over a float range, stored as integer slider positions scaled by factor.'''
+
+    def __init__(self, minimum: float = -1, maximum: float = 1, factor: int = 10, parent=None):
+        # QSlider.__init__(parent) would pass parent in place of self
+        super().__init__(parent)
+        self.setRange(int(minimum * factor), int(maximum * factor))
+        # A span smaller than two units (the volume slider) would otherwise get a step of zero
+        self.setSingleStep(max(1, int((self.maximum() - self.minimum()) / (2 * factor))))
+        self.setPageStep(5 * self.singleStep())
+        self.setTicksPosition(QSlider.TickPosition.TicksBelow)
+        if maximum - minimum >= 2:
+            self.setTickInterval((self.maximum() - self.minimum()) // 2)
+        else:
+            self.setTickInterval(self.maximum() - self.minimum())
+        self.factor = factor
+
+    @property
+    def val(self) -> float:
+        '''The slider position converted back to the float range.'''
+        return self.value() / self.factor
+
+    @val.setter
+    def val(self, v) -> None:
+        # round() instead of int() so that float error (7.999999999999999) is not truncated
+        self.setValue(round(v * self.factor))
+
+
+class Volume(QWidget):
+    '''Volume control: either the system default or an explicit level between 0 and 1.'''
+
+    def __init__(self, parent=None):
+        super().__init__(parent)
+        self.l = l = QFormLayout(self)
+        self.system = e = QCheckBox(_('Use system default volume'), self)
+        l.addWidget(e)
+        self.vol = v = FloatSlider(minimum=0, parent=self)
+        l.addRow(_('&Volume of speech'), v)
+        # The checkbox is stored as self.system, there is no self.e attribute
+        e.toggled.connect(self.update_state)
+        self.update_state()
+
+    def update_state(self):
+        '''Disable the slider while the system default volume is selected.'''
+        self.vol.setEnabled(not self.system.isChecked())
+
+    @property
+    def val(self):
+        '''None for the system default volume, otherwise the slider value.'''
+        if self.system.isChecked():
+            return None
+        return self.vol.val
+
+    @val.setter
+    def val(self, v):
+        self.system.setChecked(v is None)
+        if v is not None:
+            self.vol.val = v
+
+
+class Voices(QTreeWidget):
+    '''Tree of the available voices, grouped by language.'''
+
+    def __init__(self, parent=None):
+        super().__init__(parent)
+        self.system_default_voice = Voice()
+
+    def sizeHint(self) -> QSize:
+        return QSize(400, 600)
+
+    def set_voices(self, all_voices: tuple[Voice, ...], current_voice: str, engine_metadata: EngineMetadata) -> None:
+        '''
+        Repopulate the tree with a system default entry followed by all_voices
+        grouped under their language. The entry whose name is current_voice
+        becomes the current item, falling back to the system default entry.
+        engine_metadata is not used yet.
+        '''
+        self.clear()
+        root = self.invisibleRootItem()
+
+        def qv(parent, voice):
+            # QTreeWidgetItem takes a list of column texts, not a bare string
+            ans = QTreeWidgetItem(parent, [voice.short_text])
+            ans.setData(0, Qt.ItemDataRole.UserRole, voice)
+            return ans
+
+        def lang(langcode):
+            return QLocale.languageToString(QLocale.codeToLanguage(langcode))
+
+        current_item = qv(root, self.system_default_voice)
+        vmap = {}
+        for v in all_voices:
+            vmap.setdefault(v.language_code, []).append(v)
+        for langcode in sorted(vmap, key=lambda lc: lang(lc).lower()):
+            parent = QTreeWidgetItem(root, [lang(langcode)])
+            for voice in sorted(vmap[langcode], key=lambda v: v.sort_key()):
+                item = qv(parent, voice)
+                if current_voice and voice.name == current_voice:
+                    current_item = item
+        self.setCurrentItem(current_item)
+
+
+class EngineSpecificConfig(QWidget):
+    '''Controls for the settings of one engine: output module, rate, pitch, volume and voice.'''
+
+    def __init__(self, parent):
+        super().__init__(parent)
+        self.l = l = QFormLayout(self)
+        self.output_module = om = QComboBox(self)
+        l.addRow(_('&Output module:'), om)
+        self.engine_name = ''
+        om.currentIndexChanged.connect(self.rebuild_voices)
+        # Caches keyed by engine name, filled on demand by set_engine()
+        self.engine_instances = {}
+        self.voice_data = {}
+        self.engine_specific_settings = {}
+        self.rate = r = FloatSlider(parent=self)
+        l.addRow(_('&Speed of speech:'), r)
+        self.pitch = p = FloatSlider(parent=self)
+        l.addRow(_('&Pitch of speech:'), p)
+        self.volume = v = Volume(self)
+        l.addWidget(v)
+        self.voices = v = Voices(self)
+        la = QLabel(_('V&oices:'))
+        la.setBuddy(v)
+        l.addWidget(la)
+        l.addWidget(v)
+
+    def set_engine(self, engine_name):
+        '''Show the settings of engine_name, creating its backend on first use.'''
+        self.engine_name = engine_name
+        metadata = available_engines()[engine_name]
+        if engine_name not in self.engine_instances:
+            self.engine_instances[engine_name] = tts = create_tts_backend(force_engine=engine_name)
+            self.voice_data[engine_name] = tts.available_voices
+            self.engine_specific_settings[engine_name] = EngineSpecificSettings.create_from_config(engine_name)
+        else:
+            tts = self.engine_instances[engine_name]
+        s = self.engine_specific_settings[engine_name]
+        # Fill the combo box silently, the voices are rebuilt once at the end
+        self.output_module.blockSignals(True)
+        try:
+            self.output_module.clear()
+            if metadata.has_multiple_output_modules and len(self.voice_data[engine_name]) > 1:
+                self.output_module.setVisible(True)
+                self.layout().setRowVisible(self.output_module, True)
+                self.output_module.addItem(_('System default (currently {})').format(tts.default_output_module), '')
+                for om in self.voice_data[engine_name]:
+                    self.output_module.addItem(om, om)
+                if (idx := self.output_module.findData(s.output_module)) > -1:
+                    self.output_module.setCurrentIndex(idx)
+            else:
+                self.layout().setRowVisible(self.output_module, False)
+        finally:
+            self.output_module.blockSignals(False)
+        self.rate.val = s.rate
+        self.pitch.val = s.pitch
+        self.layout().setRowVisible(self.pitch, metadata.can_change_pitch)
+        self.volume.val = s.volume
+        self.rebuild_voices()
+
+    def rebuild_voices(self):
+        '''Refill the voices tree for the current engine and the selected output module.'''
+        try:
+            s = self.engine_specific_settings[self.engine_name]
+        except KeyError:
+            return  # no engine has been set yet
+        metadata = available_engines()[self.engine_name]
+        # currentData() is None while the combo box is empty
+        output_module = self.output_module.currentData() or ''
+        if metadata.has_multiple_output_modules:
+            # engine_instances is a dict, the backend is looked up by engine name
+            output_module = output_module or self.engine_instances[self.engine_name].default_output_module
+        all_voices = self.voice_data[self.engine_name].get(output_module, ())
+        self.voices.set_voices(all_voices, s.voice_name, metadata)
+
+
+class ConfigDialog(Dialog):
+    '''Dialog for configuring Read aloud, currently only offers the engine choice.'''
+
+    def __init__(self, current_tts_backend, parent=None):
+        self.current_tts_backend = current_tts_backend
+        super().__init__(_('Configure Read aloud'), 'configure-read-aloud2', parent=parent)
+
+    def setup_ui(self):
+        self.l = l = QVBoxLayout(self)
+        self.engine_choice = ec = EngineChoice(self)
+        l.addWidget(ec)
diff --git a/src/calibre/gui2/tts2/qt.py b/src/calibre/gui2/tts2/qt.py index dc4220449c..868ceaf0ea 100644 --- a/src/calibre/gui2/tts2/qt.py +++ b/src/calibre/gui2/tts2/qt.py @@ -65,6 +65,10 @@ class QtTTSBackend(QObject): self._voices = tuple(map(qvoice_to_voice, self.tts.availableVoices())) return {'': self._voices} + @property + def engine_name(self) -> str: + return self.tts.engine() + def change_rate(self, steps: int = 1) -> bool: current = self.tts.rate() new_rate = 
max(-1, min(current + 0.2 * steps, 1)) diff --git a/src/calibre/gui2/tts2/speechd.py b/src/calibre/gui2/tts2/speechd.py index 69d96c0ee8..633cab0532 100644 --- a/src/calibre/gui2/tts2/speechd.py +++ b/src/calibre/gui2/tts2/speechd.py @@ -57,6 +57,12 @@ class SpeechdTTSBackend(QObject): self._current_marked_text = self._last_mark = None self._apply_settings(EngineSpecificSettings.create_from_config(engine_name)) + @property + def default_output_module(self) -> str: + if self._ensure_state(): + return self._system_default_output_module + return '' + @property def available_voices(self) -> dict[str, tuple[Voice, ...]]: if self._voices is None: @@ -66,6 +72,10 @@ class SpeechdTTSBackend(QObject): self._set_error(str(e)) return self._voices or {} + @property + def engine_name(self) -> str: + return 'speechd' + def change_rate(self, steps: int = 1) -> bool: current = self._current_settings.rate new_rate = max(-1, min(current + 0.2 * steps, 1)) diff --git a/src/calibre/gui2/tts2/types.py b/src/calibre/gui2/tts2/types.py index f120602df0..4403090e3e 100644 --- a/src/calibre/gui2/tts2/types.py +++ b/src/calibre/gui2/tts2/types.py @@ -9,7 +9,7 @@ from typing import Literal, NamedTuple from qt.core import QLocale, QObject, QTextToSpeech, QVoice -from calibre.constants import islinux, iswindows +from calibre.constants import islinux, ismacos, iswindows from calibre.utils.config import JSONConfig from calibre.utils.config_base import tweaks from calibre.utils.localization import canonicalize_lang @@ -35,6 +35,7 @@ class EngineMetadata(NamedTuple): has_multiple_output_modules: bool = False can_change_pitch: bool = True can_change_volume: bool = True + voices_have_quality_metadata: bool = False class Quality(Enum): @@ -44,16 +45,23 @@ class Quality(Enum): class Voice(NamedTuple): - name: str - language_code: str + name: str = '' + language_code: str = '' country_code: str = '' human_name: str = '' - notes: str = '' + notes: str = '' # variant from speechd voices, or notes from 
piper voices gender: QVoice.Gender = QVoice.Gender.Unknown age: QVoice.Age = QVoice.Age.Other quality: Quality = Quality.High + @property + def short_text(self) -> str: + return self.human_name or self.name or _('System default voice') + + def sort_key(self) -> tuple: + return (self.quality.value, self.short_text.lower())  # plain Enum members do not support <, so compare their values (assumed mutually comparable - confirm) + def qvoice_to_voice(v: QVoice) -> QVoice: lang = canonicalize_lang(QLocale.languageToCode(v.language())) or 'und' @@ -93,9 +101,10 @@ class EngineSpecificSettings(NamedTuple): volume = None with suppress(Exception): volume = max(0, min(float(prefs.get('volume')), 1)) + om = str(prefs.get('output_module', '')) + voice = str(prefs.get('voice_map', {}).get(om, '')) return EngineSpecificSettings( - voice_name=str(prefs.get('voice_name', '')), - output_module=str(prefs.get('output_module', '')), + voice_name=voice, output_module=om, audio_device_id=audio_device_id, rate=rate, pitch=pitch, volume=volume, engine_name=engine_name) @classmethod @@ -109,7 +118,7 @@ class EngineSpecificSettings(NamedTuple): if self.audio_device_id: ans['audio_device_id'] = {'id': self.audio_device_id.id.hex(), 'description': self.audio_device_id.description} if self.voice_name: - ans['voice_name'] = self.voice_name + ans['voice_map'] = { self.output_module: self.voice_name } if self.rate: ans['rate'] = self.rate if self.pitch: @@ -165,19 +174,27 @@ def available_engines() -> dict[str, EngineMetadata]: return ans -def create_tts_backend(engine_name: str = '', parent: QObject|None = None): - if engine_name == '': - if iswindows and tweaks.get('prefer_winsapi'): - engine_name = 'sapi' - elif islinux: - engine_name = 'speechd' +def default_engine_name() -> str: + if iswindows: + return 'sapi' if tweaks.get('prefer_winsapi') else 'winrt' + if ismacos: + return 'darwin' + return 'speechd' + + +def create_tts_backend(parent: QObject|None = None, force_engine: str | None = None): + prefs = load_config() + engine_name = prefs.get('engine', '') if force_engine is None else 
force_engine + engine_name = engine_name or default_engine_name() if engine_name not in available_engines(): - engine_name = '' + engine_name = default_engine_name() if engine_name == 'speechd': from calibre.gui2.tts2.speechd import SpeechdTTSBackend ans = SpeechdTTSBackend(engine_name, parent) else: + if engine_name not in available_engines(): + engine_name = '' # let Qt pick the engine from calibre.gui2.tts2.qt import QtTTSBackend ans = QtTTSBackend(engine_name, parent) return ans @@ -190,7 +207,7 @@ def develop(engine_name=''): from calibre.gui2 import Application app = Application([]) app.shutdown_signal_received.connect(lambda: app.exit(1)) - tts = create_tts_backend(engine_name=engine_name) + tts = create_tts_backend(force_engine=engine_name) speech_started = False def print_saying(s, e):