Work on new config dialog for TTS

This commit is contained in:
Kovid Goyal 2024-08-30 08:58:13 +05:30
parent 1ebb8b6574
commit 723bc8b829
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 271 additions and 15 deletions

View File

@ -0,0 +1,225 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
from qt.core import QCheckBox, QFormLayout, QLabel, QLocale, QSize, QSlider, Qt, QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, pyqtSignal
from calibre.gui2.tts2.types import (
EngineMetadata,
EngineSpecificSettings,
TrackingCapability,
Voice,
available_engines,
create_tts_backend,
default_engine_name,
load_config,
)
from calibre.gui2.widgets2 import Dialog, QComboBox
class EngineChoice(QWidget):
changed = pyqtSignal(str)
def __init__(self, parent):
super().__init__(parent)
self.l = l = QFormLayout(self)
self.engine_choice = ec = QComboBox(self)
l.addRow(_('Text-to-Speech &engine:'), ec)
configured_engine_name = load_config().get('engine', '')
l.addItem(_('Automatically select (currently {})').format(default_engine_name()), '')
for engine_name in available_engines():
l.addItem(engine_name)
idx = ec.findData(configured_engine_name)
if idx > -1:
ec.setCurrentIndex(idx)
self.engine_description = la = QLabel(self)
la.setWordWrap(True)
l.addWidget(la)
ec.currentIndexChanged.connect(self.current_changed)
self.update_description()
@property
def value(self) -> str:
return self.engine_choice.currentData()
def current_changed(self):
self.changed.emit(self.value)
self.update_description(self)
def update_description(self):
engine = self.value or default_engine_name()
metadata = available_engines()[engine]
if metadata.tracking_capability is TrackingCapability.NoTracking:
text = _('The {} engine does not highlight words on the screen as they are spoken')
elif metadata.tracking_capability is TrackingCapability.WordByWord:
text = _('The {} engine highlights words on the screen as they are spoken')
else:
text = _('The {} engine highlights sentences on the screen as they are spoken')
self.engine_description.setText(text)
class FloatSlider(QSlider):
def __init__(self, minimum: float = -1, maximum: float = 1, factor: int = 10, parent=None):
QSlider.__init__(parent)
self.setRange(int(minimum * factor), int(maximum * factor))
self.setSingleStep(int((self.maximum() - self.minimum()) / (2 * factor)))
self.setPageStep(5 * self.singleStep())
self.setTicksPosition(QSlider.TickPosition.TicksBelow)
if maximum - minimum >= 2:
self.setTickInterval((self.maximum() - self.minimum()) // 2)
else:
self.setTickInterval(self.maximum() - self.minimum())
self.factor = factor
@property
def val(self) -> float:
return self.value() / self.factor
@val.setter
def val(self, v) -> None:
self.setValue(int(v * self.factor))
class Volume(QWidget):
def __init__(self, parent=None):
super().__init__(parent)
self.l = l = QFormLayout(self)
self.system = e = QCheckBox(_('Use system default volume'), self)
l.addWidget(e)
self.vol = v = FloatSlider(minimum=0, parent=self)
l.addRow(_('&Volume of speech'), v)
self.e.toggled.connect(self.update_state)
self.update_state()
def update_state(self):
self.vol.setEnabled(not self.system.isChecked())
@property
def val(self):
if self.system.isChecked():
return None
return self.vol.val
@val.setter
def val(self, v):
self.system.setChecked(v is None)
if v is not None:
self.vol.val = v
class Voices(QTreeWidget):
def __init__(self, parent=None):
super().__init__(parent)
self.system_default_voice = Voice()
def sizeHint(self) -> QSize:
return QSize(400, 600)
def set_voices(self, all_voices: tuple[Voice, ...], current_voice: str, engine_metadata: EngineMetadata) -> None:
self.clear()
def qv(parent, voice):
ans = QTreeWidgetItem(parent, voice.short_text)
ans.setData(0, Qt.ItemDataRole.UserRole, voice)
return ans
qv(self.invisibleRootItem(), self.system_default_voice)
vmap = {}
for v in all_voices:
vmap.setdefault(v.language_code, []).append(v)
for vs in vmap.values():
vs.sort(key=lambda v: v.sort_key())
parent_map = {}
def lang(langcode):
return QLocale.languageToString(QLocale.codeToLanguage(langcode))
for langcode in sorted(vmap, key=lambda lc: lang(lc).lower()):
parent = parent_map.get(langcode)
if parent is None:
parent_map[langcode] = parent = QTreeWidgetItem(self.invisibleRootItem(), lang(langcode))
for voice in vmap[langcode]:
qv(parent, voice)
class EngineSpecificConfig(QWidget):
def __init__(self, parent):
super().__init__(parent)
self.l = l = QFormLayout(self)
self.output_module = om = QComboBox(self)
l.addRow(_('&Output module:'), om)
self.engine_name = ''
om.currentIndexChanged.connect(self.rebuild_voices)
self.engine_instances = {}
self.voice_data = {}
self.engine_specific_settings = {}
self.rate = r = FloatSlider(parent=self)
l.addRow(_('&Speed of speech:'), r)
self.pitch = p = FloatSlider(parent=self)
l.addRow(_('&Pitch of speech:'), p)
self.volume = v = Volume(self)
l.addWidget(v)
self.voices = v = Voices(self)
la = QLabel(_('V&oices:'))
la.setBuddy(v)
l.addWidget(la)
l.addWidget(v)
def set_engine(self, engine_name):
self.engine_name = engine_name
metadata = available_engines()[engine_name]
if engine_name not in self.engine_instances:
self.engine_instances[engine_name] = tts = create_tts_backend(force_engine=engine_name)
self.voice_data[engine_name] = tts.available_voices
self.engine_specific_settings[engine_name] = EngineSpecificSettings.create_from_config(engine_name)
else:
tts = self.engine_instances[engine_name]
self.output_module.blockSignals(True)
self.output_module.clear()
if metadata.has_multiple_output_modules and len(self.voice_data[engine_name]) > 1:
self.output_module.setVisible(True)
self.layout().setRowVisible(self.output_module, True)
self.output_module.clear()
self.output_module.addItem(_('System default (currently {})').format(tts.default_output_module), '')
for om in self.voice_data[engine_name]:
self.output_module.addItem(om, om)
if (idx := self.output_module.findData(self.engine_specific_settings[engine_name].output_module)) > -1:
self.output_module.setCurrentIndex(idx)
else:
self.layout().setRowVisible(self.output_module, False)
self.output_module.blockSignals(False)
try:
s = self.engine_specific_settings[self.engine_name]
except KeyError:
return
self.rate.val = s.rate
self.pitch.val = s.pitch
self.layout().setRowVisible(self.pitch, metadata.can_change_pitch)
self.volume.val = s.volume
self.rebuild_voice_table()
def rebuild_voices(self):
try:
s = self.engine_specific_settings[self.engine_name]
except KeyError:
return
metadata = available_engines()[self.engine_name]
output_module = self.output_module.currentData()
if metadata.has_multiple_output_modules:
output_module = output_module or self.engine_instances.default_output_module
all_voices = self.voice_data[self.engine_name][output_module]
self.voices.set_voices(all_voices, s.voice_name, metadata)
class ConfigDialog(Dialog):
def __init__(self, current_tts_backend, parent=None):
self.current_tts_backend = current_tts_backend
super().__init__(_('Configure Read aloud'), 'configure-read-aloud2', parent=parent)
def setup_ui(self):
self.l = l = QVBoxLayout(self)
self.engine_choice = ec = EngineChoice(self)
l.addWidget(ec)

View File

@ -65,6 +65,10 @@ class QtTTSBackend(QObject):
self._voices = tuple(map(qvoice_to_voice, self.tts.availableVoices())) self._voices = tuple(map(qvoice_to_voice, self.tts.availableVoices()))
return {'': self._voices} return {'': self._voices}
@property
def engine_name(self) -> str:
return self.tts.engine()
def change_rate(self, steps: int = 1) -> bool: def change_rate(self, steps: int = 1) -> bool:
current = self.tts.rate() current = self.tts.rate()
new_rate = max(-1, min(current + 0.2 * steps, 1)) new_rate = max(-1, min(current + 0.2 * steps, 1))

View File

@ -57,6 +57,12 @@ class SpeechdTTSBackend(QObject):
self._current_marked_text = self._last_mark = None self._current_marked_text = self._last_mark = None
self._apply_settings(EngineSpecificSettings.create_from_config(engine_name)) self._apply_settings(EngineSpecificSettings.create_from_config(engine_name))
@property
def default_output_module(self) -> str:
if self._ensure_state():
return self._system_default_output_module
return ''
@property @property
def available_voices(self) -> dict[str, tuple[Voice, ...]]: def available_voices(self) -> dict[str, tuple[Voice, ...]]:
if self._voices is None: if self._voices is None:
@ -66,6 +72,10 @@ class SpeechdTTSBackend(QObject):
self._set_error(str(e)) self._set_error(str(e))
return self._voices or {} return self._voices or {}
@property
def engine_name(self) -> str:
return 'speechd'
def change_rate(self, steps: int = 1) -> bool: def change_rate(self, steps: int = 1) -> bool:
current = self._current_settings.rate current = self._current_settings.rate
new_rate = max(-1, min(current + 0.2 * steps, 1)) new_rate = max(-1, min(current + 0.2 * steps, 1))

View File

@ -9,7 +9,7 @@ from typing import Literal, NamedTuple
from qt.core import QLocale, QObject, QTextToSpeech, QVoice from qt.core import QLocale, QObject, QTextToSpeech, QVoice
from calibre.constants import islinux, iswindows from calibre.constants import islinux, ismacos, iswindows
from calibre.utils.config import JSONConfig from calibre.utils.config import JSONConfig
from calibre.utils.config_base import tweaks from calibre.utils.config_base import tweaks
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
@ -35,6 +35,7 @@ class EngineMetadata(NamedTuple):
has_multiple_output_modules: bool = False has_multiple_output_modules: bool = False
can_change_pitch: bool = True can_change_pitch: bool = True
can_change_volume: bool = True can_change_volume: bool = True
voices_have_quality_metadata: bool = False
class Quality(Enum): class Quality(Enum):
@ -44,16 +45,23 @@ class Quality(Enum):
class Voice(NamedTuple): class Voice(NamedTuple):
name: str name: str = ''
language_code: str language_code: str = ''
country_code: str = '' country_code: str = ''
human_name: str = '' human_name: str = ''
notes: str = '' notes: str = '' # variant from speechd voices, or notes from piper voices
gender: QVoice.Gender = QVoice.Gender.Unknown gender: QVoice.Gender = QVoice.Gender.Unknown
age: QVoice.Age = QVoice.Age.Other age: QVoice.Age = QVoice.Age.Other
quality: Quality = Quality.High quality: Quality = Quality.High
@property
def short_text(self) -> str:
return self.human_name or self.name or _('System default voice')
def sort_key(self) -> tuple[Quality, str]:
return (self.quality, self.short_text.lower())
def qvoice_to_voice(v: QVoice) -> QVoice: def qvoice_to_voice(v: QVoice) -> QVoice:
lang = canonicalize_lang(QLocale.languageToCode(v.language())) or 'und' lang = canonicalize_lang(QLocale.languageToCode(v.language())) or 'und'
@ -93,9 +101,10 @@ class EngineSpecificSettings(NamedTuple):
volume = None volume = None
with suppress(Exception): with suppress(Exception):
volume = max(0, min(float(prefs.get('volume')), 1)) volume = max(0, min(float(prefs.get('volume')), 1))
om = str(prefs.get('output_module', ''))
voice = str(prefs.get('voice_map', {}).get(om, ''))
return EngineSpecificSettings( return EngineSpecificSettings(
voice_name=str(prefs.get('voice_name', '')), voice_name=voice, output_module=om,
output_module=str(prefs.get('output_module', '')),
audio_device_id=audio_device_id, rate=rate, pitch=pitch, volume=volume, engine_name=engine_name) audio_device_id=audio_device_id, rate=rate, pitch=pitch, volume=volume, engine_name=engine_name)
@classmethod @classmethod
@ -109,7 +118,7 @@ class EngineSpecificSettings(NamedTuple):
if self.audio_device_id: if self.audio_device_id:
ans['audio_device_id'] = {'id': self.audio_device_id.id.hex(), 'description': self.audio_device_id.description} ans['audio_device_id'] = {'id': self.audio_device_id.id.hex(), 'description': self.audio_device_id.description}
if self.voice_name: if self.voice_name:
ans['voice_name'] = self.voice_name ans['voice_map'] = { self.output_module: self.voice_name }
if self.rate: if self.rate:
ans['rate'] = self.rate ans['rate'] = self.rate
if self.pitch: if self.pitch:
@ -165,19 +174,27 @@ def available_engines() -> dict[str, EngineMetadata]:
return ans return ans
def create_tts_backend(engine_name: str = '', parent: QObject|None = None): def default_engine_name() -> str:
if engine_name == '': if iswindows:
if iswindows and tweaks.get('prefer_winsapi'): return 'sapi' if tweaks.get('prefer_winsapi') else 'winrt'
engine_name = 'sapi' if ismacos:
elif islinux: return 'darwin'
engine_name = 'speechd' return 'speechd'
def create_tts_backend(parent: QObject|None = None, force_engine: str | None = None):
prefs = load_config()
engine_name = prefs.get('engine', '') if force_engine is None else force_engine
engine_name = engine_name or default_engine_name()
if engine_name not in available_engines(): if engine_name not in available_engines():
engine_name = '' engine_name = default_engine_name()
if engine_name == 'speechd': if engine_name == 'speechd':
from calibre.gui2.tts2.speechd import SpeechdTTSBackend from calibre.gui2.tts2.speechd import SpeechdTTSBackend
ans = SpeechdTTSBackend(engine_name, parent) ans = SpeechdTTSBackend(engine_name, parent)
else: else:
if engine_name not in available_engines():
engine_name = '' # let Qt pick the engine
from calibre.gui2.tts2.qt import QtTTSBackend from calibre.gui2.tts2.qt import QtTTSBackend
ans = QtTTSBackend(engine_name, parent) ans = QtTTSBackend(engine_name, parent)
return ans return ans
@ -190,7 +207,7 @@ def develop(engine_name=''):
from calibre.gui2 import Application from calibre.gui2 import Application
app = Application([]) app = Application([])
app.shutdown_signal_received.connect(lambda: app.exit(1)) app.shutdown_signal_received.connect(lambda: app.exit(1))
tts = create_tts_backend(engine_name=engine_name) tts = create_tts_backend(force_engine=engine_name)
speech_started = False speech_started = False
def print_saying(s, e): def print_saying(s, e):