Finish the TTS config dialog

This commit is contained in:
Kovid Goyal 2024-08-30 18:41:54 +05:30
parent 17ada13951
commit 5ebbe1804b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 132 additions and 38 deletions

View File

@ -2,9 +2,10 @@
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
from qt.core import QCheckBox, QFormLayout, QLabel, QLocale, QSize, QSlider, Qt, QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, pyqtSignal
from qt.core import QCheckBox, QFormLayout, QLabel, QLocale, QMediaDevices, QSize, QSlider, Qt, QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, pyqtSignal
from calibre.gui2.tts2.types import (
AudioDeviceId,
EngineMetadata,
EngineSpecificSettings,
TrackingCapability,
@ -27,15 +28,15 @@ class EngineChoice(QWidget):
self.engine_choice = ec = QComboBox(self)
l.addRow(_('Text-to-Speech &engine:'), ec)
configured_engine_name = load_config().get('engine', '')
l.addItem(_('Automatically select (currently {})').format(default_engine_name()), '')
ec.addItem(_('Automatically select (currently {})').format(default_engine_name()), '')
for engine_name in available_engines():
l.addItem(engine_name)
ec.addItem(engine_name, engine_name)
idx = ec.findData(configured_engine_name)
if idx > -1:
ec.setCurrentIndex(idx)
self.engine_description = la = QLabel(self)
la.setWordWrap(True)
l.addWidget(la)
l.addRow(la)
ec.currentIndexChanged.connect(self.current_changed)
self.update_description()
@ -45,7 +46,7 @@ class EngineChoice(QWidget):
def current_changed(self):
self.changed.emit(self.value)
self.update_description(self)
self.update_description()
def update_description(self):
engine = self.value or default_engine_name()
@ -56,22 +57,28 @@ class EngineChoice(QWidget):
text = _('The {} engine highlights words on the screen as they are spoken')
else:
text = _('The {} engine highlights sentences on the screen as they are spoken')
self.engine_description.setText(text)
self.engine_description.setText(text.format(engine))
class FloatSlider(QSlider):
def __init__(self, minimum: float = -1, maximum: float = 1, factor: int = 10, parent=None):
QSlider.__init__(parent)
super().__init__(parent)
self.factor = factor
self.setRange(int(minimum * factor), int(maximum * factor))
self.setSingleStep(int((self.maximum() - self.minimum()) / (2 * factor)))
self.setPageStep(5 * self.singleStep())
self.setTicksPosition(QSlider.TickPosition.TicksBelow)
self.setTickPosition(QSlider.TickPosition.TicksBelow)
self.setOrientation(Qt.Orientation.Horizontal)
if maximum - minimum >= 2:
self.setTickInterval((self.maximum() - self.minimum()) // 2)
else:
self.setTickInterval(self.maximum() - self.minimum())
self.factor = factor
def sizeHint(self) -> QSize:
ans = super().sizeHint()
ans.setWidth(ans.width() * 2)
return ans
@property
def val(self) -> float:
@ -87,15 +94,16 @@ class Volume(QWidget):
def __init__(self, parent=None):
super().__init__(parent)
self.l = l = QFormLayout(self)
l.setContentsMargins(0, 0, 0, 0)
self.system = e = QCheckBox(_('Use system default volume'), self)
l.addWidget(e)
l.addRow(e)
self.vol = v = FloatSlider(minimum=0, parent=self)
l.addRow(_('&Volume of speech'), v)
self.e.toggled.connect(self.update_state)
l.addRow(_('&Volume of speech:'), v)
e.toggled.connect(self.update_state)
self.update_state()
def update_state(self):
self.vol.setEnabled(not self.system.isChecked())
self.layout().setRowVisible(self.vol, not self.system.isChecked())
@property
def val(self):
@ -106,14 +114,14 @@ class Volume(QWidget):
@val.setter
def val(self, v):
self.system.setChecked(v is None)
if v is not None:
self.vol.val = v
self.vol.val = 0.5 if v is None else v
class Voices(QTreeWidget):
def __init__(self, parent=None):
super().__init__(parent)
self.setHeaderHidden(True)
self.system_default_voice = Voice()
def sizeHint(self) -> QSize:
@ -121,9 +129,13 @@ class Voices(QTreeWidget):
def set_voices(self, all_voices: tuple[Voice, ...], current_voice: str, engine_metadata: EngineMetadata) -> None:
self.clear()
current_item = None
def qv(parent, voice):
ans = QTreeWidgetItem(parent, voice.short_text)
nonlocal current_item
ans = QTreeWidgetItem(parent, [voice.short_text])
ans.setData(0, Qt.ItemDataRole.UserRole, voice)
if current_voice == voice.name:
current_item = ans
return ans
qv(self.invisibleRootItem(), self.system_default_voice)
vmap = {}
@ -138,9 +150,17 @@ class Voices(QTreeWidget):
for langcode in sorted(vmap, key=lambda lc: lang(lc).lower()):
parent = parent_map.get(langcode)
if parent is None:
parent_map[langcode] = parent = QTreeWidgetItem(self.invisibleRootItem(), lang(langcode))
parent_map[langcode] = parent = QTreeWidgetItem(self.invisibleRootItem(), [lang(langcode)])
parent.setFlags(parent.flags() & ~Qt.ItemFlag.ItemIsSelectable)
for voice in vmap[langcode]:
qv(parent, voice)
if current_item is not None:
self.setCurrentItem(current_item)
@property
def val(self) -> str:
voice = self.currentItem().data(0, Qt.ItemDataRole.UserRole)
return voice.name if voice else ''
class EngineSpecificConfig(QWidget):
@ -148,6 +168,10 @@ class EngineSpecificConfig(QWidget):
def __init__(self, parent):
super().__init__(parent)
self.l = l = QFormLayout(self)
devs = QMediaDevices.audioOutputs()
dad = QMediaDevices.defaultAudioOutput()
self.all_audio_devices = [AudioDeviceId(bytes(x.id()), x.description()) for x in devs]
self.default_audio_device = AudioDeviceId(bytes(dad.id()), dad.description())
self.output_module = om = QComboBox(self)
l.addRow(_('&Output module:'), om)
self.engine_name = ''
@ -160,14 +184,19 @@ class EngineSpecificConfig(QWidget):
self.pitch = p = FloatSlider(parent=self)
l.addRow(_('&Pitch of speech:'), p)
self.volume = v = Volume(self)
l.addWidget(v)
l.addRow(v)
self.audio_device = ad = QComboBox(self)
l.addRow(_('Output a&udio to:'), ad)
self.voices = v = Voices(self)
la = QLabel(_('V&oices:'))
la.setBuddy(v)
l.addWidget(la)
l.addWidget(v)
l.addRow(la)
l.addRow(v)
def set_engine(self, engine_name):
engine_name = engine_name or default_engine_name()
if self.engine_name and self.engine_name != engine_name:
self.engine_specific_settings[self.engine_name] = self.as_settings()
self.engine_name = engine_name
metadata = available_engines()[engine_name]
if engine_name not in self.engine_instances:
@ -178,10 +207,8 @@ class EngineSpecificConfig(QWidget):
tts = self.engine_instances[engine_name]
self.output_module.blockSignals(True)
self.output_module.clear()
if metadata.has_multiple_output_modules and len(self.voice_data[engine_name]) > 1:
self.output_module.setVisible(True)
if metadata.has_multiple_output_modules:
self.layout().setRowVisible(self.output_module, True)
self.output_module.clear()
self.output_module.addItem(_('System default (currently {})').format(tts.default_output_module), '')
for om in self.voice_data[engine_name]:
self.output_module.addItem(om, om)
@ -195,10 +222,31 @@ class EngineSpecificConfig(QWidget):
except KeyError:
return
self.rate.val = s.rate
if metadata.can_change_pitch:
self.pitch.val = s.pitch
self.layout().setRowVisible(self.pitch, True)
else:
self.pitch.val = 0
self.layout().setRowVisible(self.pitch, False)
self.layout().setRowVisible(self.pitch, metadata.can_change_pitch)
if metadata.can_change_volume:
self.layout().setRowVisible(self.volume, True)
self.volume.val = s.volume
self.rebuild_voice_table()
else:
self.layout().setRowVisible(self.volume, False)
self.volume.val = None
self.audio_device.clear()
if metadata.allows_choosing_audio_device:
self.audio_device.addItem(_('System default (currently {})').format(self.default_audio_device.description), '')
for ad in self.all_audio_devices:
self.audio_device.addItem(ad.description, ad.id.hex())
if cad := self.engine_specific_settings[engine_name].audio_device_id:
if (idx := self.audio_device.findData(cad.id.hex())):
self.audio_device.setCurrentIndex(idx)
self.layout().setRowVisible(self.audio_device, True)
else:
self.layout().setRowVisible(self.audio_device, False)
self.rebuild_voices()
def rebuild_voices(self):
try:
@ -206,12 +254,27 @@ class EngineSpecificConfig(QWidget):
except KeyError:
return
metadata = available_engines()[self.engine_name]
output_module = self.output_module.currentData()
output_module = self.output_module.currentData() or ''
if metadata.has_multiple_output_modules:
output_module = output_module or self.engine_instances.default_output_module
output_module = output_module or self.engine_instances[self.engine_name].default_output_module
all_voices = self.voice_data[self.engine_name][output_module]
self.voices.set_voices(all_voices, s.voice_name, metadata)
def as_settings(self) -> EngineSpecificSettings:
ans = EngineSpecificSettings(
engine_name=self.engine_name,
rate=self.rate.val, voice_name=self.voices.val, pitch=self.pitch.val, volume=self.volume.val)
metadata = available_engines()[self.engine_name]
if metadata.has_multiple_output_modules and self.output_module.currentIndex() > 0:
ans = ans._replace(output_module=self.output_module.currentData())
if metadata.allows_choosing_audio_device and self.audio_device.currentIndex() > 0:
aid = bytes.fromhex(self.audio_device.currentData())
for ad in self.all_audio_devices:
if ad.id == aid:
ans = ans._replace(audio_device_id=ad)
break
return ans
class ConfigDialog(Dialog):
@ -222,4 +285,31 @@ class ConfigDialog(Dialog):
def setup_ui(self):
self.l = l = QVBoxLayout(self)
self.engine_choice = ec = EngineChoice(self)
self.engine_specific_config = esc = EngineSpecificConfig(self)
ec.changed.connect(esc.set_engine)
l.addWidget(ec)
l.addWidget(esc)
l.addWidget(self.bb)
esc.set_engine(ec.value)
def accept(self):
s = self.engine_specific_config.as_settings()
prefs = load_config()
with prefs:
if engine_name := self.engine_choice.value:
prefs['engine'] = engine_name
else:
prefs.pop('engine', None)
s.save_to_config(prefs)
super().accept()
def develop():
from calibre.gui2 import Application
app = Application([])
d = ConfigDialog(create_tts_backend(app))
d.exec()
if __name__ == '__main__':
develop()

View File

@ -69,6 +69,10 @@ class QtTTSBackend(QObject):
def engine_name(self) -> str:
return self.tts.engine()
@property
def default_output_module(self) -> str:
return ''
def change_rate(self, steps: int = 1) -> bool:
current = self.tts.rate()
new_rate = max(-1, min(current + 0.2 * steps, 1))
@ -129,9 +133,9 @@ class QtTTSBackend(QObject):
if settings.volume is not None:
self.tts.setVolume(max(0, min(float(settings.volume), 1)))
if settings.voice_name:
for v in self.availableVoices():
for v in self.tts.availableVoices():
if v.name() == settings.voice_name:
self.setVoice(v)
self.tts.setVoice(v)
break
self.tts.sayingWord.connect(self._saying_word)
self.tts.stateChanged.connect(self.state_changed.emit)

View File

@ -31,8 +31,8 @@ class EngineMetadata(NamedTuple):
name: Literal['winrt', 'darwin', 'sapi', 'flite', 'speechd']
tracking_capability: TrackingCapability = TrackingCapability.NoTracking
allows_choosing_audio_device: bool = True
can_synthesize_audio_data: bool = True
has_multiple_output_modules: bool = False
can_synthesize_audio_data: bool = True
can_change_pitch: bool = True
can_change_volume: bool = True
voices_have_quality_metadata: bool = False
@ -102,9 +102,8 @@ class EngineSpecificSettings(NamedTuple):
with suppress(Exception):
volume = max(0, min(float(prefs.get('volume')), 1))
om = str(prefs.get('output_module', ''))
voice = str(prefs.get('voice_map', {}).get(om, ''))
return EngineSpecificSettings(
voice_name=voice, output_module=om,
voice_name=str(prefs.get('voice', '')), output_module=om,
audio_device_id=audio_device_id, rate=rate, pitch=pitch, volume=volume, engine_name=engine_name)
@classmethod
@ -118,7 +117,7 @@ class EngineSpecificSettings(NamedTuple):
if self.audio_device_id:
ans['audio_device_id'] = {'id': self.audio_device_id.id.hex(), 'description': self.audio_device_id.description}
if self.voice_name:
ans['voice_map'] = { self.output_module: self.voice_name }
ans['voice'] = self.voice_name
if self.rate:
ans['rate'] = self.rate
if self.pitch:
@ -129,8 +128,7 @@ class EngineSpecificSettings(NamedTuple):
ans['output_module'] = self.output_module
return ans
def save_to_config(self):
prefs = load_config()
def save_to_config(self, prefs):
val = self.as_dict
engines = prefs.get('engines', {})
if not val:
@ -149,8 +147,10 @@ def available_engines() -> dict[str, EngineMetadata]:
e.setEngine(name)
cap = int(e.engineCapabilities().value)
return EngineMetadata(name,
TrackingCapability.WordByWord if cap & int(QTextToSpeech.Capability.WordByWordProgress.value) else TrackingCapability.NoTracking,
allows_choosing_audio_device, bool(cap & int(QTextToSpeech.Capability.Synthesize.value)))
tracking_capability=TrackingCapability.WordByWord if cap & int(
QTextToSpeech.Capability.WordByWordProgress.value) else TrackingCapability.NoTracking,
allows_choosing_audio_device=allows_choosing_audio_device,
can_synthesize_audio_data=bool(cap & int(QTextToSpeech.Capability.Synthesize.value)))
for x in QTextToSpeech.availableEngines():
if x == 'winrt':