Finish the TTS config dialog

This commit is contained in:
Kovid Goyal 2024-08-30 18:41:54 +05:30
parent 17ada13951
commit 5ebbe1804b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 132 additions and 38 deletions

View File

@ -2,9 +2,10 @@
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net> # License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
from qt.core import QCheckBox, QFormLayout, QLabel, QLocale, QSize, QSlider, Qt, QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, pyqtSignal from qt.core import QCheckBox, QFormLayout, QLabel, QLocale, QMediaDevices, QSize, QSlider, Qt, QTreeWidget, QTreeWidgetItem, QVBoxLayout, QWidget, pyqtSignal
from calibre.gui2.tts2.types import ( from calibre.gui2.tts2.types import (
AudioDeviceId,
EngineMetadata, EngineMetadata,
EngineSpecificSettings, EngineSpecificSettings,
TrackingCapability, TrackingCapability,
@ -27,15 +28,15 @@ class EngineChoice(QWidget):
self.engine_choice = ec = QComboBox(self) self.engine_choice = ec = QComboBox(self)
l.addRow(_('Text-to-Speech &engine:'), ec) l.addRow(_('Text-to-Speech &engine:'), ec)
configured_engine_name = load_config().get('engine', '') configured_engine_name = load_config().get('engine', '')
l.addItem(_('Automatically select (currently {})').format(default_engine_name()), '') ec.addItem(_('Automatically select (currently {})').format(default_engine_name()), '')
for engine_name in available_engines(): for engine_name in available_engines():
l.addItem(engine_name) ec.addItem(engine_name, engine_name)
idx = ec.findData(configured_engine_name) idx = ec.findData(configured_engine_name)
if idx > -1: if idx > -1:
ec.setCurrentIndex(idx) ec.setCurrentIndex(idx)
self.engine_description = la = QLabel(self) self.engine_description = la = QLabel(self)
la.setWordWrap(True) la.setWordWrap(True)
l.addWidget(la) l.addRow(la)
ec.currentIndexChanged.connect(self.current_changed) ec.currentIndexChanged.connect(self.current_changed)
self.update_description() self.update_description()
@ -45,7 +46,7 @@ class EngineChoice(QWidget):
def current_changed(self): def current_changed(self):
self.changed.emit(self.value) self.changed.emit(self.value)
self.update_description(self) self.update_description()
def update_description(self): def update_description(self):
engine = self.value or default_engine_name() engine = self.value or default_engine_name()
@ -56,22 +57,28 @@ class EngineChoice(QWidget):
text = _('The {} engine highlights words on the screen as they are spoken') text = _('The {} engine highlights words on the screen as they are spoken')
else: else:
text = _('The {} engine highlights sentences on the screen as they are spoken') text = _('The {} engine highlights sentences on the screen as they are spoken')
self.engine_description.setText(text) self.engine_description.setText(text.format(engine))
class FloatSlider(QSlider): class FloatSlider(QSlider):
def __init__(self, minimum: float = -1, maximum: float = 1, factor: int = 10, parent=None): def __init__(self, minimum: float = -1, maximum: float = 1, factor: int = 10, parent=None):
QSlider.__init__(parent) super().__init__(parent)
self.factor = factor
self.setRange(int(minimum * factor), int(maximum * factor)) self.setRange(int(minimum * factor), int(maximum * factor))
self.setSingleStep(int((self.maximum() - self.minimum()) / (2 * factor))) self.setSingleStep(int((self.maximum() - self.minimum()) / (2 * factor)))
self.setPageStep(5 * self.singleStep()) self.setPageStep(5 * self.singleStep())
self.setTicksPosition(QSlider.TickPosition.TicksBelow) self.setTickPosition(QSlider.TickPosition.TicksBelow)
self.setOrientation(Qt.Orientation.Horizontal)
if maximum - minimum >= 2: if maximum - minimum >= 2:
self.setTickInterval((self.maximum() - self.minimum()) // 2) self.setTickInterval((self.maximum() - self.minimum()) // 2)
else: else:
self.setTickInterval(self.maximum() - self.minimum()) self.setTickInterval(self.maximum() - self.minimum())
self.factor = factor
def sizeHint(self) -> QSize:
    """Return the inherited size hint with its width doubled."""
    hint = super().sizeHint()
    doubled_width = 2 * hint.width()
    hint.setWidth(doubled_width)
    return hint
@property @property
def val(self) -> float: def val(self) -> float:
@ -87,15 +94,16 @@ class Volume(QWidget):
def __init__(self, parent=None): def __init__(self, parent=None):
super().__init__(parent) super().__init__(parent)
self.l = l = QFormLayout(self) self.l = l = QFormLayout(self)
l.setContentsMargins(0, 0, 0, 0)
self.system = e = QCheckBox(_('Use system default volume'), self) self.system = e = QCheckBox(_('Use system default volume'), self)
l.addWidget(e) l.addRow(e)
self.vol = v = FloatSlider(minimum=0, parent=self) self.vol = v = FloatSlider(minimum=0, parent=self)
l.addRow(_('&Volume of speech'), v) l.addRow(_('&Volume of speech:'), v)
self.e.toggled.connect(self.update_state) e.toggled.connect(self.update_state)
self.update_state() self.update_state()
def update_state(self): def update_state(self):
self.vol.setEnabled(not self.system.isChecked()) self.layout().setRowVisible(self.vol, not self.system.isChecked())
@property @property
def val(self): def val(self):
@ -106,14 +114,14 @@ class Volume(QWidget):
@val.setter @val.setter
def val(self, v): def val(self, v):
self.system.setChecked(v is None) self.system.setChecked(v is None)
if v is not None: self.vol.val = 0.5 if v is None else v
self.vol.val = v
class Voices(QTreeWidget): class Voices(QTreeWidget):
def __init__(self, parent=None): def __init__(self, parent=None):
super().__init__(parent) super().__init__(parent)
self.setHeaderHidden(True)
self.system_default_voice = Voice() self.system_default_voice = Voice()
def sizeHint(self) -> QSize: def sizeHint(self) -> QSize:
@ -121,9 +129,13 @@ class Voices(QTreeWidget):
def set_voices(self, all_voices: tuple[Voice, ...], current_voice: str, engine_metadata: EngineMetadata) -> None: def set_voices(self, all_voices: tuple[Voice, ...], current_voice: str, engine_metadata: EngineMetadata) -> None:
self.clear() self.clear()
current_item = None
def qv(parent, voice): def qv(parent, voice):
ans = QTreeWidgetItem(parent, voice.short_text) nonlocal current_item
ans = QTreeWidgetItem(parent, [voice.short_text])
ans.setData(0, Qt.ItemDataRole.UserRole, voice) ans.setData(0, Qt.ItemDataRole.UserRole, voice)
if current_voice == voice.name:
current_item = ans
return ans return ans
qv(self.invisibleRootItem(), self.system_default_voice) qv(self.invisibleRootItem(), self.system_default_voice)
vmap = {} vmap = {}
@ -138,9 +150,17 @@ class Voices(QTreeWidget):
for langcode in sorted(vmap, key=lambda lc: lang(lc).lower()): for langcode in sorted(vmap, key=lambda lc: lang(lc).lower()):
parent = parent_map.get(langcode) parent = parent_map.get(langcode)
if parent is None: if parent is None:
parent_map[langcode] = parent = QTreeWidgetItem(self.invisibleRootItem(), lang(langcode)) parent_map[langcode] = parent = QTreeWidgetItem(self.invisibleRootItem(), [lang(langcode)])
parent.setFlags(parent.flags() & ~Qt.ItemFlag.ItemIsSelectable)
for voice in vmap[langcode]: for voice in vmap[langcode]:
qv(parent, voice) qv(parent, voice)
if current_item is not None:
self.setCurrentItem(current_item)
@property
def val(self) -> str:
    """Return the name of the currently selected voice.

    Returns an empty string when the tree has no current item, or when the
    current item has no voice stored under ``Qt.ItemDataRole.UserRole``.
    """
    item = self.currentItem()
    if item is None:
        # No current item (e.g. an empty tree or nothing selected yet):
        # avoid calling .data() on None.
        return ''
    voice = item.data(0, Qt.ItemDataRole.UserRole)
    return voice.name if voice else ''
class EngineSpecificConfig(QWidget): class EngineSpecificConfig(QWidget):
@ -148,6 +168,10 @@ class EngineSpecificConfig(QWidget):
def __init__(self, parent): def __init__(self, parent):
super().__init__(parent) super().__init__(parent)
self.l = l = QFormLayout(self) self.l = l = QFormLayout(self)
devs = QMediaDevices.audioOutputs()
dad = QMediaDevices.defaultAudioOutput()
self.all_audio_devices = [AudioDeviceId(bytes(x.id()), x.description()) for x in devs]
self.default_audio_device = AudioDeviceId(bytes(dad.id()), dad.description())
self.output_module = om = QComboBox(self) self.output_module = om = QComboBox(self)
l.addRow(_('&Output module:'), om) l.addRow(_('&Output module:'), om)
self.engine_name = '' self.engine_name = ''
@ -160,14 +184,19 @@ class EngineSpecificConfig(QWidget):
self.pitch = p = FloatSlider(parent=self) self.pitch = p = FloatSlider(parent=self)
l.addRow(_('&Pitch of speech:'), p) l.addRow(_('&Pitch of speech:'), p)
self.volume = v = Volume(self) self.volume = v = Volume(self)
l.addWidget(v) l.addRow(v)
self.audio_device = ad = QComboBox(self)
l.addRow(_('Output a&udio to:'), ad)
self.voices = v = Voices(self) self.voices = v = Voices(self)
la = QLabel(_('V&oices:')) la = QLabel(_('V&oices:'))
la.setBuddy(v) la.setBuddy(v)
l.addWidget(la) l.addRow(la)
l.addWidget(v) l.addRow(v)
def set_engine(self, engine_name): def set_engine(self, engine_name):
engine_name = engine_name or default_engine_name()
if self.engine_name and self.engine_name != engine_name:
self.engine_specific_settings[self.engine_name] = self.as_settings()
self.engine_name = engine_name self.engine_name = engine_name
metadata = available_engines()[engine_name] metadata = available_engines()[engine_name]
if engine_name not in self.engine_instances: if engine_name not in self.engine_instances:
@ -178,10 +207,8 @@ class EngineSpecificConfig(QWidget):
tts = self.engine_instances[engine_name] tts = self.engine_instances[engine_name]
self.output_module.blockSignals(True) self.output_module.blockSignals(True)
self.output_module.clear() self.output_module.clear()
if metadata.has_multiple_output_modules and len(self.voice_data[engine_name]) > 1: if metadata.has_multiple_output_modules:
self.output_module.setVisible(True)
self.layout().setRowVisible(self.output_module, True) self.layout().setRowVisible(self.output_module, True)
self.output_module.clear()
self.output_module.addItem(_('System default (currently {})').format(tts.default_output_module), '') self.output_module.addItem(_('System default (currently {})').format(tts.default_output_module), '')
for om in self.voice_data[engine_name]: for om in self.voice_data[engine_name]:
self.output_module.addItem(om, om) self.output_module.addItem(om, om)
@ -195,10 +222,31 @@ class EngineSpecificConfig(QWidget):
except KeyError: except KeyError:
return return
self.rate.val = s.rate self.rate.val = s.rate
if metadata.can_change_pitch:
self.pitch.val = s.pitch self.pitch.val = s.pitch
self.layout().setRowVisible(self.pitch, True)
else:
self.pitch.val = 0
self.layout().setRowVisible(self.pitch, False)
self.layout().setRowVisible(self.pitch, metadata.can_change_pitch) self.layout().setRowVisible(self.pitch, metadata.can_change_pitch)
if metadata.can_change_volume:
self.layout().setRowVisible(self.volume, True)
self.volume.val = s.volume self.volume.val = s.volume
self.rebuild_voice_table() else:
self.layout().setRowVisible(self.volume, False)
self.volume.val = None
self.audio_device.clear()
if metadata.allows_choosing_audio_device:
self.audio_device.addItem(_('System default (currently {})').format(self.default_audio_device.description), '')
for ad in self.all_audio_devices:
self.audio_device.addItem(ad.description, ad.id.hex())
if cad := self.engine_specific_settings[engine_name].audio_device_id:
if (idx := self.audio_device.findData(cad.id.hex())):
self.audio_device.setCurrentIndex(idx)
self.layout().setRowVisible(self.audio_device, True)
else:
self.layout().setRowVisible(self.audio_device, False)
self.rebuild_voices()
def rebuild_voices(self): def rebuild_voices(self):
try: try:
@ -206,12 +254,27 @@ class EngineSpecificConfig(QWidget):
except KeyError: except KeyError:
return return
metadata = available_engines()[self.engine_name] metadata = available_engines()[self.engine_name]
output_module = self.output_module.currentData() output_module = self.output_module.currentData() or ''
if metadata.has_multiple_output_modules: if metadata.has_multiple_output_modules:
output_module = output_module or self.engine_instances.default_output_module output_module = output_module or self.engine_instances[self.engine_name].default_output_module
all_voices = self.voice_data[self.engine_name][output_module] all_voices = self.voice_data[self.engine_name][output_module]
self.voices.set_voices(all_voices, s.voice_name, metadata) self.voices.set_voices(all_voices, s.voice_name, metadata)
def as_settings(self) -> EngineSpecificSettings:
    """Build an EngineSpecificSettings value from the current widget state.

    The output module and audio device are only recorded when the engine
    metadata supports them and the corresponding combo box is on an entry
    other than its first one.
    """
    settings = EngineSpecificSettings(
        engine_name=self.engine_name,
        rate=self.rate.val,
        voice_name=self.voices.val,
        pitch=self.pitch.val,
        volume=self.volume.val,
    )
    engine_info = available_engines()[self.engine_name]
    if engine_info.has_multiple_output_modules and self.output_module.currentIndex() > 0:
        settings = settings._replace(output_module=self.output_module.currentData())
    if engine_info.allows_choosing_audio_device and self.audio_device.currentIndex() > 0:
        # The combo box stores the device id as a hex string; map it back to
        # the first known device with that id.
        wanted_id = bytes.fromhex(self.audio_device.currentData())
        match = next((dev for dev in self.all_audio_devices if dev.id == wanted_id), None)
        if match is not None:
            settings = settings._replace(audio_device_id=match)
    return settings
class ConfigDialog(Dialog): class ConfigDialog(Dialog):
@ -222,4 +285,31 @@ class ConfigDialog(Dialog):
def setup_ui(self): def setup_ui(self):
self.l = l = QVBoxLayout(self) self.l = l = QVBoxLayout(self)
self.engine_choice = ec = EngineChoice(self) self.engine_choice = ec = EngineChoice(self)
self.engine_specific_config = esc = EngineSpecificConfig(self)
ec.changed.connect(esc.set_engine)
l.addWidget(ec) l.addWidget(ec)
l.addWidget(esc)
l.addWidget(self.bb)
esc.set_engine(ec.value)
def accept(self):
    """Save the engine choice and its settings, then call the base accept()."""
    settings = self.engine_specific_config.as_settings()
    prefs = load_config()
    with prefs:
        chosen_engine = self.engine_choice.value
        if not chosen_engine:
            # Empty value: no explicit engine is stored in the config.
            prefs.pop('engine', None)
        else:
            prefs['engine'] = chosen_engine
        settings.save_to_config(prefs)
    super().accept()
def develop():
    """Create an Application and run the config dialog on its own."""
    from calibre.gui2 import Application
    application = Application([])
    backend = create_tts_backend(application)
    dialog = ConfigDialog(backend)
    dialog.exec()


if __name__ == '__main__':
    develop()

View File

@ -69,6 +69,10 @@ class QtTTSBackend(QObject):
def engine_name(self) -> str: def engine_name(self) -> str:
return self.tts.engine() return self.tts.engine()
@property
def default_output_module(self) -> str:
    """Name of the default output module; this backend always reports ''."""
    return ''
def change_rate(self, steps: int = 1) -> bool: def change_rate(self, steps: int = 1) -> bool:
current = self.tts.rate() current = self.tts.rate()
new_rate = max(-1, min(current + 0.2 * steps, 1)) new_rate = max(-1, min(current + 0.2 * steps, 1))
@ -129,9 +133,9 @@ class QtTTSBackend(QObject):
if settings.volume is not None: if settings.volume is not None:
self.tts.setVolume(max(0, min(float(settings.volume), 1))) self.tts.setVolume(max(0, min(float(settings.volume), 1)))
if settings.voice_name: if settings.voice_name:
for v in self.availableVoices(): for v in self.tts.availableVoices():
if v.name() == settings.voice_name: if v.name() == settings.voice_name:
self.setVoice(v) self.tts.setVoice(v)
break break
self.tts.sayingWord.connect(self._saying_word) self.tts.sayingWord.connect(self._saying_word)
self.tts.stateChanged.connect(self.state_changed.emit) self.tts.stateChanged.connect(self.state_changed.emit)

View File

@ -31,8 +31,8 @@ class EngineMetadata(NamedTuple):
name: Literal['winrt', 'darwin', 'sapi', 'flite', 'speechd'] name: Literal['winrt', 'darwin', 'sapi', 'flite', 'speechd']
tracking_capability: TrackingCapability = TrackingCapability.NoTracking tracking_capability: TrackingCapability = TrackingCapability.NoTracking
allows_choosing_audio_device: bool = True allows_choosing_audio_device: bool = True
can_synthesize_audio_data: bool = True
has_multiple_output_modules: bool = False has_multiple_output_modules: bool = False
can_synthesize_audio_data: bool = True
can_change_pitch: bool = True can_change_pitch: bool = True
can_change_volume: bool = True can_change_volume: bool = True
voices_have_quality_metadata: bool = False voices_have_quality_metadata: bool = False
@ -102,9 +102,8 @@ class EngineSpecificSettings(NamedTuple):
with suppress(Exception): with suppress(Exception):
volume = max(0, min(float(prefs.get('volume')), 1)) volume = max(0, min(float(prefs.get('volume')), 1))
om = str(prefs.get('output_module', '')) om = str(prefs.get('output_module', ''))
voice = str(prefs.get('voice_map', {}).get(om, ''))
return EngineSpecificSettings( return EngineSpecificSettings(
voice_name=voice, output_module=om, voice_name=str(prefs.get('voice', '')), output_module=om,
audio_device_id=audio_device_id, rate=rate, pitch=pitch, volume=volume, engine_name=engine_name) audio_device_id=audio_device_id, rate=rate, pitch=pitch, volume=volume, engine_name=engine_name)
@classmethod @classmethod
@ -118,7 +117,7 @@ class EngineSpecificSettings(NamedTuple):
if self.audio_device_id: if self.audio_device_id:
ans['audio_device_id'] = {'id': self.audio_device_id.id.hex(), 'description': self.audio_device_id.description} ans['audio_device_id'] = {'id': self.audio_device_id.id.hex(), 'description': self.audio_device_id.description}
if self.voice_name: if self.voice_name:
ans['voice_map'] = { self.output_module: self.voice_name } ans['voice'] = self.voice_name
if self.rate: if self.rate:
ans['rate'] = self.rate ans['rate'] = self.rate
if self.pitch: if self.pitch:
@ -129,8 +128,7 @@ class EngineSpecificSettings(NamedTuple):
ans['output_module'] = self.output_module ans['output_module'] = self.output_module
return ans return ans
def save_to_config(self): def save_to_config(self, prefs):
prefs = load_config()
val = self.as_dict val = self.as_dict
engines = prefs.get('engines', {}) engines = prefs.get('engines', {})
if not val: if not val:
@ -149,8 +147,10 @@ def available_engines() -> dict[str, EngineMetadata]:
e.setEngine(name) e.setEngine(name)
cap = int(e.engineCapabilities().value) cap = int(e.engineCapabilities().value)
return EngineMetadata(name, return EngineMetadata(name,
TrackingCapability.WordByWord if cap & int(QTextToSpeech.Capability.WordByWordProgress.value) else TrackingCapability.NoTracking, tracking_capability=TrackingCapability.WordByWord if cap & int(
allows_choosing_audio_device, bool(cap & int(QTextToSpeech.Capability.Synthesize.value))) QTextToSpeech.Capability.WordByWordProgress.value) else TrackingCapability.NoTracking,
allows_choosing_audio_device=allows_choosing_audio_device,
can_synthesize_audio_data=bool(cap & int(QTextToSpeech.Capability.Synthesize.value)))
for x in QTextToSpeech.availableEngines(): for x in QTextToSpeech.availableEngines():
if x == 'winrt': if x == 'winrt':