mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Config widget for embedded TTS config
This commit is contained in:
parent
3e713d2ea8
commit
927f45e1cd
@ -2,7 +2,9 @@
|
|||||||
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
from qt.core import (
|
from qt.core import (
|
||||||
|
QAbstractItemView,
|
||||||
QCheckBox,
|
QCheckBox,
|
||||||
|
QDialog,
|
||||||
QDoubleSpinBox,
|
QDoubleSpinBox,
|
||||||
QFont,
|
QFont,
|
||||||
QFormLayout,
|
QFormLayout,
|
||||||
@ -14,6 +16,8 @@ from qt.core import (
|
|||||||
QPushButton,
|
QPushButton,
|
||||||
QSize,
|
QSize,
|
||||||
QSlider,
|
QSlider,
|
||||||
|
QStyle,
|
||||||
|
QStyleOptionViewItem,
|
||||||
Qt,
|
Qt,
|
||||||
QTreeWidget,
|
QTreeWidget,
|
||||||
QTreeWidgetItem,
|
QTreeWidgetItem,
|
||||||
@ -23,6 +27,7 @@ from qt.core import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from calibre.gui2.tts.types import (
|
from calibre.gui2.tts.types import (
|
||||||
|
TTS_EMBEDED_CONFIG,
|
||||||
AudioDeviceId,
|
AudioDeviceId,
|
||||||
EngineMetadata,
|
EngineMetadata,
|
||||||
EngineSpecificSettings,
|
EngineSpecificSettings,
|
||||||
@ -153,7 +158,8 @@ class Voices(QTreeWidget):
|
|||||||
|
|
||||||
voice_changed = pyqtSignal()
|
voice_changed = pyqtSignal()
|
||||||
|
|
||||||
def __init__(self, parent=None):
|
def __init__(self, parent=None, for_embedding=False):
|
||||||
|
self.for_embedding = for_embedding
|
||||||
super().__init__(parent)
|
super().__init__(parent)
|
||||||
self.setHeaderHidden(True)
|
self.setHeaderHidden(True)
|
||||||
self.system_default_voice = Voice()
|
self.system_default_voice = Voice()
|
||||||
@ -161,6 +167,34 @@ class Voices(QTreeWidget):
|
|||||||
self.normal_font = f = self.font()
|
self.normal_font = f = self.font()
|
||||||
self.highlight_font = f = QFont(f)
|
self.highlight_font = f = QFont(f)
|
||||||
f.setBold(True), f.setItalic(True)
|
f.setBold(True), f.setItalic(True)
|
||||||
|
self.ignore_item_changes = False
|
||||||
|
if self.for_embedding:
|
||||||
|
self.setSelectionMode(QAbstractItemView.SelectionMode.NoSelection)
|
||||||
|
self.itemChanged.connect(self.item_changed)
|
||||||
|
|
||||||
|
def item_changed(self, item: QTreeWidgetItem, column: int):
    """Keep at most one voice checked within a language group.

    Slot for ``itemChanged`` (connected only when the widget is created
    for embedding). When a voice row in column 0 becomes checked, every
    other checked sibling under the same language row is unchecked.

    :param item: the tree item whose data changed
    :param column: the column that changed; only column 0 is handled
    """
    if column != 0 or self.ignore_item_changes:
        return
    parent = item.parent()
    # Top-level (language) rows have no real parent: Qt hands back None for
    # them rather than the invisible root item, so both cases are rejected
    # before the parent is dereferenced.
    if parent is None or parent is self.invisibleRootItem():
        return
    if item.checkState(0) != Qt.CheckState.Checked:
        return
    # Unchecking siblings re-emits itemChanged; the flag stops this handler
    # from re-entering, and try/finally guarantees it is always cleared.
    self.ignore_item_changes = True
    try:
        for i in range(parent.childCount()):
            sibling = parent.child(i)
            if sibling is not item and sibling.checkState(0) == Qt.CheckState.Checked:
                sibling.setCheckState(0, Qt.CheckState.Unchecked)
    finally:
        self.ignore_item_changes = False
|
||||||
|
|
||||||
|
def mousePressEvent(self, event):
    """Toggle a voice's check box when its row is clicked beside the indicator.

    Only active in embedding mode and only for voice rows (items that have
    a real parent language row). Language header rows are never toggled, so
    no check box is ever created on them. The event is always forwarded to
    the base class afterwards.

    :param event: the mouse press event delivered by Qt
    """
    item = self.itemAt(event.pos())
    if self.for_embedding and item is not None:
        parent = item.parent()
        # Top-level rows report None as their parent, so test for that
        # explicitly instead of only comparing against the invisible root.
        if parent is not None and parent is not self.invisibleRootItem():
            rect = self.visualItemRect(item)
            # Horizontal click offset measured from the start of the item.
            x = event.pos().x() - (rect.x() + self.frameWidth())
            option = QStyleOptionViewItem()
            self.initViewItemOption(option)
            option.rect = rect
            option.features |= QStyleOptionViewItem.ViewItemFeature.HasCheckIndicator
            checkbox_rect = self.style().subElementRect(QStyle.SubElement.SE_ItemViewItemCheckIndicator, option, self)
            # Clicks landing on the indicator itself are left alone here;
            # presumably the base class toggles those on its own (TODO confirm).
            if x > checkbox_rect.width():
                if item.checkState(0) != Qt.CheckState.Checked:
                    item.setCheckState(0, Qt.CheckState.Checked)
                else:
                    item.setCheckState(0, Qt.CheckState.Unchecked)
    super().mousePressEvent(event)
|
||||||
|
|
||||||
def sizeHint(self) -> QSize:
|
def sizeHint(self) -> QSize:
|
||||||
return QSize(400, 500)
|
return QSize(400, 500)
|
||||||
@ -170,8 +204,14 @@ class Voices(QTreeWidget):
|
|||||||
is_downloaded = bool(voice and voice.engine_data and voice.engine_data.get('is_downloaded'))
|
is_downloaded = bool(voice and voice.engine_data and voice.engine_data.get('is_downloaded'))
|
||||||
ans.setFont(0, self.highlight_font if is_downloaded else self.normal_font)
|
ans.setFont(0, self.highlight_font if is_downloaded else self.normal_font)
|
||||||
|
|
||||||
def set_voices(self, all_voices: tuple[Voice, ...], current_voice: str, engine_metadata: EngineMetadata) -> None:
|
def set_voices(
|
||||||
|
self, all_voices: tuple[Voice, ...], current_voice: str, engine_metadata: EngineMetadata,
|
||||||
|
preferred_voices: dict[str, str] | None = None
|
||||||
|
) -> None:
|
||||||
self.clear()
|
self.clear()
|
||||||
|
if self.for_embedding:
|
||||||
|
current_voice = ''
|
||||||
|
preferred_voices = preferred_voices or {}
|
||||||
current_item = None
|
current_item = None
|
||||||
def qv(parent, voice):
|
def qv(parent, voice):
|
||||||
nonlocal current_item
|
nonlocal current_item
|
||||||
@ -179,11 +219,15 @@ class Voices(QTreeWidget):
|
|||||||
ans = QTreeWidgetItem(parent, [text])
|
ans = QTreeWidgetItem(parent, [text])
|
||||||
ans.setData(0, Qt.ItemDataRole.UserRole, voice)
|
ans.setData(0, Qt.ItemDataRole.UserRole, voice)
|
||||||
ans.setToolTip(0, voice.tooltip(engine_metadata))
|
ans.setToolTip(0, voice.tooltip(engine_metadata))
|
||||||
|
if self.for_embedding:
|
||||||
|
ans.setFlags(Qt.ItemFlag.ItemIsUserCheckable | Qt.ItemFlag.ItemIsEnabled)
|
||||||
|
ans.setCheckState(0, Qt.CheckState.Unchecked)
|
||||||
if current_voice == voice.name:
|
if current_voice == voice.name:
|
||||||
current_item = ans
|
current_item = ans
|
||||||
self.set_item_downloaded_state(ans)
|
self.set_item_downloaded_state(ans)
|
||||||
return ans
|
return ans
|
||||||
qv(self.invisibleRootItem(), self.system_default_voice)
|
if not self.for_embedding:
|
||||||
|
qv(self.invisibleRootItem(), self.system_default_voice)
|
||||||
vmap = {}
|
vmap = {}
|
||||||
for v in all_voices:
|
for v in all_voices:
|
||||||
vmap.setdefault(v.language_code, []).append(v)
|
vmap.setdefault(v.language_code, []).append(v)
|
||||||
@ -197,9 +241,12 @@ class Voices(QTreeWidget):
|
|||||||
parent = parent_map.get(langcode)
|
parent = parent_map.get(langcode)
|
||||||
if parent is None:
|
if parent is None:
|
||||||
parent_map[langcode] = parent = QTreeWidgetItem(self.invisibleRootItem(), [lang(langcode)])
|
parent_map[langcode] = parent = QTreeWidgetItem(self.invisibleRootItem(), [lang(langcode)])
|
||||||
parent.setFlags(parent.flags() & ~Qt.ItemFlag.ItemIsSelectable)
|
parent.setFlags(parent.flags() & ~Qt.ItemFlag.ItemIsSelectable & ~Qt.ItemFlag.ItemIsUserCheckable)
|
||||||
|
parent.setData(0, Qt.ItemDataRole.UserRole, langcode)
|
||||||
for voice in vmap[langcode]:
|
for voice in vmap[langcode]:
|
||||||
qv(parent, voice)
|
v = qv(parent, voice)
|
||||||
|
if self.for_embedding and voice.name and preferred_voices.get(langcode) == voice.name:
|
||||||
|
v.setCheckState(0, Qt.CheckState.Checked)
|
||||||
if current_item is not None:
|
if current_item is not None:
|
||||||
self.setCurrentItem(current_item)
|
self.setCurrentItem(current_item)
|
||||||
|
|
||||||
@ -208,6 +255,19 @@ class Voices(QTreeWidget):
|
|||||||
voice = self.current_voice
|
voice = self.current_voice
|
||||||
return voice.name if voice else ''
|
return voice.name if voice else ''
|
||||||
|
|
||||||
|
@property
def preferred_voices(self) -> dict[str, str] | None:
    """Map each top-level row's stored user data to its checked voice name.

    Walks every top-level row and its children; a checked child with a
    non-empty voice name is recorded under the user-role data of its
    top-level row (a later checked child replaces an earlier one).

    :return: the mapping, or None when no named voice is checked
    """
    root = self.invisibleRootItem()
    chosen = {}
    for row in range(root.childCount()):
        group = root.child(row)
        langcode = group.data(0, Qt.ItemDataRole.UserRole)
        for idx in range(group.childCount()):
            entry = group.child(idx)
            if entry.checkState(0) != Qt.CheckState.Checked:
                continue
            voice = entry.data(0, Qt.ItemDataRole.UserRole)
            if voice.name:
                chosen[langcode] = voice.name
    return chosen if chosen else None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_voice(self) -> Voice | None:
|
def current_voice(self) -> Voice | None:
|
||||||
ci = self.currentItem()
|
ci = self.currentItem()
|
||||||
@ -224,8 +284,9 @@ class EngineSpecificConfig(QWidget):
|
|||||||
|
|
||||||
voice_changed = pyqtSignal()
|
voice_changed = pyqtSignal()
|
||||||
|
|
||||||
def __init__(self, parent):
|
def __init__(self, parent: QWidget = None, for_embedding: bool = False):
|
||||||
super().__init__(parent)
|
super().__init__(parent)
|
||||||
|
self.for_embedding = for_embedding
|
||||||
self.engine_name = ''
|
self.engine_name = ''
|
||||||
self.l = l = QFormLayout(self)
|
self.l = l = QFormLayout(self)
|
||||||
devs = QMediaDevices.audioOutputs()
|
devs = QMediaDevices.audioOutputs()
|
||||||
@ -249,9 +310,9 @@ class EngineSpecificConfig(QWidget):
|
|||||||
l.addRow(v)
|
l.addRow(v)
|
||||||
self.audio_device = ad = QComboBox(self)
|
self.audio_device = ad = QComboBox(self)
|
||||||
l.addRow(_('Output a&udio to:'), ad)
|
l.addRow(_('Output a&udio to:'), ad)
|
||||||
self.voices = v = Voices(self)
|
self.voices = v = Voices(self, self.for_embedding)
|
||||||
v.voice_changed.connect(self.voice_changed)
|
v.voice_changed.connect(self.voice_changed)
|
||||||
la = QLabel(_('V&oices:'))
|
la = QLabel(_('Choose &default voice for language:') if self.for_embedding else _('V&oices:'))
|
||||||
la.setBuddy(v)
|
la.setBuddy(v)
|
||||||
l.addRow(la)
|
l.addRow(la)
|
||||||
l.addRow(v)
|
l.addRow(v)
|
||||||
@ -265,7 +326,10 @@ class EngineSpecificConfig(QWidget):
|
|||||||
tts = create_tts_backend(force_engine=engine_name)
|
tts = create_tts_backend(force_engine=engine_name)
|
||||||
if engine_name not in self.voice_data:
|
if engine_name not in self.voice_data:
|
||||||
self.voice_data[engine_name] = tts.available_voices
|
self.voice_data[engine_name] = tts.available_voices
|
||||||
self.engine_specific_settings[engine_name] = EngineSpecificSettings.create_from_config(engine_name)
|
if self.for_embedding:
|
||||||
|
self.engine_specific_settings[engine_name] = EngineSpecificSettings.create_from_config(engine_name, TTS_EMBEDED_CONFIG)
|
||||||
|
else:
|
||||||
|
self.engine_specific_settings[engine_name] = EngineSpecificSettings.create_from_config(engine_name)
|
||||||
self.default_output_modules[engine_name] = tts.default_output_module
|
self.default_output_modules[engine_name] = tts.default_output_module
|
||||||
self.output_module.blockSignals(True)
|
self.output_module.blockSignals(True)
|
||||||
self.output_module.clear()
|
self.output_module.clear()
|
||||||
@ -292,7 +356,7 @@ class EngineSpecificConfig(QWidget):
|
|||||||
self.pitch.val = 0
|
self.pitch.val = 0
|
||||||
self.layout().setRowVisible(self.pitch, False)
|
self.layout().setRowVisible(self.pitch, False)
|
||||||
self.layout().setRowVisible(self.pitch, metadata.can_change_pitch)
|
self.layout().setRowVisible(self.pitch, metadata.can_change_pitch)
|
||||||
if metadata.can_change_volume:
|
if metadata.can_change_volume and not self.for_embedding:
|
||||||
self.layout().setRowVisible(self.volume, True)
|
self.layout().setRowVisible(self.volume, True)
|
||||||
self.volume.val = s.volume
|
self.volume.val = s.volume
|
||||||
else:
|
else:
|
||||||
@ -301,7 +365,7 @@ class EngineSpecificConfig(QWidget):
|
|||||||
if metadata.has_sentence_delay:
|
if metadata.has_sentence_delay:
|
||||||
self.sentence_delay.val = s.sentence_delay
|
self.sentence_delay.val = s.sentence_delay
|
||||||
self.audio_device.clear()
|
self.audio_device.clear()
|
||||||
if metadata.allows_choosing_audio_device:
|
if metadata.allows_choosing_audio_device and not self.for_embedding:
|
||||||
self.audio_device.addItem(_('System default (currently {})').format(self.default_audio_device.description), '')
|
self.audio_device.addItem(_('System default (currently {})').format(self.default_audio_device.description), '')
|
||||||
for ad in self.all_audio_devices:
|
for ad in self.all_audio_devices:
|
||||||
self.audio_device.addItem(ad.description, ad.id.hex())
|
self.audio_device.addItem(ad.description, ad.id.hex())
|
||||||
@ -324,12 +388,16 @@ class EngineSpecificConfig(QWidget):
|
|||||||
if metadata.has_multiple_output_modules:
|
if metadata.has_multiple_output_modules:
|
||||||
output_module = output_module or self.default_output_modules[self.engine_name]
|
output_module = output_module or self.default_output_modules[self.engine_name]
|
||||||
all_voices = self.voice_data[self.engine_name][output_module]
|
all_voices = self.voice_data[self.engine_name][output_module]
|
||||||
self.voices.set_voices(all_voices, s.voice_name, metadata)
|
self.voices.set_voices(all_voices, s.voice_name, metadata, s.preferred_voices)
|
||||||
|
|
||||||
def as_settings(self) -> EngineSpecificSettings:
|
def as_settings(self) -> EngineSpecificSettings:
|
||||||
ans = EngineSpecificSettings(
|
ans = EngineSpecificSettings(
|
||||||
engine_name=self.engine_name,
|
engine_name=self.engine_name,
|
||||||
rate=self.rate.val, voice_name=self.voices.val, pitch=self.pitch.val, volume=self.volume.val)
|
rate=self.rate.val, pitch=self.pitch.val, volume=self.volume.val)
|
||||||
|
if self.for_embedding:
|
||||||
|
ans = ans._replace(preferred_voices=self.voices.preferred_voices)
|
||||||
|
else:
|
||||||
|
ans = ans._replace(voice_name=self.voices.val)
|
||||||
metadata = available_engines()[self.engine_name]
|
metadata = available_engines()[self.engine_name]
|
||||||
if metadata.has_sentence_delay:
|
if metadata.has_sentence_delay:
|
||||||
ans = ans._replace(sentence_delay=self.sentence_delay.val)
|
ans = ans._replace(sentence_delay=self.sentence_delay.val)
|
||||||
@ -427,6 +495,42 @@ class ConfigDialog(Dialog):
|
|||||||
super().accept()
|
super().accept()
|
||||||
|
|
||||||
|
|
||||||
|
class EmbeddingConfig(QWidget):
    """Widget wrapping an EngineSpecificConfig created in embedding mode.

    The wrapped configuration is fixed to the 'piper' engine, and settings
    are saved to the config named by TTS_EMBEDED_CONFIG.
    """

    def __init__(self, parent=None):
        """Build the layout and select the 'piper' engine."""
        super().__init__(parent)
        layout = QVBoxLayout(self)
        self.l = layout
        esc = EngineSpecificConfig(self, for_embedding=True)
        self.engine_specific_config = esc
        layout.addWidget(esc)
        esc.set_engine('piper')

    def save_settings(self):
        """Save the widget's current settings into the embedding config."""
        settings = self.engine_specific_config.as_settings()
        config = load_config(TTS_EMBEDED_CONFIG)
        # The config object is used as a context manager around the save.
        with config:
            settings.save_to_config(config, TTS_EMBEDED_CONFIG)
|
||||||
|
|
||||||
|
|
||||||
|
def develop_embedding():
    """Development entry point: show the embedding config in a dialog.

    Creates an Application, runs a throwaway dialog hosting an
    EmbeddingConfig and saves the settings if the dialog is accepted.
    """
    from calibre.gui2 import Application

    class D(Dialog):

        def __init__(self, parent=None):
            super().__init__('Configure Text to speech audio generation', 'configure-tts-embed', parent=parent)

        def setup_ui(self):
            box = QVBoxLayout(self)
            self.l = box
            self.conf = EmbeddingConfig(self)
            box.addWidget(self.conf)
            box.addWidget(self.bb)

    app = Application([])
    dialog = D()
    accepted = dialog.exec() == QDialog.DialogCode.Accepted
    if accepted:
        dialog.conf.save_settings()
    # Drop the dialog before the application object.
    del dialog
    del app
|
||||||
|
|
||||||
|
|
||||||
def develop():
|
def develop():
|
||||||
from calibre.gui2 import Application
|
from calibre.gui2 import Application
|
||||||
app = Application([])
|
app = Application([])
|
||||||
@ -437,4 +541,4 @@ def develop():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
develop()
|
develop_embedding()
|
||||||
|
@ -15,10 +15,12 @@ from calibre.utils.config_base import tweaks
|
|||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
|
|
||||||
CONFIG_NAME = 'tts'
|
CONFIG_NAME = 'tts'
|
||||||
|
TTS_EMBEDED_CONFIG = 'tts-embedded'
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(2)
|
@lru_cache(2)
|
||||||
def load_config() -> JSONConfig:
|
def load_config(config_name=CONFIG_NAME) -> JSONConfig:
|
||||||
return JSONConfig(CONFIG_NAME)
|
return JSONConfig(config_name)
|
||||||
|
|
||||||
|
|
||||||
class TrackingCapability(Enum):
|
class TrackingCapability(Enum):
|
||||||
@ -124,6 +126,7 @@ class EngineSpecificSettings(NamedTuple):
|
|||||||
output_module: str = ''
|
output_module: str = ''
|
||||||
engine_name: str = ''
|
engine_name: str = ''
|
||||||
sentence_delay: float = 0 # seconds >= 0
|
sentence_delay: float = 0 # seconds >= 0
|
||||||
|
preferred_voices: dict[str, str] | None = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_from_prefs(cls, engine_name: str, prefs: dict[str, object]) -> 'EngineSpecificSettings':
|
def create_from_prefs(cls, engine_name: str, prefs: dict[str, object]) -> 'EngineSpecificSettings':
|
||||||
@ -147,13 +150,15 @@ class EngineSpecificSettings(NamedTuple):
|
|||||||
sentence_delay = 0.
|
sentence_delay = 0.
|
||||||
with suppress(Exception):
|
with suppress(Exception):
|
||||||
sentence_delay = max(0, float(prefs.get('sentence_delay')))
|
sentence_delay = max(0, float(prefs.get('sentence_delay')))
|
||||||
|
with suppress(Exception):
|
||||||
|
preferred_voices = prefs.get('preferred_voices')
|
||||||
return EngineSpecificSettings(
|
return EngineSpecificSettings(
|
||||||
voice_name=str(prefs.get('voice', '')), output_module=om, sentence_delay=sentence_delay,
|
voice_name=str(prefs.get('voice', '')), output_module=om, sentence_delay=sentence_delay, preferred_voices=preferred_voices,
|
||||||
audio_device_id=audio_device_id, rate=rate, pitch=pitch, volume=volume, engine_name=engine_name)
|
audio_device_id=audio_device_id, rate=rate, pitch=pitch, volume=volume, engine_name=engine_name)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_from_config(cls, engine_name: str) -> 'EngineSpecificSettings':
|
def create_from_config(cls, engine_name: str, config_name: str = CONFIG_NAME) -> 'EngineSpecificSettings':
|
||||||
prefs = load_config().get('engines', {}).get(engine_name, {})
|
prefs = load_config(config_name).get('engines', {}).get(engine_name, {})
|
||||||
return cls.create_from_prefs(engine_name, prefs)
|
return cls.create_from_prefs(engine_name, prefs)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -173,10 +178,12 @@ class EngineSpecificSettings(NamedTuple):
|
|||||||
ans['output_module'] = self.output_module
|
ans['output_module'] = self.output_module
|
||||||
if self.sentence_delay:
|
if self.sentence_delay:
|
||||||
ans['sentence_delay'] = self.sentence_delay
|
ans['sentence_delay'] = self.sentence_delay
|
||||||
|
if self.preferred_voices:
|
||||||
|
ans['preferred_voices'] = self.preferred_voices
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def save_to_config(self, prefs:JSONConfig | None = None):
|
def save_to_config(self, prefs:JSONConfig | None = None, config_name: str = CONFIG_NAME):
|
||||||
prefs = prefs or load_config()
|
prefs = prefs or load_config(config_name)
|
||||||
val = self.as_dict
|
val = self.as_dict
|
||||||
engines = prefs.get('engines', {})
|
engines = prefs.get('engines', {})
|
||||||
if not val:
|
if not val:
|
||||||
@ -307,10 +314,10 @@ class TTSBackend(QObject):
|
|||||||
engine_instances: dict[str, TTSBackend] = {}
|
engine_instances: dict[str, TTSBackend] = {}
|
||||||
|
|
||||||
|
|
||||||
def create_tts_backend(force_engine: str | None = None) -> TTSBackend:
|
def create_tts_backend(force_engine: str | None = None, config_name: str = CONFIG_NAME) -> TTSBackend:
|
||||||
if not available_engines():
|
if not available_engines():
|
||||||
raise OSError('There are no available TTS engines. Install a TTS engine before trying to use Read Aloud, such as flite or speech-dispatcher')
|
raise OSError('There are no available TTS engines. Install a TTS engine before trying to use Read Aloud, such as flite or speech-dispatcher')
|
||||||
prefs = load_config()
|
prefs = load_config(config_name)
|
||||||
engine_name = prefs.get('engine', '') if force_engine is None else force_engine
|
engine_name = prefs.get('engine', '') if force_engine is None else force_engine
|
||||||
engine_name = engine_name or default_engine_name()
|
engine_name = engine_name or default_engine_name()
|
||||||
if engine_name not in available_engines():
|
if engine_name not in available_engines():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user