diff --git a/src/calibre/gui2/tts/config.py b/src/calibre/gui2/tts/config.py index 1822a65802..4412f06c7d 100644 --- a/src/calibre/gui2/tts/config.py +++ b/src/calibre/gui2/tts/config.py @@ -3,6 +3,7 @@ from qt.core import ( QCheckBox, + QDoubleSpinBox, QFont, QFormLayout, QHBoxLayout, @@ -71,6 +72,26 @@ class EngineChoice(QWidget): self.engine_description.setText(metadata.description) +class SentenceDelay(QDoubleSpinBox): + + def __init__(self, parent=None): + super().__init__(parent) + self.setRange(0., 2.) + self.setDecimals(2) + self.setSuffix(_(' seconds')) + self.setToolTip(_('The number of seconds to pause for at the end of a sentence.')) + self.setSpecialValueText(_('no pause')) + self.setSingleStep(0.05) + + @property + def val(self) -> float: + return max(0.0, self.value()) + + @val.setter + def val(self, v) -> None: + self.setValue(float(v)) + + class FloatSlider(QSlider): def __init__(self, minimum: float = -1, maximum: float = 1, factor: int = 10, parent=None): @@ -220,6 +241,8 @@ class EngineSpecificConfig(QWidget): self.engine_specific_settings = {} self.rate = r = FloatSlider(parent=self) l.addRow(_('&Speed of speech:'), r) + self.sentence_delay = d = SentenceDelay(parent=self) + l.addRow(_('&Pause after sentence:'), d) self.pitch = p = FloatSlider(parent=self) l.addRow(_('&Pitch of speech:'), p) self.volume = v = Volume(self) @@ -256,6 +279,7 @@ class EngineSpecificConfig(QWidget): else: self.layout().setRowVisible(self.output_module, False) self.output_module.blockSignals(False) + self.layout().setRowVisible(self.sentence_delay, metadata.has_sentence_delay) try: s = self.engine_specific_settings[self.engine_name] except KeyError: @@ -274,6 +298,8 @@ class EngineSpecificConfig(QWidget): else: self.layout().setRowVisible(self.volume, False) self.volume.val = None + if metadata.has_sentence_delay: + self.sentence_delay.val = s.sentence_delay self.audio_device.clear() if metadata.allows_choosing_audio_device: self.audio_device.addItem(_('System default 
(currently {})').format(self.default_audio_device.description), '') @@ -305,6 +331,8 @@ class EngineSpecificConfig(QWidget): engine_name=self.engine_name, rate=self.rate.val, voice_name=self.voices.val, pitch=self.pitch.val, volume=self.volume.val) metadata = available_engines()[self.engine_name] + if metadata.has_sentence_delay: + ans = ans._replace(sentence_delay=self.sentence_delay.val) if metadata.has_multiple_output_modules and self.output_module.currentIndex() > 0: ans = ans._replace(output_module=self.output_module.currentData()) if metadata.allows_choosing_audio_device and self.audio_device.currentIndex() > 0: diff --git a/src/calibre/gui2/tts/piper.py b/src/calibre/gui2/tts/piper.py index 7386ef4e1a..1dcbd43c53 100644 --- a/src/calibre/gui2/tts/piper.py +++ b/src/calibre/gui2/tts/piper.py @@ -286,7 +286,7 @@ class Piper(TTSBackend): cmdline = list(piper_cmdline()) + [ '--model', model_path, '--config', config_path, '--output-raw', '--json-input', - '--sentence-silence', '0', '--length_scale', str(length_scale)] + '--sentence-silence', str(s.sentence_delay), '--length_scale', str(length_scale)] if is_debugging(): cmdline.append('--debug') self._process.setProgram(cmdline[0]) diff --git a/src/calibre/gui2/tts/types.py b/src/calibre/gui2/tts/types.py index ceec7fd934..5697c2b5f9 100644 --- a/src/calibre/gui2/tts/types.py +++ b/src/calibre/gui2/tts/types.py @@ -39,6 +39,7 @@ class EngineMetadata(NamedTuple): can_change_volume: bool = True voices_have_quality_metadata: bool = False has_managed_voices: bool = False + has_sentence_delay: bool = False class Quality(Enum): @@ -122,6 +123,7 @@ class EngineSpecificSettings(NamedTuple): volume: float | None = None # 0 to 1, None is platform default volume output_module: str = '' engine_name: str = '' + sentence_delay: float = 0 # seconds >= 0 @classmethod def create_from_prefs(cls, engine_name: str, prefs: dict[str, object]) -> 'EngineSpecificSettings': @@ -142,8 +144,11 @@ class EngineSpecificSettings(NamedTuple): 
with suppress(Exception): volume = max(0, min(float(prefs.get('volume')), 1)) om = str(prefs.get('output_module', '')) + sentence_delay = 0. + with suppress(Exception): + sentence_delay = max(0, float(prefs.get('sentence_delay'))) return EngineSpecificSettings( - voice_name=str(prefs.get('voice', '')), output_module=om, + voice_name=str(prefs.get('voice', '')), output_module=om, sentence_delay=sentence_delay, audio_device_id=audio_device_id, rate=rate, pitch=pitch, volume=volume, engine_name=engine_name) @classmethod @@ -166,6 +171,8 @@ class EngineSpecificSettings(NamedTuple): ans['volume'] = self.volume if self.output_module: ans['output_module'] = self.output_module + if self.sentence_delay: + ans['sentence_delay'] = self.sentence_delay return ans def save_to_config(self, prefs:JSONConfig | None = None): @@ -219,7 +226,8 @@ def available_engines() -> dict[str, EngineMetadata]: ans['piper'] = EngineMetadata('piper', _('The Piper Neural Engine'), _( 'The "piper" engine can track the currently spoken sentence on screen. It uses a neural network ' 'for natural sounding voices. The neural network is run locally on your computer, it is fairly resource intensive to run.' - ), TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True, has_managed_voices=True) + ), TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True, has_managed_voices=True, + has_sentence_delay=True) if islinux: try: from speechd.paths import SPD_SPAWN_CMD