From b4c2478948ac96b9961040028496b6ef697b7b98 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 17 Oct 2024 14:13:22 +0530 Subject: [PATCH] More work on UI for TTS embed --- src/calibre/ebooks/oeb/polish/tts.py | 29 ++++-- src/calibre/gui2/tts/piper.py | 7 +- src/calibre/gui2/tweak_book/tts.py | 135 ++++++++++++++++++++++++++- 3 files changed, 156 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/tts.py b/src/calibre/ebooks/oeb/polish/tts.py index 17678e0740..4afe26f9f5 100644 --- a/src/calibre/ebooks/oeb/polish/tts.py +++ b/src/calibre/ebooks/oeb/polish/tts.py @@ -7,6 +7,7 @@ import os import sys from collections import defaultdict from contextlib import suppress +from functools import partial from typing import NamedTuple from lxml.etree import ElementBase as Element @@ -380,14 +381,15 @@ class ReportProgress: def __init__(self): self.current_stage = '' - def __call__(self, stage: str, item: str, count: int, total: int) -> None: + def __call__(self, stage: str, item: str, count: int, total: int) -> bool: if stage != self.current_stage: self.current_stage = stage print() print(self.current_stage) - return + return False frac = count / total print(f'\r{frac:4.0%} {item}', end='') + return False def make_par(container, seq, html_href, audio_href, elem_id, pos, duration) -> None: @@ -436,12 +438,13 @@ def remove_embedded_tts(container): container.remove_item(aname) -def embed_tts(container, report_progress=None, parent_widget=None): +def embed_tts(container, report_progress=None, callback_to_download_voices=None): report_progress = report_progress or ReportProgress() if container.book_type != 'epub': raise UnsupportedContainerType(_('Only the EPUB format has support for embedding speech overlay audio')) if container.opf_version_parsed[0] < 3: - report_progress(_('Updating book internals'), '', 0, 0) + if report_progress(_('Updating book internals'), '', 0, 0): + return False upgrade_book(container, print) remove_embedded_tts(container) @@ -455,7 +458,8 @@ def embed_tts(container, report_progress=None, parent_widget=None): if container.mime_map.get(name) in OEB_DOCS: name_map[name] = PerFileData(name) stage = _('Processing HTML') - report_progress(stage, '', 0, len(name_map)) + if report_progress(stage, '', 0, len(name_map)): + return False all_voices = set() total_num_sentences = 0 for i, (name, pfd) in enumerate(name_map.items()): @@ -467,10 +471,16 @@ def embed_tts(container, report_progress=None, parent_widget=None): pfd.key_map[key].append(s) all_voices.add(key) container.dirty(name) - report_progress(stage, name, i+1, len(name_map)) - piper.ensure_voices_downloaded(iter(all_voices), parent=parent_widget) + if report_progress(stage, name, i+1, len(name_map)): + return False + if callback_to_download_voices is None: + piper.ensure_voices_downloaded(iter(all_voices)) + else: + if not callback_to_download_voices(partial(piper.ensure_voices_downloaded, iter(all_voices))): + return False stage = _('Converting text to speech') - report_progress(stage, '', 0, total_num_sentences) + if report_progress(stage, '', 0, total_num_sentences): + return False snum = 0 size_of_audio_data = 0 mmap = {container.href_to_name(item.get('href'), container.opf_name):item for item in container.manifest_items} @@ -484,7 +494,8 @@ def embed_tts(container, report_progress=None, parent_widget=None): audio_map[s] = audio_data, duration size_of_audio_data += len(audio_data) snum += 1 - report_progress(stage, _('Sentence number: {}').format(snum), snum, total_num_sentences) + if report_progress(stage, _('Sentence number: {}').format(snum), snum, total_num_sentences): + return False wav = io.BytesIO() wav.write(wav_header_for_pcm_data(size_of_audio_data, HIGH_QUALITY_SAMPLE_RATE)) afitem = container.generate_item(name + '.m4a', id_prefix='tts-') diff --git a/src/calibre/gui2/tts/piper.py b/src/calibre/gui2/tts/piper.py index 8279897eab..840e227877 100644 --- a/src/calibre/gui2/tts/piper.py +++ b/src/calibre/gui2/tts/piper.py @@ -628,10 +628,13 @@ class PiperEmbedded: raw_data = resample_raw_audio_16bit(raw_data, self._current_audio_rate, sample_rate) yield raw_data, duration_of_raw_audio_data(raw_data, sample_rate) - def ensure_voices_downloaded(self, specs: Iterable[tuple[str, str]], parent: QObject = None) -> None: + def ensure_voices_downloaded(self, specs: Iterable[tuple[str, str]], parent: QObject = None) -> bool: for lang, voice_name in specs: voice = self.resolve_voice(lang, voice_name) - download_voice(voice, parent=parent, headless=parent is None) + m, c = download_voice(voice, parent=parent, headless=parent is None) + if not m: + return False + return True def shutdown(self): if self._process is not None: diff --git a/src/calibre/gui2/tweak_book/tts.py b/src/calibre/gui2/tweak_book/tts.py index a171dea8d0..bc5ad482a1 100644 --- a/src/calibre/gui2/tweak_book/tts.py +++ b/src/calibre/gui2/tweak_book/tts.py @@ -1,14 +1,18 @@ #!/usr/bin/env python # License: GPLv3 Copyright: 2024, Kovid Goyal +import os import sys +import traceback -from qt.core import QHBoxLayout, QStackedLayout, QTextBrowser, QVBoxLayout, QWidget +from qt.core import QDialogButtonBox, QHBoxLayout, QIcon, QStackedLayout, Qt, QTextBrowser, QVBoxLayout, QWidget, pyqtSignal +from calibre.gui2 import error_dialog from calibre.gui2.tweak_book.widgets import Dialog +from calibre.gui2.widgets import BusyCursor -class ConfigWidget(QWidget): +class EngineSettingsWidget(QWidget): def __init__(self, parent=None): from calibre.gui2.tts.config import EmbeddingConfig @@ -18,20 +22,138 @@ class ConfigWidget(QWidget): self.conf = c = EmbeddingConfig(self) h.addWidget(c) self.help = q = QTextBrowser(self) - h.addWidget(q) + h.addWidget(q, 10) + q.setHtml(_(''' +

Add Text-to-speech narration

+ +

Add an audio overlay to this book using Text-to-speech technology. Then users reading this book in a reader that supports +audio overlays, such as the calibre viewer, will be able to hear the text read to them, if they wish. + +

You can mark different passages to be spoken by different voices as shown in the example below: + +

<p data-calibre-tts="{0}">This will be voiced by "{0}"</p>
+
<p data-calibre-tts="{1}">This will be voiced by "{1}"</p>
+ +

Note that generating the Text-to-speech audio will be quite slow, +at the rate of approximately one sentence per couple of seconds, depending on your computer's hardware, +so consider leave it running overnight. +''').format('cory', 'ryan')) + self.save_settings = c.save_settings + + +class Progress(QWidget): + + cancel_requested: bool = False + current_stage: str = '' + + def __init__(self, parent: QWidget = None): + super().__init__(parent) + self.v = v = QVBoxLayout(self) + v.setAlignment(Qt.AlignmentFlag.AlignCenter) + v.setContentsMargins(0, 0, 0, 0) + + def __call__(self, stage: str, item: str, count: int, total: int) -> bool: + return self.cancel_requested class TTSEmbed(Dialog): + report_progress = pyqtSignal(object, object) + worker_done = pyqtSignal(object) + ensure_voices_downloaded_signal = pyqtSignal(object, object) + def __init__(self, container, parent=None): self.container = container + from threading import Thread + self.worker_thread = Thread(target=self.worker, daemon=True) + self.worker_done.connect(self.on_worker_done, type=Qt.ConnectionType.QueuedConnection) + self.ensure_voices_downloaded_signal.connect(self.do_ensure_voices_downloaded, type=Qt.ConnectionType.QueuedConnection) super().__init__(_('Add Text-to-speech narration'), 'tts-overlay-dialog', parent=parent) def setup_ui(self): self.v = v = QVBoxLayout(self) - self.stack = s = QStackedLayout(self) + self.engine_settings_widget = e = EngineSettingsWidget(self) + self.stack = s = QStackedLayout() + s.addWidget(e) + s.setCurrentIndex(0) v.addLayout(s) + + self.progress = p = Progress(self) + self.report_progress.connect(self.do_report_progress, type=Qt.ConnectionType.QueuedConnection) + s.addWidget(p) + + self.remove_media_button = b = self.bb.addButton(_('&Remove existing audio'), QDialogButtonBox.ButtonRole.ActionRole) + b.setToolTip(_('Remove any exisiting audio overlays, such as a previously created Text-to-speech narration from this book')) + b.setIcon(QIcon.ic('trash.png')) + b.clicked.connect(self.remove_media) v.addWidget(self.bb) + self.update_button_box() + self.stack.currentChanged.connect(self.update_button_box) + + def update_button_box(self): + if self.stack.currentIndex() == 0: + self.bb.setStandardButtons(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel) + self.remove_media_button.setVisible(True) + else: + self.bb.setStandardButtons(QDialogButtonBox.StandardButton.Cancel) + self.remove_media_button.setVisible(False) + + def remove_media(self): + from calibre.ebooks.oeb.polish.tts import remove_embedded_tts + remove_embedded_tts(self.container) + super().accept() + + def accept(self): + if self.stack.currentIndex() == 0: + self.engine_settings_widget.save_settings() + self.stack.setCurrentIndex(1) + self.worker_thread.start() + + def do_report_progress(self, a, kw): + self.progress(*a, **kw) + + def worker(self): + from calibre.ebooks.oeb.polish.tts import embed_tts + def report_progress(*a, **kw): + self.report_progress.emit(a, kw) + return self.progress.cancel_requested + try: + err = embed_tts(self.container, report_progress, self.ensure_voices_downloaded) + except Exception as e: + err = e + err.det_msg = traceback.format_exc() + self.worker_done.emit(err) + + def ensure_voices_downloaded(self, callback): + from queue import Queue + queue = Queue() + self.ensure_voices_downloaded_signal.emit(callback, queue) + e = queue.get() + if isinstance(e, Exception): + raise e + return e + + def do_ensure_voices_downloaded(self, callback, queue): + try: + queue.put(callback(self)) + except Exception as e: + e.det_msg = traceback.format_exc() + queue.put(e) + + def on_worker_done(self, err_or_ok): + if isinstance(err_or_ok, Exception): + error_dialog(self, _('Text-to-speech narration failed'), str(err_or_ok), det_msg=getattr(err_or_ok, 'det_msg', ''), show=True) + return super().reject() + return super().accept() if err_or_ok else super().reject() + + def reject(self): + if self.stack.currentIndex() == 0: + return super().reject() + with BusyCursor(): + self.progress.cancel_requested = True + self.bb.setEnabled(False) + return super().reject() + def develop(): @@ -44,6 +166,11 @@ def develop(): d.exec() del d del app + b, e = os.path.splitext(path) + outpath = b + '-tts' + e + container.commit(outpath) + print('Output saved to:', outpath) + if __name__ == '__main__':