mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on UI for TTS embed
This commit is contained in:
parent
7cbfd2a0df
commit
b4c2478948
@ -7,6 +7,7 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from contextlib import suppress
|
from contextlib import suppress
|
||||||
|
from functools import partial
|
||||||
from typing import NamedTuple
|
from typing import NamedTuple
|
||||||
|
|
||||||
from lxml.etree import ElementBase as Element
|
from lxml.etree import ElementBase as Element
|
||||||
class ReportProgress:
    '''
    Default progress reporter that prints progress to stdout.

    Instances are callable with ``(stage, item, count, total)`` and return a
    bool which callers treat as a "cancel requested" flag. This console
    implementation never requests cancellation, so it always returns False.
    '''

    def __init__(self):
        # Name of the stage most recently announced, used to detect stage changes
        self.current_stage = ''

    def __call__(self, stage: str, item: str, count: int, total: int) -> bool:
        '''
        Report progress for ``item`` within ``stage``.

        :param stage: Human readable name of the current stage of work.
        :param item: Description of the item just processed.
        :param count: Number of items processed so far in this stage.
        :param total: Total number of items in this stage, may be zero.
        :return: Always False, meaning the operation should not be cancelled.
        '''
        if stage != self.current_stage:
            # First report for a new stage: print the stage name on its own
            # line and do not print a percentage for this call.
            self.current_stage = stage
            print()
            print(self.current_stage)
            return False
        # A stage with nothing to do has total == 0, avoid ZeroDivisionError
        frac = count / total if total else 0.0
        # \r rewrites the same console line on every update
        print(f'\r{frac:4.0%} {item}', end='')
        return False
|
||||||
|
|
||||||
|
|
||||||
def make_par(container, seq, html_href, audio_href, elem_id, pos, duration) -> None:
|
def make_par(container, seq, html_href, audio_href, elem_id, pos, duration) -> None:
|
||||||
@ -436,12 +438,13 @@ def remove_embedded_tts(container):
|
|||||||
container.remove_item(aname)
|
container.remove_item(aname)
|
||||||
|
|
||||||
|
|
||||||
def embed_tts(container, report_progress=None, parent_widget=None):
|
def embed_tts(container, report_progress=None, callback_to_download_voices=None):
|
||||||
report_progress = report_progress or ReportProgress()
|
report_progress = report_progress or ReportProgress()
|
||||||
if container.book_type != 'epub':
|
if container.book_type != 'epub':
|
||||||
raise UnsupportedContainerType(_('Only the EPUB format has support for embedding speech overlay audio'))
|
raise UnsupportedContainerType(_('Only the EPUB format has support for embedding speech overlay audio'))
|
||||||
if container.opf_version_parsed[0] < 3:
|
if container.opf_version_parsed[0] < 3:
|
||||||
report_progress(_('Updating book internals'), '', 0, 0)
|
if report_progress(_('Updating book internals'), '', 0, 0):
|
||||||
|
return False
|
||||||
upgrade_book(container, print)
|
upgrade_book(container, print)
|
||||||
remove_embedded_tts(container)
|
remove_embedded_tts(container)
|
||||||
|
|
||||||
@ -455,7 +458,8 @@ def embed_tts(container, report_progress=None, parent_widget=None):
|
|||||||
if container.mime_map.get(name) in OEB_DOCS:
|
if container.mime_map.get(name) in OEB_DOCS:
|
||||||
name_map[name] = PerFileData(name)
|
name_map[name] = PerFileData(name)
|
||||||
stage = _('Processing HTML')
|
stage = _('Processing HTML')
|
||||||
report_progress(stage, '', 0, len(name_map))
|
if report_progress(stage, '', 0, len(name_map)):
|
||||||
|
return False
|
||||||
all_voices = set()
|
all_voices = set()
|
||||||
total_num_sentences = 0
|
total_num_sentences = 0
|
||||||
for i, (name, pfd) in enumerate(name_map.items()):
|
for i, (name, pfd) in enumerate(name_map.items()):
|
||||||
@ -467,10 +471,16 @@ def embed_tts(container, report_progress=None, parent_widget=None):
|
|||||||
pfd.key_map[key].append(s)
|
pfd.key_map[key].append(s)
|
||||||
all_voices.add(key)
|
all_voices.add(key)
|
||||||
container.dirty(name)
|
container.dirty(name)
|
||||||
report_progress(stage, name, i+1, len(name_map))
|
if report_progress(stage, name, i+1, len(name_map)):
|
||||||
piper.ensure_voices_downloaded(iter(all_voices), parent=parent_widget)
|
return False
|
||||||
|
if callback_to_download_voices is None:
|
||||||
|
piper.ensure_voices_downloaded(iter(all_voices))
|
||||||
|
else:
|
||||||
|
if not callback_to_download_voices(partial(piper.ensure_voices_downloaded, iter(all_voices))):
|
||||||
|
return False
|
||||||
stage = _('Converting text to speech')
|
stage = _('Converting text to speech')
|
||||||
report_progress(stage, '', 0, total_num_sentences)
|
if report_progress(stage, '', 0, total_num_sentences):
|
||||||
|
return False
|
||||||
snum = 0
|
snum = 0
|
||||||
size_of_audio_data = 0
|
size_of_audio_data = 0
|
||||||
mmap = {container.href_to_name(item.get('href'), container.opf_name):item for item in container.manifest_items}
|
mmap = {container.href_to_name(item.get('href'), container.opf_name):item for item in container.manifest_items}
|
||||||
@ -484,7 +494,8 @@ def embed_tts(container, report_progress=None, parent_widget=None):
|
|||||||
audio_map[s] = audio_data, duration
|
audio_map[s] = audio_data, duration
|
||||||
size_of_audio_data += len(audio_data)
|
size_of_audio_data += len(audio_data)
|
||||||
snum += 1
|
snum += 1
|
||||||
report_progress(stage, _('Sentence number: {}').format(snum), snum, total_num_sentences)
|
if report_progress(stage, _('Sentence number: {}').format(snum), snum, total_num_sentences):
|
||||||
|
return False
|
||||||
wav = io.BytesIO()
|
wav = io.BytesIO()
|
||||||
wav.write(wav_header_for_pcm_data(size_of_audio_data, HIGH_QUALITY_SAMPLE_RATE))
|
wav.write(wav_header_for_pcm_data(size_of_audio_data, HIGH_QUALITY_SAMPLE_RATE))
|
||||||
afitem = container.generate_item(name + '.m4a', id_prefix='tts-')
|
afitem = container.generate_item(name + '.m4a', id_prefix='tts-')
|
||||||
|
@ -628,10 +628,13 @@ class PiperEmbedded:
|
|||||||
raw_data = resample_raw_audio_16bit(raw_data, self._current_audio_rate, sample_rate)
|
raw_data = resample_raw_audio_16bit(raw_data, self._current_audio_rate, sample_rate)
|
||||||
yield raw_data, duration_of_raw_audio_data(raw_data, sample_rate)
|
yield raw_data, duration_of_raw_audio_data(raw_data, sample_rate)
|
||||||
|
|
||||||
def ensure_voices_downloaded(self, specs: Iterable[tuple[str, str]], parent: QObject = None) -> bool:
    '''
    Download every voice named in ``specs``, stopping at the first failure.

    :param specs: Iterable of ``(lang, voice_name)`` pairs, each of which is
        resolved to a voice via :meth:`resolve_voice`.
    :param parent: Passed through to ``download_voice()``. When it is None the
        download is requested in headless mode.
    :return: False as soon as ``download_voice()`` returns a falsy first
        value for some voice, True if that never happens.
    '''
    run_headless = parent is None
    for language, name in specs:
        resolved = self.resolve_voice(language, name)
        # download_voice() returns a pair, only its first member decides success here
        first, _second = download_voice(resolved, parent=parent, headless=run_headless)
        if not first:
            return False
    return True
|
||||||
|
|
||||||
def shutdown(self):
|
def shutdown(self):
|
||||||
if self._process is not None:
|
if self._process is not None:
|
||||||
|
@ -1,14 +1,18 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import traceback
|
||||||
|
|
||||||
from qt.core import QHBoxLayout, QStackedLayout, QTextBrowser, QVBoxLayout, QWidget
|
from qt.core import QDialogButtonBox, QHBoxLayout, QIcon, QStackedLayout, Qt, QTextBrowser, QVBoxLayout, QWidget, pyqtSignal
|
||||||
|
|
||||||
|
from calibre.gui2 import error_dialog
|
||||||
from calibre.gui2.tweak_book.widgets import Dialog
|
from calibre.gui2.tweak_book.widgets import Dialog
|
||||||
|
from calibre.gui2.widgets import BusyCursor
|
||||||
|
|
||||||
|
|
||||||
class ConfigWidget(QWidget):
|
class EngineSettingsWidget(QWidget):
|
||||||
|
|
||||||
def __init__(self, parent=None):
|
def __init__(self, parent=None):
|
||||||
from calibre.gui2.tts.config import EmbeddingConfig
|
from calibre.gui2.tts.config import EmbeddingConfig
|
||||||
@ -18,20 +22,138 @@ class ConfigWidget(QWidget):
|
|||||||
self.conf = c = EmbeddingConfig(self)
|
self.conf = c = EmbeddingConfig(self)
|
||||||
h.addWidget(c)
|
h.addWidget(c)
|
||||||
self.help = q = QTextBrowser(self)
|
self.help = q = QTextBrowser(self)
|
||||||
h.addWidget(q)
|
h.addWidget(q, 10)
|
||||||
|
q.setHtml(_('''
|
||||||
|
<h2>Add Text-to-speech narration</h2>
|
||||||
|
|
||||||
|
<p>Add an audio overlay to this book using Text-to-speech technology. Then users reading this book in a reader that supports
|
||||||
|
audio overlays, such as the calibre viewer, will be able to hear the text read to them, if they wish.
|
||||||
|
|
||||||
|
<p>You can mark different passages to be spoken by different voices as shown in the example below:
|
||||||
|
|
||||||
|
<div><code><p data-calibre-tts="{0}">This will be voiced by "{0}"</p></code></div>
|
||||||
|
<div><code><p data-calibre-tts="{1}">This will be voiced by "{1}"</p></code></div>
|
||||||
|
|
||||||
|
<p style="font-size: small">Note that generating the Text-to-speech audio will be quite slow,
|
||||||
|
at the rate of approximately one sentence per couple of seconds, depending on your computer's hardware,
|
||||||
|
so consider leaving it running overnight.
|
||||||
|
''').format('cory', 'ryan'))
|
||||||
|
self.save_settings = c.save_settings
|
||||||
|
|
||||||
|
|
||||||
|
class Progress(QWidget):
    '''
    Widget shown while the Text-to-speech audio is being generated.

    It is callable with the same ``(stage, item, count, total)`` signature as
    a progress reporter. At present the call only reports whether
    cancellation has been requested, the arguments are not displayed.
    '''

    # Set to True from outside to ask the running operation to stop
    cancel_requested: bool = False
    # Not read or written by any method of this class yet
    current_stage: str = ''

    def __init__(self, parent: QWidget = None):
        super().__init__(parent)
        layout = QVBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)
        layout.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.v = layout

    def __call__(self, stage: str, item: str, count: int, total: int) -> bool:
        '''Return True if cancellation has been requested, the arguments are currently ignored.'''
        return self.cancel_requested
|
||||||
|
|
||||||
|
|
||||||
class TTSEmbed(Dialog):
    '''
    Dialog to add a Text-to-speech audio overlay to the book in ``container``.

    Page 0 of the stack shows the engine settings, page 1 shows progress while
    ``embed_tts()`` runs in a worker thread. The worker talks to the dialog
    only through the signals below, all of which are connected with queued
    connections.
    '''

    # Emitted from the worker thread with (args, kwargs) of a progress report
    report_progress = pyqtSignal(object, object)
    # Emitted from the worker thread with the return value of embed_tts() or the exception it raised
    worker_done = pyqtSignal(object)
    # Emitted from the worker thread with (callback, queue) to have the callback run by the dialog
    ensure_voices_downloaded_signal = pyqtSignal(object, object)

    def __init__(self, container, parent=None):
        # setup_ui() is not called explicitly anywhere in this class, presumably
        # Dialog.__init__() calls it, so the container is stored first.
        self.container = container
        from threading import Thread
        super().__init__(_('Add Text-to-speech narration'), 'tts-overlay-dialog', parent=parent)
        # Signals can only be connected once the underlying QObject has been
        # initialised, so this must come after super().__init__()
        self.worker_thread = Thread(target=self.worker, daemon=True)
        self.worker_done.connect(self.on_worker_done, type=Qt.ConnectionType.QueuedConnection)
        self.ensure_voices_downloaded_signal.connect(self.do_ensure_voices_downloaded, type=Qt.ConnectionType.QueuedConnection)

    def setup_ui(self):
        '''Build the two page stack (settings, progress) and the button box.'''
        self.v = v = QVBoxLayout(self)
        self.engine_settings_widget = e = EngineSettingsWidget(self)
        # Created without a parent because it is added to v below
        self.stack = s = QStackedLayout()
        s.addWidget(e)
        s.setCurrentIndex(0)
        v.addLayout(s)

        self.progress = p = Progress(self)
        self.report_progress.connect(self.do_report_progress, type=Qt.ConnectionType.QueuedConnection)
        s.addWidget(p)

        self.remove_media_button = b = self.bb.addButton(_('&Remove existing audio'), QDialogButtonBox.ButtonRole.ActionRole)
        b.setToolTip(_('Remove any existing audio overlays, such as a previously created Text-to-speech narration from this book'))
        b.setIcon(QIcon.ic('trash.png'))
        b.clicked.connect(self.remove_media)
        v.addWidget(self.bb)
        self.update_button_box()
        self.stack.currentChanged.connect(self.update_button_box)

    def update_button_box(self):
        '''Show OK/Cancel and the remove button on the settings page, only Cancel on the progress page.'''
        if self.stack.currentIndex() == 0:
            self.bb.setStandardButtons(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)
            self.remove_media_button.setVisible(True)
        else:
            self.bb.setStandardButtons(QDialogButtonBox.StandardButton.Cancel)
            self.remove_media_button.setVisible(False)

    def remove_media(self):
        '''Remove previously embedded TTS audio from the container and close the dialog as accepted.'''
        from calibre.ebooks.oeb.polish.tts import remove_embedded_tts
        remove_embedded_tts(self.container)
        super().accept()

    def accept(self):
        '''
        On the settings page: save the settings, switch to the progress page
        and start the worker thread. On the progress page this does nothing,
        the dialog is closed by on_worker_done() or reject().
        '''
        if self.stack.currentIndex() == 0:
            self.engine_settings_widget.save_settings()
            self.stack.setCurrentIndex(1)
            self.worker_thread.start()

    def do_report_progress(self, a, kw):
        '''Slot for the report_progress signal, forwards the report to the progress widget.'''
        self.progress(*a, **kw)

    def worker(self):
        '''
        Body of the worker thread: run embed_tts() and emit worker_done with
        either its return value or the exception it raised.
        '''
        from calibre.ebooks.oeb.polish.tts import embed_tts

        def report_progress(*a, **kw):
            # Hand the report to the dialog via the signal, and tell
            # embed_tts() whether the user has asked to cancel.
            self.report_progress.emit(a, kw)
            return self.progress.cancel_requested

        try:
            err = embed_tts(self.container, report_progress, self.ensure_voices_downloaded)
        except Exception as e:
            err = e
            # Capture the traceback now for display in on_worker_done()
            err.det_msg = traceback.format_exc()
        self.worker_done.emit(err)

    def ensure_voices_downloaded(self, callback):
        '''
        Called in the worker thread. Asks the dialog to run ``callback`` via a
        queued signal and blocks until do_ensure_voices_downloaded() puts the
        outcome on the queue.

        :return: Whatever ``callback`` returned.
        :raises Exception: Re-raises any exception raised by ``callback``.
        '''
        from queue import Queue
        queue = Queue()
        self.ensure_voices_downloaded_signal.emit(callback, queue)
        e = queue.get()
        if isinstance(e, Exception):
            raise e
        return e

    def do_ensure_voices_downloaded(self, callback, queue):
        '''
        Slot for ensure_voices_downloaded_signal. Calls ``callback`` with this
        dialog as its argument and puts the result, or the exception raised,
        on ``queue`` so that the waiting worker thread is always released.
        '''
        try:
            queue.put(callback(self))
        except Exception as e:
            e.det_msg = traceback.format_exc()
            queue.put(e)

    def on_worker_done(self, err_or_ok):
        '''
        Slot for worker_done. Shows an error dialog and rejects if the worker
        failed with an exception, otherwise accepts or rejects depending on
        the truthiness of the worker result.
        '''
        if isinstance(err_or_ok, Exception):
            error_dialog(self, _('Text-to-speech narration failed'), str(err_or_ok), det_msg=getattr(err_or_ok, 'det_msg', ''), show=True)
            return super().reject()
        return super().accept() if err_or_ok else super().reject()

    def reject(self):
        '''
        On the settings page simply close the dialog. On the progress page
        request cancellation of the running worker before closing.
        '''
        if self.stack.currentIndex() == 0:
            return super().reject()
        with BusyCursor():
            # The worker sees this flag the next time it reports progress.
            # NOTE(review): the worker thread is not joined here, so it may
            # still be running after the dialog has closed.
            self.progress.cancel_requested = True
            self.bb.setEnabled(False)
            return super().reject()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def develop():
|
def develop():
|
||||||
@ -44,6 +166,11 @@ def develop():
|
|||||||
d.exec()
|
d.exec()
|
||||||
del d
|
del d
|
||||||
del app
|
del app
|
||||||
|
b, e = os.path.splitext(path)
|
||||||
|
outpath = b + '-tts' + e
|
||||||
|
container.commit(outpath)
|
||||||
|
print('Output saved to:', outpath)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user