mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on TTS
This commit is contained in:
parent
15020ede75
commit
1813a15653
@ -278,8 +278,7 @@ class EngineSpecificConfig(QWidget):
|
|||||||
|
|
||||||
class ConfigDialog(Dialog):
|
class ConfigDialog(Dialog):
|
||||||
|
|
||||||
def __init__(self, current_tts_backend, parent=None):
|
def __init__(self, parent=None):
|
||||||
self.current_tts_backend = current_tts_backend
|
|
||||||
super().__init__(_('Configure Read aloud'), 'configure-read-aloud2', parent=parent)
|
super().__init__(_('Configure Read aloud'), 'configure-read-aloud2', parent=parent)
|
||||||
|
|
||||||
def setup_ui(self):
|
def setup_ui(self):
|
||||||
@ -307,8 +306,10 @@ class ConfigDialog(Dialog):
|
|||||||
def develop():
    """Run the Read aloud configuration dialog on its own, for development."""
    from calibre.gui2 import Application

    application = Application([])
    dialog = ConfigDialog()
    dialog.exec()
    # Release the dialog first and the application object second.
    del dialog
    del application
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -2,10 +2,11 @@
|
|||||||
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
|
||||||
from qt.core import QAction, QPlainTextEdit, QToolBar
|
from qt.core import QAction, QKeySequence, QPlainTextEdit, Qt, QTextCursor, QTextToSpeech, QToolBar
|
||||||
|
|
||||||
from calibre.gui2 import Application
|
from calibre.gui2 import Application
|
||||||
from calibre.gui2.main_window import MainWindow
|
from calibre.gui2.main_window import MainWindow
|
||||||
|
from calibre.gui2.tts2.manager import TTSManager
|
||||||
|
|
||||||
TEXT = '''\
|
TEXT = '''\
|
||||||
Demonstration of DOCX support in calibre
|
Demonstration of DOCX support in calibre
|
||||||
@ -21,36 +22,81 @@ Set the output format in the top right corner of the conversion dialog to EPUB o
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
def to_marked_text(text=TEXT):
|
|
||||||
pos = 0
|
|
||||||
for word in text.split():
|
|
||||||
yield pos
|
|
||||||
yield word
|
|
||||||
yield ' '
|
|
||||||
pos += 1 + len(word)
|
|
||||||
|
|
||||||
|
|
||||||
class MainWindow(MainWindow):
    """Development window that reads a block of text aloud via TTSManager.

    The text is shown in a read-only QPlainTextEdit. A toolbar offers
    Play/Pause, Stop, Faster, Slower and Configure actions, and the range
    reported by the manager's ``saying`` signal is selected in the editor.
    """

    def __init__(self, text):
        super().__init__()
        self.display = editor = QPlainTextEdit(self)
        self.toolbar = toolbar = QToolBar(self)
        self.tts = TTSManager(self)
        # Queued so the handler runs from the event loop rather than
        # directly inside the emitting call.
        self.tts.state_changed.connect(self.state_changed, type=Qt.ConnectionType.QueuedConnection)
        self.tts.saying.connect(self.saying)
        self.addToolBar(toolbar)
        self.setCentralWidget(editor)
        editor.setPlainText(text)
        editor.setReadOnly(True)
        self.marked_text = self._build_marked_text()

        self.play_action = play = QAction('Play')
        play.setShortcut(QKeySequence(Qt.Key.Key_Space))
        play.setCheckable(True)
        play.toggled.connect(self.toggled)
        self.toolbar.addAction(play)

        self.stop_action = stop = QAction('Stop')
        stop.setShortcut(QKeySequence(Qt.Key.Key_Escape))
        stop.triggered.connect(self.tts.stop)
        self.toolbar.addAction(stop)

        self.faster_action = faster = QAction('Faster')
        self.toolbar.addAction(faster)

        self.slower_action = slower = QAction('Slower')
        self.toolbar.addAction(slower)

        self.configure_action = configure = QAction('Configure')
        self.toolbar.addAction(configure)
        configure.triggered.connect(self.tts.configure)

        # Bring the actions in line with the manager's current state.
        self.state_changed(self.tts.state)
        self.resize(self.sizeHint())

    def _build_marked_text(self):
        """Walk the document word by word and return [pos, text, pos, text, ..., pos].

        Each integer is the cursor position before a NextWord move and each
        string is the text that move selected.
        """
        cursor = self.display.textCursor()
        cursor.setPosition(0)
        items = []
        keep_going = True
        while keep_going:
            items.append(cursor.position())
            keep_going = cursor.movePosition(QTextCursor.MoveOperation.NextWord, QTextCursor.MoveMode.KeepAnchor)
            if keep_going:
                items.append(cursor.selectedText())
                # Collapse the selection so the next move starts a fresh one.
                cursor.setPosition(cursor.position())
        cursor.setPosition(0)
        return items

    def state_changed(self, state):
        """Show *state* in the status bar and update the Play and Stop actions."""
        self.statusBar().showMessage(str(state))
        idle_states = (QTextToSpeech.State.Ready, QTextToSpeech.State.Paused, QTextToSpeech.State.Error)
        if state not in idle_states:
            self.play_action.setChecked(True)
        else:
            self.play_action.setChecked(False)
            if state is QTextToSpeech.State.Ready:
                # Move the editor cursor back to the start of the document.
                cursor = self.display.textCursor()
                cursor.setPosition(0)
                self.display.setTextCursor(cursor)
        active_states = (QTextToSpeech.State.Speaking, QTextToSpeech.State.Synthesizing)
        self.stop_action.setEnabled(state in active_states)

    def toggled(self):
        """React to the Play action being checked or unchecked."""
        tts_state = self.tts.state
        if not self.play_action.isChecked():
            if tts_state in (QTextToSpeech.State.Speaking, QTextToSpeech.State.Synthesizing):
                self.tts.pause()
            self.play_action.setText('Play')
            return
        self.play_action.setText('Pause')
        if tts_state is QTextToSpeech.State.Paused:
            self.tts.resume()
        elif tts_state in (QTextToSpeech.State.Ready, QTextToSpeech.State.Error):
            self.tts.speak_marked_text(self.marked_text)

    def saying(self, first, last):
        """Select the text from mark *first* through the word at mark *last*."""
        cursor = self.display.textCursor()
        cursor.setPosition(first)
        if first != last:
            cursor.setPosition(last, QTextCursor.MoveMode.KeepAnchor)
        # Extend the selection over the word that starts at the last mark.
        cursor.movePosition(QTextCursor.MoveOperation.WordRight, QTextCursor.MoveMode.KeepAnchor)
        self.display.setTextCursor(cursor)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -2,21 +2,160 @@
|
|||||||
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
|
||||||
from qt.core import QObject
|
from collections import deque
|
||||||
|
from typing import NamedTuple
|
||||||
|
|
||||||
|
from qt.core import QDialog, QObject, QTextToSpeech, pyqtSignal
|
||||||
|
|
||||||
|
from calibre.gui2 import error_dialog
|
||||||
|
|
||||||
|
|
||||||
|
class Utterance(NamedTuple):
    """One chunk of text that is handed to the speech backend in one call."""

    # The text of this chunk.
    text: str
    # Index into Tracker.positions at which mark lookups for this chunk start.
    index_in_positions: int
    # Offset of this chunk's first character within the full concatenated text.
    offset_in_text: int
    # Offset last recorded through Tracker.boundary_reached(); 0 until then.
    reached_offset: int = 0


class Position(NamedTuple):
    """Pairs a mark from the marked text with an offset in the full text."""

    # The integer mark exactly as supplied in the marked text.
    mark: int
    # Length of all text that preceded the mark in the marked text.
    offset_in_text: int


class Tracker:
    """Translate offsets in spoken text back into marks of a marked text.

    A marked text is an iterable mixing integers (marks) and strings. The
    strings are concatenated and split into a queue of Utterance chunks,
    while each mark is remembered together with the text offset at which it
    occurred.
    """

    def __init__(self):
        self.clear()

    def clear(self):
        """Forget all positions and queued utterances."""
        self.positions: list[Position] = []
        self.last_pos = 0
        self.queue: deque[Utterance] = deque()

    def parse_marked_text(self, marked_text, limit=32 * 1024):
        """Load *marked_text* and return the text of the first chunk.

        :param marked_text: iterable of ``int`` marks and ``str`` fragments.
        :param limit: a chunk is closed as soon as the fragments collected
            for it add up to more than this many characters.
        :return: the text of the first queued chunk, ``''`` if there is none.
        """
        self.clear()
        text = []
        text_len = chunk_len = index_in_positions = offset_in_text = 0

        def commit():
            self.queue.append(Utterance(''.join(text), index_in_positions, offset_in_text))

        for x in marked_text:
            if isinstance(x, int):
                self.positions.append(Position(x, text_len))
                continue
            text_len += len(x)
            chunk_len += len(x)
            text.append(x)
            if chunk_len > limit:
                commit()
                chunk_len = 0
                text = []
                # The next chunk starts its mark search at the most recently
                # seen mark and begins at the current end of the full text.
                index_in_positions = max(0, len(self.positions) - 1)
                offset_in_text = text_len
        if text:
            commit()
        self.marked_text = marked_text
        return self.current_text()

    def pop_first(self):
        """Discard the chunk at the head of the queue, if there is one."""
        if self.queue:
            self.queue.popleft()

    def current_text(self):
        """Return the text of the chunk at the head of the queue, or ``''``."""
        if self.queue:
            return self.queue[0].text
        return ''

    def resume(self):
        """Rewind mark tracking to the current chunk and return the text to speak.

        When an offset was recorded for the current chunk, everything before
        it is replaced by spaces so that offsets into the returned text stay
        identical to offsets into the original chunk.
        """
        self.last_pos = 0
        if self.queue:
            current = self.queue[0]
            self.last_pos = current.index_in_positions
            o = current.reached_offset
            if o:
                # make sure positions remain the same for word tracking
                # Utterance is an immutable NamedTuple, so the queue entry
                # has to be replaced rather than assigned to.
                self.queue[0] = current._replace(text=(' ' * o) + current.text[o:])
        return self.current_text()

    def boundary_reached(self, start):
        """Record *start* as the reached offset of the current chunk."""
        if self.queue:
            self.queue[0] = self.queue[0]._replace(reached_offset=start)

    def mark_word_or_sentence(self, start, length):
        """Return the first and last mark inside a range of the current chunk.

        :param start: offset of the range relative to the current chunk.
        :param length: number of characters in the range.
        :return: ``(first_mark, last_mark)`` for the marks whose offset falls
            inside the range, or ``None`` when there is no such mark or no
            queued chunk.
        """
        if not self.queue:
            return None
        start += self.queue[0].offset_in_text
        end = start + length
        matches = []
        # last_pos persists between calls, so positions that were already
        # passed are not examined again.
        while self.last_pos < len(self.positions):
            pos = self.positions[self.last_pos]
            if start <= pos.offset_in_text < end:
                matches.append(pos)
            elif pos.offset_in_text >= end:
                break
            self.last_pos += 1
        if matches:
            return matches[0].mark, matches[-1].mark
        return None
|
||||||
|
|
||||||
|
|
||||||
class TTSManager(QObject):
    """Drive a text-to-speech backend and map its progress back to marks.

    ``state_changed`` re-emits every state reported by the backend and
    ``saying`` emits the first and last mark of the range currently being
    spoken. The backend is created lazily on first use of ``tts``.
    """

    state_changed = pyqtSignal(QTextToSpeech.State)
    saying = pyqtSignal(int, int)

    def __init__(self, parent=None):
        super().__init__(parent)
        self._tts = None
        # Last state reported by the backend; Ready until one is reported.
        self.state = QTextToSpeech.State.Ready
        self.tracker = Tracker()

    @property
    def tts(self):
        """The speech backend, created and wired up on first access."""
        if self._tts is None:
            from calibre.gui2.tts2.types import create_tts_backend
            self._tts = create_tts_backend(parent=self)
            self._tts.state_changed.connect(self._state_changed)
            self._tts.saying.connect(self._saying)
        return self._tts

    def stop(self) -> None:
        """Stop speaking and forget the text that was being spoken."""
        self.tracker.clear()
        self.tts.stop()

    def pause(self) -> None:
        """Ask the backend to pause."""
        self.tts.pause()

    def resume(self) -> None:
        """Ask the backend to resume."""
        self.tts.resume()

    def speak_simple_text(self, text: str) -> None:
        """Speak *text* as a marked text carrying the single mark 0."""
        self.speak_marked_text([0, text])

    def speak_marked_text(self, marked_text):
        """Stop any current speech, then speak the first chunk of *marked_text*."""
        self.stop()
        self.tts.say(self.tracker.parse_marked_text(marked_text))

    def configure(self) -> None:
        """Show the configuration dialog, then continue speaking.

        If the dialog is accepted the current backend is stopped and
        dropped, and the text tracked so far is handed to a freshly created
        backend. Otherwise the paused backend is simply resumed.
        """
        from calibre.gui2.tts2.config import ConfigDialog
        self.tts.pause()
        # This manager is a plain QObject, which cannot be the parent of a
        # dialog, so the dialog is parented to whatever owns the manager.
        d = ConfigDialog(parent=self.parent())
        if d.exec() == QDialog.DialogCode.Accepted:
            # Stop only the backend. Going through self.stop() would also
            # clear the tracker and leave nothing to resume from below.
            self.tts.stop()
            self._tts = None
        if self._tts is None:
            self.tts.say(self.tracker.resume())
        else:
            self.tts.resume()

    def _state_changed(self, state: QTextToSpeech.State) -> None:
        """Store the backend state, report errors, and re-emit the state."""
        self.state = state
        if state is QTextToSpeech.State.Error:
            # Parent the message box to the manager's owner for the same
            # reason as the configuration dialog above.
            error_dialog(self.parent(), _('Read aloud failed'), self.tts.error_message(), show=True)
        self.state_changed.emit(state)

    def _saying(self, offset: int, length: int) -> None:
        """Translate a backend progress report into marks and emit ``saying``."""
        self.tracker.boundary_reached(offset)
        x = self.tracker.mark_word_or_sentence(offset, length)
        if x is not None:
            self.saying.emit(x[0], x[1])
|
||||||
|
@ -1,53 +1,12 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
from typing import NamedTuple
|
|
||||||
|
|
||||||
from qt.core import QMediaDevices, QObject, QTextToSpeech, pyqtSignal
|
from qt.core import QMediaDevices, QObject, QTextToSpeech, pyqtSignal
|
||||||
|
|
||||||
from calibre.gui2.tts2.types import EngineSpecificSettings, Voice, qvoice_to_voice
|
from calibre.gui2.tts2.types import EngineSpecificSettings, Voice, qvoice_to_voice
|
||||||
|
|
||||||
|
|
||||||
class Pos(NamedTuple):
|
|
||||||
mark: int
|
|
||||||
offset_in_text: int
|
|
||||||
|
|
||||||
|
|
||||||
class Tracker:
|
|
||||||
|
|
||||||
def reset(self) -> None:
|
|
||||||
self.positions: list[Pos] = []
|
|
||||||
self.last_pos: int = 0
|
|
||||||
|
|
||||||
def parse_marked_text(self, marked_text: list[str | int]) -> str:
|
|
||||||
self.reset()
|
|
||||||
text: list[str] = []
|
|
||||||
text_len: int = 0
|
|
||||||
for x in marked_text:
|
|
||||||
if isinstance(x, int):
|
|
||||||
self.positions.append(Pos(x, text_len))
|
|
||||||
else:
|
|
||||||
text_len += len(x)
|
|
||||||
text.append(x)
|
|
||||||
return ''.join(text)
|
|
||||||
|
|
||||||
def mark_word(self, start: int, length: int) -> tuple[int, int] | None:
|
|
||||||
end = start + length
|
|
||||||
matches: list[Pos] = []
|
|
||||||
while True:
|
|
||||||
if self.last_pos >= len(self.positions):
|
|
||||||
break
|
|
||||||
pos = self.positions[self.last_pos]
|
|
||||||
if start <= pos.offset_in_text < end:
|
|
||||||
matches.append(pos)
|
|
||||||
elif pos.offset_in_text >= end:
|
|
||||||
break
|
|
||||||
self.last_pos += 1
|
|
||||||
if matches:
|
|
||||||
return matches[0].mark, matches[-1].mark
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
class QtTTSBackend(QObject):
|
class QtTTSBackend(QObject):
|
||||||
|
|
||||||
saying = pyqtSignal(int, int)
|
saying = pyqtSignal(int, int)
|
||||||
@ -55,7 +14,6 @@ class QtTTSBackend(QObject):
|
|||||||
|
|
||||||
def __init__(self, engine_name: str = '', parent: QObject|None = None):
|
def __init__(self, engine_name: str = '', parent: QObject|None = None):
|
||||||
super().__init__(parent)
|
super().__init__(parent)
|
||||||
self.tracker = Tracker()
|
|
||||||
self._voices = None
|
self._voices = None
|
||||||
self._create_engine(engine_name)
|
self._create_engine(engine_name)
|
||||||
|
|
||||||
@ -86,9 +44,6 @@ class QtTTSBackend(QObject):
|
|||||||
def shutdown(self) -> None:
|
def shutdown(self) -> None:
|
||||||
self.tts.stop(QTextToSpeech.BoundaryHint.Immediate)
|
self.tts.stop(QTextToSpeech.BoundaryHint.Immediate)
|
||||||
|
|
||||||
def speak_simple_text(self, text: str) -> None:
|
|
||||||
self.tts.say(text)
|
|
||||||
|
|
||||||
def pause(self) -> None:
|
def pause(self) -> None:
|
||||||
self.tts.pause()
|
self.tts.pause()
|
||||||
|
|
||||||
@ -98,11 +53,8 @@ class QtTTSBackend(QObject):
|
|||||||
def stop(self) -> None:
|
def stop(self) -> None:
|
||||||
self.tts.stop()
|
self.tts.stop()
|
||||||
|
|
||||||
def resume_after_configure(self) -> None:
|
def say(self, text: str) -> None:
|
||||||
raise NotImplementedError('TODO: Implement me')
|
self.tts.say(text)
|
||||||
|
|
||||||
def speak_marked_text(self, marked_text: list[str | int]) -> None:
|
|
||||||
self.tts.say(self.tracker.parse_marked_text(marked_text))
|
|
||||||
|
|
||||||
def error_message(self) -> str:
|
def error_message(self) -> str:
|
||||||
return self.tts.errorString()
|
return self.tts.errorString()
|
||||||
@ -142,6 +94,4 @@ class QtTTSBackend(QObject):
|
|||||||
self._current_settings = settings
|
self._current_settings = settings
|
||||||
|
|
||||||
def _saying_word(self, word: str, utterance_id: int, start: int, length: int) -> None:
|
def _saying_word(self, word: str, utterance_id: int, start: int, length: int) -> None:
|
||||||
x = self.tracker.mark_word(start, length)
|
self.saying.emit(start, length)
|
||||||
if x is not None:
|
|
||||||
self.saying.emit(x[0], x[1])
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user