Work on integrating winspeech

This commit is contained in:
Kovid Goyal 2023-02-01 20:29:46 +05:30
parent 438cf020e3
commit 78d890c925
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 271 additions and 9 deletions

View File

@ -0,0 +1,156 @@
#!/usr/bin/env python
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
from functools import partial
from calibre.utils.windows.winspeech import WinSpeech, Error, MarkReached, MediaStateChanged, MediaState
from .common import Event, EventType
def split_into_chunks(marked_text, chunk_size):
    """Group marked text into chunks of at most about ``chunk_size`` characters.

    ``marked_text`` is an iterable whose items are either ``int`` marks or
    sized text fragments (anything ``len()`` accepts). Marks do not count
    towards the size of a chunk. A single fragment longer than ``chunk_size``
    is never split; it ends up in a chunk of its own.

    When a chunk is closed and its last item is a mark, that mark is moved to
    the start of the next chunk so it stays directly in front of the text that
    follows it.

    Yields non-empty lists of marks and fragments, in input order.
    """
    chunk = []
    tlen = 0
    for x in marked_text:
        if isinstance(x, int):
            chunk.append(x)
            continue
        sz = len(x)
        if tlen + sz > chunk_size:
            mark = None
            if chunk and isinstance(chunk[-1], int):
                # Carry a trailing mark over to the chunk it actually labels
                mark = chunk.pop()
            # Nothing may be pending yet (first fragment is oversized, or the
            # chunk held only the carried mark): never emit an empty chunk.
            if chunk:
                yield chunk
            chunk = [] if mark is None else [mark]
            tlen = sz
        else:
            tlen += sz
        chunk.append(x)
    if chunk:
        yield chunk
class Client:
    """Text-to-speech client that drives a ``WinSpeech`` backend.

    Marked text is split into chunks of roughly ``chunk_size`` characters and
    the chunks are spoken one after another. Events coming from the backend
    are forwarded through ``dispatch_on_main_thread`` and translated into
    ``Event`` objects for the callback given to ``speak_marked_text``.
    """

    mark_template = ''
    name = 'winspeech'
    min_rate = 0.5
    max_rate = 6.0
    default_system_rate = 1.0
    chunk_size = 128 * 1024

    @classmethod
    def escape_marked_text(cls, text):
        """Return ``text`` unchanged; this backend needs no escaping."""
        return text

    def __init__(self, settings=None, dispatch_on_main_thread=lambda f: f()):
        """Create the backend and reset all playback state.

        ``settings`` is a dict of backend settings (a fresh dict is used when
        it is empty or None). ``dispatch_on_main_thread`` is called with a
        zero-argument callable for every backend event; the default simply
        calls it immediately.
        """
        self.backend = WinSpeech(self.dispatch_msg)
        self.current_callback = None
        self.dispatch_on_main_thread = dispatch_on_main_thread
        self.settings = settings or {}
        # Initialises chunks, current_chunk, last_mark and synthesizing so
        # that handle_event() is safe before anything has been spoken.
        self.clear_chunks()
        self.apply_settings()

    def __del__(self):
        # backend may be missing if __init__ failed before assigning it
        if getattr(self, 'backend', None) is not None:
            self.backend.shutdown()
            self.backend = None
    shutdown = __del__

    def dispatch_msg(self, msg):
        """Hand a backend message to handle_event() via the dispatcher."""
        self.dispatch_on_main_thread(partial(self.handle_event, msg))

    def handle_event(self, x):
        """Process one backend event.

        Raises whatever ``x.as_exception()`` raises for failed playback or
        backend errors, and ``KeyError`` for an event of an unknown type.
        """
        if isinstance(x, MarkReached):
            self.last_mark = x.id
        elif isinstance(x, MediaStateChanged):
            # State changes only drive chunk sequencing; with no chunks
            # queued (e.g. simple text) there is nothing to do.
            if self.chunks:
                if x.state is MediaState.opened:
                    if self.current_chunk == 0:
                        self.callback_ignoring_errors(Event(EventType.begin))
                elif x.state is MediaState.ended:
                    if self.current_chunk >= len(self.chunks) - 1:
                        self.clear_chunks()
                        self.callback_ignoring_errors(Event(EventType.end))
                    else:
                        self.current_chunk += 1
                        self.backend.speak(self.chunks[self.current_chunk], is_cued=True)
                elif x.state is MediaState.failed:
                    raise x.as_exception()
        elif isinstance(x, Error):
            raise x.as_exception(check_for_no_audio_devices=True)
        else:
            raise KeyError(f'Unknown event type: {x}')

    def speak_simple_text(self, text):
        """Speak plain ``text`` without marks or a callback."""
        self.current_callback = None
        self.clear_chunks()
        self.backend.speak(text)

    def speak_marked_text(self, text, callback):
        """Speak marked ``text`` chunk by chunk, reporting events to ``callback``."""
        self.backend.pause()
        self.clear_chunks()
        self.current_callback = callback
        self.chunks = tuple(split_into_chunks(text, self.chunk_size))
        self.current_chunk = 0
        if self.chunks:
            self.backend.speak(self.chunks[self.current_chunk], is_cued=True)
            self.synthesizing = True

    def callback_ignoring_errors(self, ev):
        """Call the current callback with ``ev``, printing any exception it raises."""
        if self.current_callback is not None:
            try:
                self.current_callback(ev)
            except Exception:
                import traceback
                traceback.print_exc()

    def clear_chunks(self):
        """Forget any queued chunks and reset playback bookkeeping."""
        self.synthesizing = False
        self.current_chunk = 0
        self.chunks = ()
        self.last_mark = -1

    def stop(self):
        """Pause the backend, drop queued chunks and report a cancel event."""
        self.backend.pause()
        self.clear_chunks()
        if self.current_callback is not None:
            self.current_callback(Event(EventType.cancel))

    def pause(self):
        """Pause playback and report a pause event."""
        self.backend.pause()
        self.synthesizing = False
        if self.current_callback is not None:
            self.current_callback(Event(EventType.pause))

    def resume(self):
        """Resume playback and report a resume event."""
        self.backend.play()
        self.synthesizing = True
        if self.current_callback is not None:
            self.current_callback(Event(EventType.resume))

    def resume_after_configure(self):
        """Continue speaking after settings were changed mid-playback.

        apply_settings() is currently a no-op, so continuing playback is all
        that is needed here.
        NOTE(review): once settings really reach the backend this probably
        has to re-speak from ``last_mark`` instead -- confirm.
        """
        self.resume()

    def apply_settings(self, new_settings=None):
        """Placeholder: settings are not yet applied to the backend."""
        pass

    def config_widget(self, backend_settings, parent):
        """Return the configuration widget for this backend."""
        from calibre.gui2.tts.windows_config import Widget
        return Widget(self, backend_settings, parent)

    def change_rate(self, steps=1):
        """Change the speech rate by ``steps`` increments and return the settings.

        The increment is 0.1 below a rate of 1 and 0.5 otherwise; the result
        is clamped to ``[min_rate, max_rate]``. If the rate actually changes
        while speech is in progress, playback is paused and then resumed.
        """
        rate = current_rate = self.settings.get('rate', self.default_system_rate)
        if rate < 1:
            step_size = 0.1
        else:
            step_size = 0.5
        rate += steps * step_size
        rate = max(self.min_rate, min(rate, self.max_rate))
        if rate != current_rate:
            self.settings['rate'] = rate
            was_synthesizing = self.synthesizing
            self.pause()
            self.apply_settings()
            if was_synthesizing:
                self.synthesizing = True
                self.resume_after_configure()
        return self.settings

View File

@ -6,13 +6,15 @@ import json
import os
import struct
import sys
from contextlib import closing
from contextlib import closing, suppress
from enum import Enum, auto
from itertools import count
from queue import Queue
from queue import Empty, Queue
from threading import Thread
from time import monotonic
from typing import NamedTuple, Tuple
from calibre.constants import DEBUG
from calibre.utils.ipc.simple_worker import start_pipe_worker
from calibre.utils.shm import SharedMemory
@ -91,14 +93,36 @@ class MarkReached(NamedTuple):
id: int
class SpeechError(OSError):
    """Error reported by the Windows Speech subsystem.

    ``err`` supplies the ``msg``, ``error``, ``file``, ``line`` and ``hr``
    attributes used to build the message; ``msg`` is optional extra context
    placed in front of them.
    """

    def __init__(self, err, msg=''):
        parts = ['There was an error in the Windows Speech subsystem. ']
        if msg:
            parts.append(f'{msg}. ')
        parts.append(err.msg + ': ' + err.error)
        parts.append(f'\nFile: {err.file} Line: {err.line}')
        if err.hr:
            # Only mention the HRESULT when one was actually reported
            parts.append(f' HRESULT: 0x{err.hr:x}')
        super().__init__(''.join(parts))
class NoAudioDevices(Exception):
    """Raised when no active audio output device is available.

    ``msg`` optionally overrides the default user-facing message, so the
    exception can be constructed both with and without an explicit message.
    """

    def __init__(self, msg=None):
        if msg is None:
            # `_` is presumably the translation function the application
            # installs as a builtin -- it is not defined in this module.
            msg = _('No active audio output devices found. Connect headphones or speakers.')
        super().__init__(msg)
class Error(NamedTuple):
    """An error message, either received from the worker or created locally."""

    msg: str
    error: str = ''
    line: int = 0
    file: str = 'winspeech.py'
    # HRESULT as an integer (parse_message converts the hex string); 0 if none
    hr: int = 0
    related_to: int = 0

    def as_exception(self, msg='', check_for_no_audio_devices=False):
        """Raise the exception describing this error; never returns normally.

        Raises ``NoAudioDevices`` when ``check_for_no_audio_devices`` is set
        and the HRESULT is 0x8004503a, otherwise ``SpeechError`` built from
        this error with ``msg`` as extra context.
        """
        if check_for_no_audio_devices and self.hr == 0x8004503a:
            # NoAudioDevices supplies its own default message
            raise NoAudioDevices()
        raise SpeechError(self, msg)
class Synthesizing(NamedTuple):
related_to: int
@ -145,7 +169,11 @@ class MediaStateChanged(NamedTuple):
state: MediaState
error: str = ""
code: MediaPlayerError = MediaPlayerError.unknown
hr: str = ""
hr: int = 0
def as_exception(self):
    """Turn a failed media state into an exception via ``Error.as_exception``.

    The error text is this state's ``error`` followed by its ``code`` in
    parentheses; the HRESULT is passed along so the no-audio-devices check
    can be applied.
    """
    detail = self.error + f' ({self.code})'
    failure = Error("Playback of speech stream failed", detail, hr=self.hr)
    return failure.as_exception(check_for_no_audio_devices=True)
class Echo(NamedTuple):
@ -237,9 +265,13 @@ def parse_message(line):
if msg_type == 'media_state_changed':
ans['state'] = getattr(MediaState, ans['state'])
if 'code' in ans:
ans['code'] = MediaPlayerError(ans['code'])
ans['code'] = getattr(MediaPlayerError, ans['code'])
if 'hr' in ans:
ans['hr'] = int(ans['hr'], 16)
return MediaStateChanged(**ans)
if msg_type == 'error':
if 'hr' in ans:
ans['hr'] = int(ans['hr'], 16)
return Error(**ans)
if msg_type == 'synthesizing':
return Synthesizing(**ans)
@ -286,11 +318,15 @@ def parse_message(line):
class WinSpeech:
def __init__(self):
def __init__(self, event_dispatcher=print):
self._worker = None
self.queue = Queue()
self.msg_id_counter = count()
next(self.msg_id_counter)
self.pending_messages = []
self.current_speak_cmd_id = 0
self.waiting_for = -1
self.event_dispatcher = event_dispatcher
@property
def worker(self):
@ -299,15 +335,84 @@ class WinSpeech:
Thread(name='WinspeechQueue', target=self._get_messages, args=(self._worker, self.queue), daemon=True).start()
return self._worker
def __del__(self):
if self._worker is not None:
self.send_command('exit')
with suppress(Exception):
self._worker.wait(0.3)
if self._worker.poll() is None:
self._worker.kill()
self._worker = None
shutdown = __del__
def _get_messages(self, worker, queue):
    """Read worker output line by line and route each parsed message.

    A message whose ``related_to`` equals the id currently being waited for
    is put on ``queue`` for wait_for(); every other message goes to
    dispatch_message(). Read or parse failures are reported as ``Error``
    messages through the same routing, after which reading stops.
    """
    def send_msg(msg):
        if self.waiting_for == msg.related_to:
            queue.put(msg)
        else:
            self.dispatch_message(msg)
    try:
        for line in worker.stdout:
            line = line.strip()
            if DEBUG:
                # Debug output must never break message processing
                with suppress(Exception):
                    print('winspeech:', line.decode('utf-8', 'replace'), flush=True)
            # Only parsed message objects are queued: wait_for() reads
            # .related_to from everything it takes off the queue.
            send_msg(parse_message(line))
    except OSError as e:
        send_msg(Error('Failed to read from worker', str(e)))
    except Exception as e:
        send_msg(Error('Failed to parse message from worker', str(e)))
def send_command(self, cmd):
    """Write ``cmd`` to the worker, prefixed with a fresh id, and return the id."""
    cmd_id = next(self.msg_id_counter)
    stdin = self.worker.stdin
    payload = f'{cmd_id} {cmd}\n'.encode('utf-8')
    stdin.write(payload)
    # Flush so the command reaches the worker immediately
    stdin.flush()
    return cmd_id
def wait_for(self, error_msg, *classes, related_to=-1, timeout=4):
orig, self.waiting_for = self.waiting_for, related_to
try:
limit = monotonic() + timeout
while True:
left = limit - monotonic()
if left <= 0:
break
try:
x = self.queue.get(True, left)
except Empty:
break
if (not classes or isinstance(x, *classes)) and (not related_to or x.related_to == related_to):
return x
if isinstance(x, Error) and (not related_to or x.related_to == related_to):
raise x.as_exception(error_msg)
raise TimeoutError('Timed out waiting for: ' + error_msg)
finally:
self.waiting_for = orig
def speak(self, text, is_cued=False, is_xml=False):
    """Send ``text`` to the worker via shared memory and wait for playback to start.

    ``is_cued`` selects the ``cued`` input type and takes precedence over
    ``is_xml`` (``ssml``); otherwise the input is plain ``text``. Returns the
    id of the speak command. Raises via ``as_exception()`` if the media
    state reported for the command is ``failed``.
    """
    if is_cued:
        kind = 'cued'
    elif is_xml:
        kind = 'ssml'
    else:
        kind = 'text'
    with SharedMemory(size=max_buffer_size(text)) as shm:
        nbytes = encode_to_file_object(text, shm)
        self.current_speak_cmd_id = self.send_command(f'speak {kind} shm {nbytes} {shm.name}')
        # Wait while the shared memory is still open
        reply = self.wait_for(
            'speech synthesis to start', MediaStateChanged,
            related_to=self.current_speak_cmd_id, timeout=8)
        if reply.state is MediaState.failed:
            raise reply.as_exception()
    return self.current_speak_cmd_id
def dispatch_message(self, x):
if x.related_to == self.current_speak_cmd_id:
if isinstance(x, (Error, MediaStateChanged, MarkReached)):
self.event_dispatcher(x)
def pause(self):
    """Send the ``pause`` command and wait for its ``Pause`` reply."""
    cmd_id = self.send_command('pause')
    self.wait_for('pause', Pause, related_to=cmd_id)
def play(self):
    """Send the ``play`` command and wait for its ``Play`` reply."""
    cmd_id = self.send_command('play')
    self.wait_for('play', Play, related_to=cmd_id)
# develop {{{
def develop_loop(*commands):
p = start_worker()
q = Queue()
@ -400,3 +505,4 @@ def develop_interactive():
finally:
if p.poll() is None:
p.kill()
# }}}