diff --git a/src/calibre/gui2/tts/windows.py b/src/calibre/gui2/tts/windows.py
new file mode 100644
index 0000000000..19e6395871
--- /dev/null
+++ b/src/calibre/gui2/tts/windows.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+# License: GPL v3 Copyright: 2020, Kovid Goyal
+
+from functools import partial
+
+from calibre.utils.windows.winspeech import WinSpeech, Error, MarkReached, MediaStateChanged, MediaState
+
+from .common import Event, EventType
+
+def split_into_chunks(marked_text, chunk_size):
+    '''
+    Split marked text into chunks of at most chunk_size characters.
+
+    marked_text is an iterable of int marks and str fragments. A mark that
+    directly precedes the fragment that starts a new chunk is moved into the
+    new chunk. A fragment longer than chunk_size is emitted whole. Empty
+    chunks are never yielded.
+    '''
+    chunk = []
+    tlen = 0
+    for x in marked_text:
+        if isinstance(x, int):
+            chunk.append(x)
+            continue
+        sz = len(x)
+        if tlen + sz > chunk_size:
+            mark = None
+            if chunk and isinstance(chunk[-1], int):
+                mark = chunk.pop()
+            # Nothing is pending when the very first fragment is already too
+            # large, the backend must not be handed an empty chunk.
+            if chunk:
+                yield chunk
+            chunk = [] if mark is None else [mark]
+            tlen = 0
+        chunk.append(x)
+        tlen += sz
+    if chunk:
+        yield chunk
+
+
+class Client:
+
+    mark_template = ''
+    name = 'winspeech'
+    min_rate = 0.5
+    max_rate = 6.0
+    default_system_rate = 1.0
+    chunk_size = 128 * 1024
+
+    @classmethod
+    def escape_marked_text(cls, text):
+        return text
+
+    def __init__(self, settings=None, dispatch_on_main_thread=lambda f: f()):
+        self.backend = WinSpeech(self.dispatch_msg)
+        self.current_callback = None
+        self.dispatch_on_main_thread = dispatch_on_main_thread
+        self.settings = settings or {}
+        # Creates chunks, current_chunk, last_mark and synthesizing, so that
+        # handle_event() works before the first call to speak_marked_text()
+        self.clear_chunks()
+        self.apply_settings()
+
+    def __del__(self):
+        # backend is not set if WinSpeech() failed in __init__
+        backend = getattr(self, 'backend', None)
+        if backend is not None:
+            backend.shutdown()
+            self.backend = None
+    shutdown = __del__
+
+    def dispatch_msg(self, msg):
+        self.dispatch_on_main_thread(partial(self.handle_event, msg))
+
+    def handle_event(self, x):
+        '''
+        Handle a message from the backend. Raises the exception for Error
+        messages and failed playback, KeyError for unknown message types.
+        '''
+        if isinstance(x, MarkReached):
+            self.last_mark = x.id
+        elif isinstance(x, MediaStateChanged):
+            if x.state is MediaState.failed:
+                raise x.as_exception()
+            # chunks is empty while speak_simple_text() is playing, there is
+            # then no callback to notify and no further chunk to queue
+            if self.chunks:
+                if x.state is MediaState.opened:
+                    if self.current_chunk == 0:
+                        self.callback_ignoring_errors(Event(EventType.begin))
+                elif x.state is MediaState.ended:
+                    if self.current_chunk >= len(self.chunks) - 1:
+                        self.clear_chunks()
+                        self.callback_ignoring_errors(Event(EventType.end))
+                    else:
+                        self.current_chunk += 1
+                        self.backend.speak(self.chunks[self.current_chunk], is_cued=True)
+        elif isinstance(x, Error):
+            raise x.as_exception(check_for_no_audio_devices=True)
+        else:
+            raise KeyError(f'Unknown event type: {x}')
+
+    def speak_simple_text(self, text):
+        self.current_callback = None
+        self.clear_chunks()
+        self.backend.speak(text)
+
+    def speak_marked_text(self, text, callback):
+        self.backend.pause()
+        self.clear_chunks()
+        self.current_callback = callback
+        self.chunks = tuple(split_into_chunks(text, self.chunk_size))
+        self.current_chunk = 0
+        if self.chunks:
+            self.backend.speak(self.chunks[self.current_chunk], is_cued=True)
+            self.synthesizing = True
+
+    def callback_ignoring_errors(self, ev):
+        if self.current_callback is not None:
+            try:
+                self.current_callback(ev)
+            except Exception:
+                import traceback
+                traceback.print_exc()
+
+    def clear_chunks(self):
+        self.synthesizing = False
+        self.current_chunk = 0
+        # Same attribute that speak_marked_text() fills and handle_event() reads
+        self.chunks = ()
+        self.last_mark = -1
+
+    def stop(self):
+        self.backend.pause()
+        self.clear_chunks()
+        if self.current_callback is not None:
+            self.current_callback(Event(EventType.cancel))
+
+    def pause(self):
+        self.backend.pause()
+        self.synthesizing = False
+        if self.current_callback is not None:
+            self.current_callback(Event(EventType.pause))
+
+    def resume(self):
+        self.backend.play()
+        self.synthesizing = True
+        if self.current_callback is not None:
+            self.current_callback(Event(EventType.resume))
+
+    def apply_settings(self, new_settings=None):
+        pass
+
+    def config_widget(self, backend_settings, parent):
+        from calibre.gui2.tts.windows_config import Widget
+        return Widget(self, backend_settings, parent)
+
+    def change_rate(self, steps=1):
+        '''
+        Change the speech rate by steps, clamped to min_rate and max_rate.
+        Returns the updated settings if the rate changed.
+        '''
+        rate = current_rate = self.settings.get('rate', self.default_system_rate)
+        if rate < 1:
+            step_size = 0.1
+        else:
+            step_size = 0.5
+        rate += steps * step_size
+        rate = max(self.min_rate, min(rate, self.max_rate))
+        if rate != current_rate:
+            self.settings['rate'] = rate
+            was_synthesizing = self.synthesizing
+            self.pause()
+            self.apply_settings()
+            if was_synthesizing:
+                # resume() sets self.synthesizing again
+                self.resume()
+            return self.settings
diff --git a/src/calibre/utils/windows/winspeech.py b/src/calibre/utils/windows/winspeech.py
index ec43e02616..0a3bf52802 100644
--- a/src/calibre/utils/windows/winspeech.py
+++ b/src/calibre/utils/windows/winspeech.py
@@ -6,13 +6,15 @@ import json
 import os
 import struct
 import sys
-from contextlib import closing
+from contextlib import closing, suppress
 from enum import Enum, auto
 from itertools import count
-from queue import Queue
+from queue import Empty, Queue
 from threading import Thread
+from time import monotonic
 from typing import NamedTuple, Tuple
 
+from calibre.constants import DEBUG
 from calibre.utils.ipc.simple_worker import start_pipe_worker
 from calibre.utils.shm import SharedMemory
 
@@ -91,14 +93,37 @@ class MarkReached(NamedTuple):
     id: int
 
 
+class SpeechError(OSError):
+
+    def __init__(self, err, msg=''):
+        val = 'There was an error in the Windows Speech subsystem. '
+        if msg:
+            val += f'{msg}. '
+        val += err.msg + ': ' + err.error + f'\nFile: {err.file} Line: {err.line}'
+        if err.hr:
+            val += f' HRESULT: 0x{err.hr:x}'
+        super().__init__(val)
+
+
+class NoAudioDevices(Exception):
+    def __init__(self):
+        super().__init__(_('No active audio output devices found. Connect headphones or speakers.'))
+
+
 class Error(NamedTuple):
     msg: str
     error: str = ''
     line: int = 0
     file: str = 'winspeech.py'
-    hr: str = ''
+    hr: int = 0
     related_to: int = 0
 
+    def as_exception(self, msg='', check_for_no_audio_devices=False):
+        # Returns the exception, every caller does: raise x.as_exception()
+        if check_for_no_audio_devices and self.hr == 0x8004503a:
+            return NoAudioDevices()
+        return SpeechError(self, msg)
+
 
 class Synthesizing(NamedTuple):
     related_to: int
@@ -145,7 +170,11 @@ class MediaStateChanged(NamedTuple):
     state: MediaState
     error: str = ""
     code: MediaPlayerError = MediaPlayerError.unknown
-    hr: str = ""
+    hr: int = 0
+
+    def as_exception(self):
+        err = Error("Playback of speech stream failed", self.error + f' ({self.code})', hr=self.hr)
+        return err.as_exception(check_for_no_audio_devices=True)
 
 
 class Echo(NamedTuple):
@@ -237,9 +266,13 @@ def parse_message(line):
     if msg_type == 'media_state_changed':
         ans['state'] = getattr(MediaState, ans['state'])
         if 'code' in ans:
-            ans['code'] = MediaPlayerError(ans['code'])
+            ans['code'] = getattr(MediaPlayerError, ans['code'])
+        if 'hr' in ans:
+            ans['hr'] = int(ans['hr'], 16)
         return MediaStateChanged(**ans)
     if msg_type == 'error':
+        if 'hr' in ans:
+            ans['hr'] = int(ans['hr'], 16)
         return Error(**ans)
     if msg_type == 'synthesizing':
         return Synthesizing(**ans)
@@ -286,11 +319,15 @@ def parse_message(line):
 
 class WinSpeech:
 
-    def __init__(self):
+    def __init__(self, event_dispatcher=print):
         self._worker = None
         self.queue = Queue()
         self.msg_id_counter = count()
         next(self.msg_id_counter)
+        self.pending_messages = []
+        self.current_speak_cmd_id = 0
+        self.waiting_for = -1
+        self.event_dispatcher = event_dispatcher
 
     @property
     def worker(self):
@@ -299,15 +336,91 @@ class WinSpeech:
             Thread(name='WinspeechQueue', target=self._get_messages, args=(self._worker, self.queue), daemon=True).start()
         return self._worker
 
+    def __del__(self):
+        if self._worker is not None:
+            # The worker may already have exited, in which case writing to
+            # it fails. It must still be reaped and forgotten below.
+            with suppress(Exception):
+                self.send_command('exit')
+                self._worker.wait(0.3)
+            if self._worker.poll() is None:
+                self._worker.kill()
+            self._worker = None
+    shutdown = __del__
+
     def _get_messages(self, worker, queue):
+        def send_msg(msg):
+            if self.waiting_for == msg.related_to:
+                self.queue.put(msg)
+            else:
+                self.dispatch_message(msg)
         try:
             for line in worker.stdout:
-                queue.put(line.decode('utf-8', 'replace'))
+                line = line.strip()
+                if DEBUG:
+                    with suppress(Exception):
+                        print('winspeech:', line.decode('utf-8', 'replace'), flush=True)
+                send_msg(parse_message(line))
         except OSError as e:
-            line = '0 error ' + json.dumps({"msg": "Failed to read from worker", "error": str(e), "file": "winspeech.py", "line": 0})
-            queue.put(line)
+            send_msg(Error('Failed to read from worker', str(e)))
+        except Exception as e:
+            send_msg(Error('Failed to parse message from worker', str(e)))
+
+    def send_command(self, cmd):
+        cmd_id = next(self.msg_id_counter)
+        w = self.worker
+        w.stdin.write(f'{cmd_id} {cmd}\n'.encode('utf-8'))
+        w.stdin.flush()
+        return cmd_id
+
+    def wait_for(self, error_msg, *classes, related_to=-1, timeout=4):
+        # Block for at most timeout seconds until a message that is an
+        # instance of one of classes arrives for the command related_to.
+        # An Error for that command is raised as an exception.
+        orig, self.waiting_for = self.waiting_for, related_to
+        try:
+            limit = monotonic() + timeout
+            while True:
+                left = limit - monotonic()
+                if left <= 0:
+                    break
+                try:
+                    x = self.queue.get(True, left)
+                except Empty:
+                    break
+                # classes is a tuple, which isinstance() accepts directly
+                if (not classes or isinstance(x, classes)) and (not related_to or x.related_to == related_to):
+                    return x
+                if isinstance(x, Error) and (not related_to or x.related_to == related_to):
+                    raise x.as_exception(error_msg)
+            raise TimeoutError('Timed out waiting for: ' + error_msg)
+        finally:
+            self.waiting_for = orig
+
+    def speak(self, text, is_cued=False, is_xml=False):
+        with SharedMemory(size=max_buffer_size(text)) as shm:
+            st = 'cued' if is_cued else ('ssml' if is_xml else 'text')
+            sz = encode_to_file_object(text, shm)
+            self.current_speak_cmd_id = self.send_command(f'speak {st} shm {sz} {shm.name}')
+            x = self.wait_for('speech synthesis to start', MediaStateChanged, related_to=self.current_speak_cmd_id, timeout=8)
+            if x.state is MediaState.failed:
+                raise x.as_exception()
+            return self.current_speak_cmd_id
+
+    def dispatch_message(self, x):
+        # Only messages for the current speak command are forwarded
+        if x.related_to == self.current_speak_cmd_id:
+            if isinstance(x, (Error, MediaStateChanged, MarkReached)):
+                self.event_dispatcher(x)
+
+    def pause(self):
+        self.wait_for('pause', Pause, related_to=self.send_command('pause'))
+
+    def play(self):
+        self.wait_for('play', Play, related_to=self.send_command('play'))
 
 
+# develop {{{
 def develop_loop(*commands):
     p = start_worker()
     q = Queue()
@@ -400,3 +513,4 @@ def develop_interactive():
     finally:
         if p.poll() is None:
             p.kill()
+# }}}