From 913892d4f86388e748497d98adec00aab1bdd76c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 13 Dec 2020 09:24:58 +0530 Subject: [PATCH] Windows: Fix Read aloud not working with books that have a single large internal text file, such as MOBI or DOCX books Apparently, there is an undocumented limit to how much text can be passed to SAPI in a single Speak() call. So maintain our own internal queue and pass 128KB chunks. --- src/calibre/gui2/tts/common.py | 19 ++++ src/calibre/gui2/tts/linux.py | 6 +- src/calibre/gui2/tts/macos.py | 6 +- src/calibre/gui2/tts/windows.py | 175 +++++++++++++++++++++++--------- src/calibre/gui2/viewer/tts.py | 14 +-- 5 files changed, 154 insertions(+), 66 deletions(-) diff --git a/src/calibre/gui2/tts/common.py b/src/calibre/gui2/tts/common.py index b149ad9f6b..7e5b165474 100644 --- a/src/calibre/gui2/tts/common.py +++ b/src/calibre/gui2/tts/common.py @@ -22,3 +22,22 @@ class Event: def __repr__(self): return f'Event(type={self.type}, data={self.data})' + + +def add_markup(text_parts, mark_template, escape_marked_text, chunk_size=0): + buf = [] + size = 0 + for x in text_parts: + if isinstance(x, int): + item = mark_template.format(x) + else: + item = escape_marked_text(x) + sz = len(item) + if chunk_size and size + sz > chunk_size: + yield ''.join(buf).strip() + size = 0 + buf = [] + size += sz + buf.append(item) + if size: + yield ''.join(buf).strip() diff --git a/src/calibre/gui2/tts/linux.py b/src/calibre/gui2/tts/linux.py index e34daaf6f4..2e09affa92 100644 --- a/src/calibre/gui2/tts/linux.py +++ b/src/calibre/gui2/tts/linux.py @@ -6,7 +6,7 @@ from functools import partial from calibre import prepare_string_for_xml -from .common import Event, EventType +from .common import Event, EventType, add_markup from .errors import TTSSystemUnavailable @@ -21,6 +21,7 @@ class Client: name = 'speechd' min_rate = -100 max_rate = 100 + chunk_size = 0 @classmethod def escape_marked_text(cls, text): @@ -127,8 +128,9 @@ class Client: self.next_cancel_is_for_pause = False return event - def speak_marked_text(self, text, callback=lambda ev: None): + def speak_marked_text(self, marked_text, callback=lambda ev: None): self.stop() + text = ''.join(add_markup(marked_text, self.mark_template, self.escape_marked_text, self.chunk_size)) self.current_marked_text = text self.last_mark = None diff --git a/src/calibre/gui2/tts/macos.py b/src/calibre/gui2/tts/macos.py index eb9c2fa495..6e16eb6eef 100644 --- a/src/calibre/gui2/tts/macos.py +++ b/src/calibre/gui2/tts/macos.py @@ -2,7 +2,7 @@ # vim:fileencoding=utf-8 # License: GPL v3 Copyright: 2020, Kovid Goyal -from .common import Event, EventType +from .common import Event, EventType, add_markup class Client: @@ -12,6 +12,7 @@ class Client: name = 'nsss' min_rate = 10 max_rate = 340 + chunk_size = 0 @classmethod def escape_marked_text(cls, text): @@ -70,7 +71,8 @@ class Client: self.nsss.speak(self.escape_marked_text(text)) self.status = {'synthesizing': True, 'paused': False} - def speak_marked_text(self, text, callback): + def speak_marked_text(self, marked_text, callback): + text = ''.join(add_markup(marked_text, self.mark_template, self.escape_marked_text, self.chunk_size)) self.current_callback = callback self.current_marked_text = text self.last_mark = None diff --git a/src/calibre/gui2/tts/windows.py b/src/calibre/gui2/tts/windows.py index d10f0f7d20..c69b48093a 100644 --- a/src/calibre/gui2/tts/windows.py +++ b/src/calibre/gui2/tts/windows.py @@ -5,10 +5,70 @@ from time import monotonic from threading import Thread +from typing import NamedTuple from calibre import prepare_string_for_xml -from .common import Event, EventType +from .common import Event, EventType, add_markup + + +class QueueEntry(NamedTuple): + stream_number: int + text: str + + +class SpeechQueue: + + def __init__(self): + self.clear() + + def __len__(self): + return len(self.items) + + def clear(self, keep_mark=False): + self.items = [] + self.pos = -1 + if not keep_mark: + self.last_mark = None + + def add(self, stream_number, text): + self.items.append(QueueEntry(stream_number, text)) + + def start(self, stream_number): + self.pos = -1 + for i, x in enumerate(self.items): + if x.stream_number == stream_number: + self.pos = i + break + + @property + def is_at_start(self): + return self.pos == 0 + + @property + def is_at_end(self): + return self.pos >= len(self.items) - 1 + + @property + def current_stream_number(self): + if -1 < self.pos < len(self.items): + return self.items[self.pos].stream_number + + def resume_from_last_mark(self, mark_template): + if self.pos < 0 or self.pos >= len(self.items): + return + item = self.items[self.pos] + if self.last_mark is None: + idx = -1 + else: + idx = item.text.find(mark_template.format(self.last_mark)) + if idx == -1: + text = item.text + else: + text = item.text[idx:] + yield text + for i in range(self.pos + 1, len(self.items)): + yield self.items[i].text class Client: @@ -17,6 +77,7 @@ class Client: name = 'sapi' min_rate = -10 max_rate = 10 + chunk_size = 128 * 1024 @classmethod def escape_marked_text(cls, text): @@ -29,14 +90,23 @@ class Client: self.default_system_rate = self.sp_voice.get_current_rate() self.default_system_voice = self.sp_voice.get_current_voice() self.default_system_sound_output = self.sp_voice.get_current_sound_output() - self.current_stream_number = None + self.current_stream_queue = SpeechQueue() self.current_callback = None self.dispatch_on_main_thread = dispatch_on_main_thread - self.current_marked_text = self.last_mark = None - self.status = {'synthesizing': False, 'paused': False} + self.synthesizing = False + self.pause_count = 0 self.settings = settings or {} self.apply_settings() + @property + def status(self): + return {'synthesizing': self.synthesizing, 'paused': self.pause_count > 0} + + def clear_pauses(self): + while self.pause_count: + self.sp_voice.resume() + self.pause_count -= 1 + def create_voice(self): from calibre.utils.windows.winsapi import ISpVoice self.sp_voice = ISpVoice() @@ -51,10 +121,10 @@ class Client: shutdown = __del__ def apply_settings(self, new_settings=None): - if self.status['paused']: - self.sp_voice.resume() + if self.pause_count: + self.clear_pauses() self.ignore_next_stop_event = monotonic() - self.status = {'synthesizing': False, 'paused': False} + self.synthesizing = False if new_settings is not None: self.settings = new_settings self.sp_voice.set_current_rate(self.settings.get('rate', self.default_system_rate)) @@ -72,94 +142,101 @@ class Client: SPEI_END_INPUT_STREAM, SPEI_START_INPUT_STREAM, SPEI_TTS_BOOKMARK ) c = self.current_callback + for (stream_number, event_type, event_data) in self.sp_voice.get_events(): if event_type == SPEI_TTS_BOOKMARK: - self.last_mark = event_data + self.current_stream_queue.last_mark = event_data event = Event(EventType.mark, event_data) elif event_type == SPEI_START_INPUT_STREAM: + self.current_stream_queue.start(stream_number) if self.ignore_next_start_event: self.ignore_next_start_event = False continue + self.synthesizing = True + if not self.current_stream_queue.is_at_start: + continue event = Event(EventType.begin) - self.status = {'synthesizing': True, 'paused': False} elif event_type == SPEI_END_INPUT_STREAM: if self.ignore_next_stop_event is not None and monotonic() - self.ignore_next_stop_event < 2: self.ignore_next_stop_event = None continue + self.synthesizing = False + if not self.current_stream_queue.is_at_end: + continue event = Event(EventType.end) - self.status = {'synthesizing': False, 'paused': False} else: continue - if c is not None and stream_number == self.current_stream_number: + if c is not None and stream_number == self.current_stream_queue.current_stream_number: try: c(event) except Exception: import traceback traceback.print_exc() - def speak(self, text, is_xml=False, want_events=True): + def speak(self, text, is_xml=False, want_events=True, purge=True): from calibre_extensions.winsapi import ( SPF_ASYNC, SPF_IS_NOT_XML, SPF_PURGEBEFORESPEAK, SPF_IS_XML ) flags = SPF_IS_XML if is_xml else SPF_IS_NOT_XML - self.current_stream_number = self.sp_voice.speak(text, flags | SPF_PURGEBEFORESPEAK | SPF_ASYNC, want_events) - return self.current_stream_number + if purge: + flags |= SPF_PURGEBEFORESPEAK + return self.sp_voice.speak(text, flags | SPF_ASYNC, want_events) + + def purge(self): + from calibre_extensions.winsapi import SPF_PURGEBEFORESPEAK + self.sp_voice.speak('', SPF_PURGEBEFORESPEAK, False) + self.synthesizing = False def speak_simple_text(self, text): self.current_callback = None - self.current_marked_text = self.last_mark = None - self.speak(text) + self.current_stream_queue.clear() + number = self.speak(text) + self.clear_pauses() + self.current_stream_queue.add(number, text) def speak_marked_text(self, text, callback): - self.current_marked_text = text - self.last_mark = None - if self.status['synthesizing']: + self.clear_pauses() + self.current_stream_queue.clear() + if self.synthesizing: self.ignore_next_stop_event = monotonic() self.current_callback = callback - self.speak(text, is_xml=True) + for i, chunk in enumerate(add_markup(text, self.mark_template, self.escape_marked_text, self.chunk_size)): + number = self.speak(chunk, is_xml=True, purge=i == 0) + self.current_stream_queue.add(number, chunk) def stop(self): - from calibre_extensions.winsapi import SPF_PURGEBEFORESPEAK - if self.status['paused']: - self.sp_voice.resume() - self.sp_voice.speak('', SPF_PURGEBEFORESPEAK, False) - self.status = {'synthesizing': False, 'paused': False} + self.clear_pauses() + self.purge() if self.current_callback is not None: self.current_callback(Event(EventType.cancel)) self.current_callback = None def pause(self): - if self.status['synthesizing'] and not self.status['paused']: - self.sp_voice.pause() - self.status = {'synthesizing': True, 'paused': True} - if self.current_callback is not None: - self.current_callback(Event(EventType.pause)) + self.sp_voice.pause() + self.pause_count += 1 + if self.current_callback is not None: + self.current_callback(Event(EventType.pause)) def resume(self): - if self.status['paused']: - self.sp_voice.resume() - self.status = {'synthesizing': True, 'paused': False} + if self.pause_count: + self.clear_pauses() if self.current_callback is not None: self.current_callback(Event(EventType.resume)) def resume_after_configure(self): - if self.status['paused']: - self.resume() + if self.pause_count: + self.clear_pauses() return - if self.last_mark is None: - idx = -1 - else: - mark = self.mark_template.format(self.last_mark) - idx = self.current_marked_text.find(mark) - if idx == -1: - text = self.current_marked_text - else: - text = self.current_marked_text[idx:] + chunks = tuple(self.current_stream_queue.resume_from_last_mark(self.mark_template)) self.ignore_next_start_event = True + self.current_stream_queue.clear(keep_mark=True) + self.purge() + for chunk in chunks: + number = self.speak(chunk, is_xml=True, purge=False) + self.current_stream_queue.add(number, chunk) if self.current_callback is not None: self.current_callback(Event(EventType.resume)) - self.speak(text, is_xml=True) - self.status = {'synthesizing': True, 'paused': False} + self.synthesizing = bool(chunks) def get_voice_data(self): ans = getattr(self, 'voice_data', None) @@ -184,10 +261,10 @@ class Client: rate = max(self.min_rate, min(rate, self.max_rate)) if rate != current_rate: self.settings['rate'] = rate - prev_state = self.status.copy() + was_synthesizing = self.synthesizing self.pause() self.apply_settings() - if prev_state['synthesizing']: - self.status = {'synthesizing': True, 'paused': False} + if was_synthesizing: + self.synthesizing = True self.resume_after_configure() return self.settings diff --git a/src/calibre/gui2/viewer/tts.py b/src/calibre/gui2/viewer/tts.py index eefd39ff42..4f86528802 100644 --- a/src/calibre/gui2/viewer/tts.py +++ b/src/calibre/gui2/viewer/tts.py @@ -38,17 +38,6 @@ class Config(Dialog): return super().accept() -def add_markup(text_parts, mark_template): - from calibre.gui2.tts.implementation import Client - buf = [] - for x in text_parts: - if isinstance(x, int): - buf.append(mark_template.format(x)) - else: - buf.append(Client.escape_marked_text(x)) - return ''.join(buf) - - class TTS(QObject): dispatch_on_main_thread_signal = pyqtSignal(object) @@ -98,8 +87,7 @@ class TTS(QObject): return error_dialog(self.parent(), _('Text-to-Speech unavailable'), str(err), show=True) def play(self, data): - marked_text = add_markup(data['marked_text'], self.tts_client_class.mark_template) - self.tts_client.speak_marked_text(marked_text.strip(), self.callback) + self.tts_client.speak_marked_text(data['marked_text'], self.callback) def pause(self, data): self.tts_client.pause()