Windows: Fix Read aloud not working with books that have a single large internal text file, such as MOBI or DOCX books

Apparently, there is an undocumented limit to how much text can be
passed to SAPI in a single Speak() call.
So maintain our own internal queue and pass the text to SAPI in chunks of about 128K characters (128 * 1024) each.
This commit is contained in:
Kovid Goyal 2020-12-13 09:24:58 +05:30
parent c7c627f285
commit 913892d4f8
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 154 additions and 66 deletions

View File

@ -22,3 +22,22 @@ class Event:
def __repr__(self): def __repr__(self):
return f'Event(type={self.type}, data={self.data})' return f'Event(type={self.type}, data={self.data})'
def add_markup(text_parts, mark_template, escape_marked_text, chunk_size=0):
    """Render marked text as one or more strings of speech markup.

    Each element of ``text_parts`` is either an ``int`` (a mark), which is
    rendered with ``mark_template.format(mark)``, or a piece of text, which
    is passed through ``escape_marked_text``.

    :param text_parts: iterable of ints (marks) and text fragments.
    :param mark_template: format string with one positional placeholder
        that receives the mark number.
    :param escape_marked_text: callable that escapes a text fragment and
        returns the string to emit for it.
    :param chunk_size: maximum chunk length, measured with ``len()`` on the
        rendered items. ``0`` (the default) disables chunking, so at most
        one string is produced.
    :return: a generator of chunks, each stripped of leading and trailing
        whitespace. Items are never split: a single rendered item longer
        than ``chunk_size`` is emitted as a chunk of its own.
    """
    buf = []
    size = 0
    for x in text_parts:
        if isinstance(x, int):
            item = mark_template.format(x)
        else:
            item = escape_marked_text(x)
        sz = len(item)
        # Flush only when something has actually been buffered; otherwise
        # an oversized first item would cause an empty chunk to be emitted
        # ahead of it.
        if chunk_size and size and size + sz > chunk_size:
            yield ''.join(buf).strip()
            size = 0
            buf = []
        size += sz
        buf.append(item)
    if size:
        yield ''.join(buf).strip()

View File

@ -6,7 +6,7 @@ from functools import partial
from calibre import prepare_string_for_xml from calibre import prepare_string_for_xml
from .common import Event, EventType from .common import Event, EventType, add_markup
from .errors import TTSSystemUnavailable from .errors import TTSSystemUnavailable
@ -21,6 +21,7 @@ class Client:
name = 'speechd' name = 'speechd'
min_rate = -100 min_rate = -100
max_rate = 100 max_rate = 100
chunk_size = 0
@classmethod @classmethod
def escape_marked_text(cls, text): def escape_marked_text(cls, text):
@ -127,8 +128,9 @@ class Client:
self.next_cancel_is_for_pause = False self.next_cancel_is_for_pause = False
return event return event
def speak_marked_text(self, text, callback=lambda ev: None): def speak_marked_text(self, marked_text, callback=lambda ev: None):
self.stop() self.stop()
text = ''.join(add_markup(marked_text, self.mark_template, self.escape_marked_text, self.chunk_size))
self.current_marked_text = text self.current_marked_text = text
self.last_mark = None self.last_mark = None

View File

@ -2,7 +2,7 @@
# vim:fileencoding=utf-8 # vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net> # License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
from .common import Event, EventType from .common import Event, EventType, add_markup
class Client: class Client:
@ -12,6 +12,7 @@ class Client:
name = 'nsss' name = 'nsss'
min_rate = 10 min_rate = 10
max_rate = 340 max_rate = 340
chunk_size = 0
@classmethod @classmethod
def escape_marked_text(cls, text): def escape_marked_text(cls, text):
@ -70,7 +71,8 @@ class Client:
self.nsss.speak(self.escape_marked_text(text)) self.nsss.speak(self.escape_marked_text(text))
self.status = {'synthesizing': True, 'paused': False} self.status = {'synthesizing': True, 'paused': False}
def speak_marked_text(self, text, callback): def speak_marked_text(self, marked_text, callback):
text = ''.join(add_markup(marked_text, self.mark_template, self.escape_marked_text, self.chunk_size))
self.current_callback = callback self.current_callback = callback
self.current_marked_text = text self.current_marked_text = text
self.last_mark = None self.last_mark = None

View File

@ -5,10 +5,70 @@
from time import monotonic from time import monotonic
from threading import Thread from threading import Thread
from typing import NamedTuple
from calibre import prepare_string_for_xml from calibre import prepare_string_for_xml
from .common import Event, EventType from .common import Event, EventType, add_markup
# One chunk of text that was handed to the speech engine:
#   stream_number -- the number associated with that chunk's stream
#   text          -- the chunk of text itself
QueueEntry = NamedTuple('QueueEntry', [('stream_number', int), ('text', str)])
class SpeechQueue:
    """Ordered list of queued text chunks plus a cursor into that list.

    Attributes:
        items: the ``QueueEntry`` objects, in the order they were added.
        pos: index of the entry selected by the last ``start()`` call, or
            ``-1`` when no entry is selected.
        last_mark: reset to ``None`` by ``clear()`` unless the mark is
            kept; otherwise assigned by the code that owns the queue.
    """

    def __init__(self):
        self.clear()

    def __len__(self):
        return len(self.items)

    def clear(self, keep_mark=False):
        """Drop every entry and deselect; forget the mark unless ``keep_mark``."""
        if not keep_mark:
            self.last_mark = None
        self.pos = -1
        self.items = []

    def add(self, stream_number, text):
        """Append a new entry for ``text`` under ``stream_number``."""
        entry = QueueEntry(stream_number, text)
        self.items.append(entry)

    def start(self, stream_number):
        """Select the first entry with this stream number (``-1`` if none)."""
        matches = (
            index for index, entry in enumerate(self.items)
            if entry.stream_number == stream_number
        )
        self.pos = next(matches, -1)

    @property
    def is_at_start(self):
        """True when the first entry is the selected one."""
        return 0 == self.pos

    @property
    def is_at_end(self):
        """True when the selected index is at or past the last entry."""
        return self.pos + 1 >= len(self.items)

    @property
    def current_stream_number(self):
        """Stream number of the selected entry, or ``None`` without one."""
        if self.pos < 0 or self.pos >= len(self.items):
            return None
        return self.items[self.pos].stream_number

    def resume_from_last_mark(self, mark_template):
        """Yield the text still to be spoken, starting at the last mark.

        The selected entry's text is yielded first, cut so that it begins at
        ``mark_template.format(self.last_mark)`` when that string occurs in
        it (and whole otherwise), followed by the text of every later
        entry. Nothing is yielded when no entry is selected.
        """
        if not 0 <= self.pos < len(self.items):
            return
        current = self.items[self.pos].text
        offset = -1
        if self.last_mark is not None:
            offset = current.find(mark_template.format(self.last_mark))
        yield current if offset == -1 else current[offset:]
        following = range(self.pos + 1, len(self.items))
        yield from (self.items[index].text for index in following)
class Client: class Client:
@ -17,6 +77,7 @@ class Client:
name = 'sapi' name = 'sapi'
min_rate = -10 min_rate = -10
max_rate = 10 max_rate = 10
chunk_size = 128 * 1024
@classmethod @classmethod
def escape_marked_text(cls, text): def escape_marked_text(cls, text):
@ -29,14 +90,23 @@ class Client:
self.default_system_rate = self.sp_voice.get_current_rate() self.default_system_rate = self.sp_voice.get_current_rate()
self.default_system_voice = self.sp_voice.get_current_voice() self.default_system_voice = self.sp_voice.get_current_voice()
self.default_system_sound_output = self.sp_voice.get_current_sound_output() self.default_system_sound_output = self.sp_voice.get_current_sound_output()
self.current_stream_number = None self.current_stream_queue = SpeechQueue()
self.current_callback = None self.current_callback = None
self.dispatch_on_main_thread = dispatch_on_main_thread self.dispatch_on_main_thread = dispatch_on_main_thread
self.current_marked_text = self.last_mark = None self.synthesizing = False
self.status = {'synthesizing': False, 'paused': False} self.pause_count = 0
self.settings = settings or {} self.settings = settings or {}
self.apply_settings() self.apply_settings()
@property
def status(self):
return {'synthesizing': self.synthesizing, 'paused': self.pause_count > 0}
def clear_pauses(self):
while self.pause_count:
self.sp_voice.resume()
self.pause_count -= 1
def create_voice(self): def create_voice(self):
from calibre.utils.windows.winsapi import ISpVoice from calibre.utils.windows.winsapi import ISpVoice
self.sp_voice = ISpVoice() self.sp_voice = ISpVoice()
@ -51,10 +121,10 @@ class Client:
shutdown = __del__ shutdown = __del__
def apply_settings(self, new_settings=None): def apply_settings(self, new_settings=None):
if self.status['paused']: if self.pause_count:
self.sp_voice.resume() self.clear_pauses()
self.ignore_next_stop_event = monotonic() self.ignore_next_stop_event = monotonic()
self.status = {'synthesizing': False, 'paused': False} self.synthesizing = False
if new_settings is not None: if new_settings is not None:
self.settings = new_settings self.settings = new_settings
self.sp_voice.set_current_rate(self.settings.get('rate', self.default_system_rate)) self.sp_voice.set_current_rate(self.settings.get('rate', self.default_system_rate))
@ -72,94 +142,101 @@ class Client:
SPEI_END_INPUT_STREAM, SPEI_START_INPUT_STREAM, SPEI_TTS_BOOKMARK SPEI_END_INPUT_STREAM, SPEI_START_INPUT_STREAM, SPEI_TTS_BOOKMARK
) )
c = self.current_callback c = self.current_callback
for (stream_number, event_type, event_data) in self.sp_voice.get_events(): for (stream_number, event_type, event_data) in self.sp_voice.get_events():
if event_type == SPEI_TTS_BOOKMARK: if event_type == SPEI_TTS_BOOKMARK:
self.last_mark = event_data self.current_stream_queue.last_mark = event_data
event = Event(EventType.mark, event_data) event = Event(EventType.mark, event_data)
elif event_type == SPEI_START_INPUT_STREAM: elif event_type == SPEI_START_INPUT_STREAM:
self.current_stream_queue.start(stream_number)
if self.ignore_next_start_event: if self.ignore_next_start_event:
self.ignore_next_start_event = False self.ignore_next_start_event = False
continue continue
self.synthesizing = True
if not self.current_stream_queue.is_at_start:
continue
event = Event(EventType.begin) event = Event(EventType.begin)
self.status = {'synthesizing': True, 'paused': False}
elif event_type == SPEI_END_INPUT_STREAM: elif event_type == SPEI_END_INPUT_STREAM:
if self.ignore_next_stop_event is not None and monotonic() - self.ignore_next_stop_event < 2: if self.ignore_next_stop_event is not None and monotonic() - self.ignore_next_stop_event < 2:
self.ignore_next_stop_event = None self.ignore_next_stop_event = None
continue continue
self.synthesizing = False
if not self.current_stream_queue.is_at_end:
continue
event = Event(EventType.end) event = Event(EventType.end)
self.status = {'synthesizing': False, 'paused': False}
else: else:
continue continue
if c is not None and stream_number == self.current_stream_number: if c is not None and stream_number == self.current_stream_queue.current_stream_number:
try: try:
c(event) c(event)
except Exception: except Exception:
import traceback import traceback
traceback.print_exc() traceback.print_exc()
def speak(self, text, is_xml=False, want_events=True): def speak(self, text, is_xml=False, want_events=True, purge=True):
from calibre_extensions.winsapi import ( from calibre_extensions.winsapi import (
SPF_ASYNC, SPF_IS_NOT_XML, SPF_PURGEBEFORESPEAK, SPF_IS_XML SPF_ASYNC, SPF_IS_NOT_XML, SPF_PURGEBEFORESPEAK, SPF_IS_XML
) )
flags = SPF_IS_XML if is_xml else SPF_IS_NOT_XML flags = SPF_IS_XML if is_xml else SPF_IS_NOT_XML
self.current_stream_number = self.sp_voice.speak(text, flags | SPF_PURGEBEFORESPEAK | SPF_ASYNC, want_events) if purge:
return self.current_stream_number flags |= SPF_PURGEBEFORESPEAK
return self.sp_voice.speak(text, flags | SPF_ASYNC, want_events)
def purge(self):
from calibre_extensions.winsapi import SPF_PURGEBEFORESPEAK
self.sp_voice.speak('', SPF_PURGEBEFORESPEAK, False)
self.synthesizing = False
def speak_simple_text(self, text): def speak_simple_text(self, text):
self.current_callback = None self.current_callback = None
self.current_marked_text = self.last_mark = None self.current_stream_queue.clear()
self.speak(text) number = self.speak(text)
self.clear_pauses()
self.current_stream_queue.add(number, text)
def speak_marked_text(self, text, callback): def speak_marked_text(self, text, callback):
self.current_marked_text = text self.clear_pauses()
self.last_mark = None self.current_stream_queue.clear()
if self.status['synthesizing']: if self.synthesizing:
self.ignore_next_stop_event = monotonic() self.ignore_next_stop_event = monotonic()
self.current_callback = callback self.current_callback = callback
self.speak(text, is_xml=True) for i, chunk in enumerate(add_markup(text, self.mark_template, self.escape_marked_text, self.chunk_size)):
number = self.speak(chunk, is_xml=True, purge=i == 0)
self.current_stream_queue.add(number, chunk)
def stop(self): def stop(self):
from calibre_extensions.winsapi import SPF_PURGEBEFORESPEAK self.clear_pauses()
if self.status['paused']: self.purge()
self.sp_voice.resume()
self.sp_voice.speak('', SPF_PURGEBEFORESPEAK, False)
self.status = {'synthesizing': False, 'paused': False}
if self.current_callback is not None: if self.current_callback is not None:
self.current_callback(Event(EventType.cancel)) self.current_callback(Event(EventType.cancel))
self.current_callback = None self.current_callback = None
def pause(self): def pause(self):
if self.status['synthesizing'] and not self.status['paused']:
self.sp_voice.pause() self.sp_voice.pause()
self.status = {'synthesizing': True, 'paused': True} self.pause_count += 1
if self.current_callback is not None: if self.current_callback is not None:
self.current_callback(Event(EventType.pause)) self.current_callback(Event(EventType.pause))
def resume(self): def resume(self):
if self.status['paused']: if self.pause_count:
self.sp_voice.resume() self.clear_pauses()
self.status = {'synthesizing': True, 'paused': False}
if self.current_callback is not None: if self.current_callback is not None:
self.current_callback(Event(EventType.resume)) self.current_callback(Event(EventType.resume))
def resume_after_configure(self): def resume_after_configure(self):
if self.status['paused']: if self.pause_count:
self.resume() self.clear_pauses()
return return
if self.last_mark is None: chunks = tuple(self.current_stream_queue.resume_from_last_mark(self.mark_template))
idx = -1
else:
mark = self.mark_template.format(self.last_mark)
idx = self.current_marked_text.find(mark)
if idx == -1:
text = self.current_marked_text
else:
text = self.current_marked_text[idx:]
self.ignore_next_start_event = True self.ignore_next_start_event = True
self.current_stream_queue.clear(keep_mark=True)
self.purge()
for chunk in chunks:
number = self.speak(chunk, is_xml=True, purge=False)
self.current_stream_queue.add(number, chunk)
if self.current_callback is not None: if self.current_callback is not None:
self.current_callback(Event(EventType.resume)) self.current_callback(Event(EventType.resume))
self.speak(text, is_xml=True) self.synthesizing = bool(chunks)
self.status = {'synthesizing': True, 'paused': False}
def get_voice_data(self): def get_voice_data(self):
ans = getattr(self, 'voice_data', None) ans = getattr(self, 'voice_data', None)
@ -184,10 +261,10 @@ class Client:
rate = max(self.min_rate, min(rate, self.max_rate)) rate = max(self.min_rate, min(rate, self.max_rate))
if rate != current_rate: if rate != current_rate:
self.settings['rate'] = rate self.settings['rate'] = rate
prev_state = self.status.copy() was_synthesizing = self.synthesizing
self.pause() self.pause()
self.apply_settings() self.apply_settings()
if prev_state['synthesizing']: if was_synthesizing:
self.status = {'synthesizing': True, 'paused': False} self.synthesizing = True
self.resume_after_configure() self.resume_after_configure()
return self.settings return self.settings

View File

@ -38,17 +38,6 @@ class Config(Dialog):
return super().accept() return super().accept()
def add_markup(text_parts, mark_template):
    """Join ``text_parts`` into a single string of speech markup.

    Integer parts are rendered with ``mark_template.format(part)``; every
    other part is passed through ``Client.escape_marked_text``.
    """
    # Imported here, inside the function, exactly as the original does.
    from calibre.gui2.tts.implementation import Client

    return ''.join(
        mark_template.format(part) if isinstance(part, int)
        else Client.escape_marked_text(part)
        for part in text_parts
    )
class TTS(QObject): class TTS(QObject):
dispatch_on_main_thread_signal = pyqtSignal(object) dispatch_on_main_thread_signal = pyqtSignal(object)
@ -98,8 +87,7 @@ class TTS(QObject):
return error_dialog(self.parent(), _('Text-to-Speech unavailable'), str(err), show=True) return error_dialog(self.parent(), _('Text-to-Speech unavailable'), str(err), show=True)
def play(self, data): def play(self, data):
marked_text = add_markup(data['marked_text'], self.tts_client_class.mark_template) self.tts_client.speak_marked_text(data['marked_text'], self.callback)
self.tts_client.speak_marked_text(marked_text.strip(), self.callback)
def pause(self, data): def pause(self, data):
self.tts_client.pause() self.tts_client.pause()