mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Windows: Fix Read aloud not working with books that have a single large internal text file, such as MOBI or DOCX books
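Apparently, there is an undocumented limit to how much text can be passed to SAPI in a single Speak() call. So we now maintain our own internal queue and pass the text to SAPI in chunks of roughly 128K characters (chunk_size = 128 * 1024, measured with len() on the marked-up text).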
This commit is contained in:
parent
c7c627f285
commit
913892d4f8
@ -22,3 +22,22 @@ class Event:
|
|||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f'Event(type={self.type}, data={self.data})'
|
return f'Event(type={self.type}, data={self.data})'
|
||||||
|
|
||||||
|
|
||||||
|
def add_markup(text_parts, mark_template, escape_marked_text, chunk_size=0):
    """Convert ``text_parts`` to markup strings, optionally split into chunks.

    Args:
        text_parts: Iterable whose items are either ``int`` mark ids or text
            fragments.
        mark_template: Format string; ``mark_template.format(mark_id)``
            produces the markup emitted for an ``int`` item.
        escape_marked_text: Callable applied to every non-``int`` item; its
            return value is emitted in place of the item.
        chunk_size: Soft limit on the accumulated ``len()`` of the markup in
            one chunk. ``0`` (the default) disables chunking, so at most one
            string is yielded.

    Yields:
        Markup strings with leading/trailing whitespace stripped. Chunk
        boundaries only ever fall between items, so a single item longer than
        ``chunk_size`` is yielded whole, in a chunk of its own. Nothing is
        yielded when the total markup length is zero.
    """
    buf = []
    size = 0
    for part in text_parts:
        if isinstance(part, int):
            item = mark_template.format(part)
        else:
            item = escape_marked_text(part)
        sz = len(item)
        # Flush only when something has actually been accumulated; otherwise
        # an oversized first item would cause an empty chunk to be emitted
        # ahead of the real text.
        if chunk_size and size and size + sz > chunk_size:
            yield ''.join(buf).strip()
            size = 0
            buf = []
        size += sz
        buf.append(item)
    if size:
        yield ''.join(buf).strip()
|
||||||
|
@ -6,7 +6,7 @@ from functools import partial
|
|||||||
|
|
||||||
from calibre import prepare_string_for_xml
|
from calibre import prepare_string_for_xml
|
||||||
|
|
||||||
from .common import Event, EventType
|
from .common import Event, EventType, add_markup
|
||||||
from .errors import TTSSystemUnavailable
|
from .errors import TTSSystemUnavailable
|
||||||
|
|
||||||
|
|
||||||
@ -21,6 +21,7 @@ class Client:
|
|||||||
name = 'speechd'
|
name = 'speechd'
|
||||||
min_rate = -100
|
min_rate = -100
|
||||||
max_rate = 100
|
max_rate = 100
|
||||||
|
chunk_size = 0
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def escape_marked_text(cls, text):
|
def escape_marked_text(cls, text):
|
||||||
@ -127,8 +128,9 @@ class Client:
|
|||||||
self.next_cancel_is_for_pause = False
|
self.next_cancel_is_for_pause = False
|
||||||
return event
|
return event
|
||||||
|
|
||||||
def speak_marked_text(self, text, callback=lambda ev: None):
|
def speak_marked_text(self, marked_text, callback=lambda ev: None):
|
||||||
self.stop()
|
self.stop()
|
||||||
|
text = ''.join(add_markup(marked_text, self.mark_template, self.escape_marked_text, self.chunk_size))
|
||||||
self.current_marked_text = text
|
self.current_marked_text = text
|
||||||
self.last_mark = None
|
self.last_mark = None
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
from .common import Event, EventType
|
from .common import Event, EventType, add_markup
|
||||||
|
|
||||||
|
|
||||||
class Client:
|
class Client:
|
||||||
@ -12,6 +12,7 @@ class Client:
|
|||||||
name = 'nsss'
|
name = 'nsss'
|
||||||
min_rate = 10
|
min_rate = 10
|
||||||
max_rate = 340
|
max_rate = 340
|
||||||
|
chunk_size = 0
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def escape_marked_text(cls, text):
|
def escape_marked_text(cls, text):
|
||||||
@ -70,7 +71,8 @@ class Client:
|
|||||||
self.nsss.speak(self.escape_marked_text(text))
|
self.nsss.speak(self.escape_marked_text(text))
|
||||||
self.status = {'synthesizing': True, 'paused': False}
|
self.status = {'synthesizing': True, 'paused': False}
|
||||||
|
|
||||||
def speak_marked_text(self, text, callback):
|
def speak_marked_text(self, marked_text, callback):
|
||||||
|
text = ''.join(add_markup(marked_text, self.mark_template, self.escape_marked_text, self.chunk_size))
|
||||||
self.current_callback = callback
|
self.current_callback = callback
|
||||||
self.current_marked_text = text
|
self.current_marked_text = text
|
||||||
self.last_mark = None
|
self.last_mark = None
|
||||||
|
@ -5,10 +5,70 @@
|
|||||||
|
|
||||||
from time import monotonic
|
from time import monotonic
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
from typing import NamedTuple
|
||||||
|
|
||||||
from calibre import prepare_string_for_xml
|
from calibre import prepare_string_for_xml
|
||||||
|
|
||||||
from .common import Event, EventType
|
from .common import Event, EventType, add_markup
|
||||||
|
|
||||||
|
|
||||||
|
class QueueEntry(NamedTuple):
    """A single chunk of text together with the stream number it was queued under."""

    # Stream number associated with this chunk; SpeechQueue.start() compares
    # it against the stream number of incoming events to locate the entry.
    stream_number: int
    # The chunk text exactly as it was added to the queue.
    text: str
|
||||||
|
|
||||||
|
|
||||||
|
class SpeechQueue:
    """Ordered list of queued text chunks plus a cursor to the active one.

    Attributes:
        items: List of ``QueueEntry`` objects in the order they were added.
        pos: Index into ``items`` of the entry selected by the last call to
            ``start()``, or ``-1`` when no entry is selected.
        last_mark: Most recent mark id recorded on the queue (assigned by the
            owner of the queue), or ``None``.
    """

    def __init__(self):
        self.clear()

    def __len__(self):
        return len(self.items)

    def clear(self, keep_mark=False):
        """Drop all entries and reset the cursor.

        ``last_mark`` is reset to ``None`` unless ``keep_mark`` is true.
        """
        self.items = []
        self.pos = -1
        if not keep_mark:
            self.last_mark = None

    def add(self, stream_number, text):
        """Append a chunk identified by ``stream_number`` to the queue."""
        self.items.append(QueueEntry(stream_number, text))

    def start(self, stream_number):
        """Point the cursor at the first entry with ``stream_number``.

        The cursor becomes ``-1`` when no entry matches.
        """
        self.pos = -1
        for idx, entry in enumerate(self.items):
            if entry.stream_number == stream_number:
                self.pos = idx
                break

    def _has_current(self):
        # True when the cursor refers to an existing entry.
        return -1 < self.pos < len(self.items)

    @property
    def is_at_start(self):
        """True when the cursor is on the first entry."""
        return self.pos == 0

    @property
    def is_at_end(self):
        """True when the cursor is on, or beyond, the last entry.

        Note that this is also true for an empty queue with no selection
        (``pos == -1`` and no items).
        """
        return self.pos >= len(self.items) - 1

    @property
    def current_stream_number(self):
        """Stream number of the entry under the cursor, or ``None`` if there is none."""
        if self._has_current():
            return self.items[self.pos].stream_number
        return None

    def resume_from_last_mark(self, mark_template):
        """Yield the texts needed to continue from ``last_mark``.

        The first text yielded is the current entry's text, cut so that it
        begins at the markup for ``last_mark`` (``mark_template.format(last_mark)``).
        If there is no ``last_mark``, or its markup is not found in the current
        entry, the whole text is yielded. The texts of all later entries
        follow unchanged. Nothing is yielded when the cursor does not refer
        to an entry.
        """
        if not self._has_current():
            return
        text = self.items[self.pos].text
        if self.last_mark is not None:
            idx = text.find(mark_template.format(self.last_mark))
            if idx != -1:
                text = text[idx:]
        yield text
        for i in range(self.pos + 1, len(self.items)):
            yield self.items[i].text
|
||||||
|
|
||||||
|
|
||||||
class Client:
|
class Client:
|
||||||
@ -17,6 +77,7 @@ class Client:
|
|||||||
name = 'sapi'
|
name = 'sapi'
|
||||||
min_rate = -10
|
min_rate = -10
|
||||||
max_rate = 10
|
max_rate = 10
|
||||||
|
chunk_size = 128 * 1024
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def escape_marked_text(cls, text):
|
def escape_marked_text(cls, text):
|
||||||
@ -29,14 +90,23 @@ class Client:
|
|||||||
self.default_system_rate = self.sp_voice.get_current_rate()
|
self.default_system_rate = self.sp_voice.get_current_rate()
|
||||||
self.default_system_voice = self.sp_voice.get_current_voice()
|
self.default_system_voice = self.sp_voice.get_current_voice()
|
||||||
self.default_system_sound_output = self.sp_voice.get_current_sound_output()
|
self.default_system_sound_output = self.sp_voice.get_current_sound_output()
|
||||||
self.current_stream_number = None
|
self.current_stream_queue = SpeechQueue()
|
||||||
self.current_callback = None
|
self.current_callback = None
|
||||||
self.dispatch_on_main_thread = dispatch_on_main_thread
|
self.dispatch_on_main_thread = dispatch_on_main_thread
|
||||||
self.current_marked_text = self.last_mark = None
|
self.synthesizing = False
|
||||||
self.status = {'synthesizing': False, 'paused': False}
|
self.pause_count = 0
|
||||||
self.settings = settings or {}
|
self.settings = settings or {}
|
||||||
self.apply_settings()
|
self.apply_settings()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def status(self):
|
||||||
|
return {'synthesizing': self.synthesizing, 'paused': self.pause_count > 0}
|
||||||
|
|
||||||
|
def clear_pauses(self):
|
||||||
|
while self.pause_count:
|
||||||
|
self.sp_voice.resume()
|
||||||
|
self.pause_count -= 1
|
||||||
|
|
||||||
def create_voice(self):
|
def create_voice(self):
|
||||||
from calibre.utils.windows.winsapi import ISpVoice
|
from calibre.utils.windows.winsapi import ISpVoice
|
||||||
self.sp_voice = ISpVoice()
|
self.sp_voice = ISpVoice()
|
||||||
@ -51,10 +121,10 @@ class Client:
|
|||||||
shutdown = __del__
|
shutdown = __del__
|
||||||
|
|
||||||
def apply_settings(self, new_settings=None):
|
def apply_settings(self, new_settings=None):
|
||||||
if self.status['paused']:
|
if self.pause_count:
|
||||||
self.sp_voice.resume()
|
self.clear_pauses()
|
||||||
self.ignore_next_stop_event = monotonic()
|
self.ignore_next_stop_event = monotonic()
|
||||||
self.status = {'synthesizing': False, 'paused': False}
|
self.synthesizing = False
|
||||||
if new_settings is not None:
|
if new_settings is not None:
|
||||||
self.settings = new_settings
|
self.settings = new_settings
|
||||||
self.sp_voice.set_current_rate(self.settings.get('rate', self.default_system_rate))
|
self.sp_voice.set_current_rate(self.settings.get('rate', self.default_system_rate))
|
||||||
@ -72,94 +142,101 @@ class Client:
|
|||||||
SPEI_END_INPUT_STREAM, SPEI_START_INPUT_STREAM, SPEI_TTS_BOOKMARK
|
SPEI_END_INPUT_STREAM, SPEI_START_INPUT_STREAM, SPEI_TTS_BOOKMARK
|
||||||
)
|
)
|
||||||
c = self.current_callback
|
c = self.current_callback
|
||||||
|
|
||||||
for (stream_number, event_type, event_data) in self.sp_voice.get_events():
|
for (stream_number, event_type, event_data) in self.sp_voice.get_events():
|
||||||
if event_type == SPEI_TTS_BOOKMARK:
|
if event_type == SPEI_TTS_BOOKMARK:
|
||||||
self.last_mark = event_data
|
self.current_stream_queue.last_mark = event_data
|
||||||
event = Event(EventType.mark, event_data)
|
event = Event(EventType.mark, event_data)
|
||||||
elif event_type == SPEI_START_INPUT_STREAM:
|
elif event_type == SPEI_START_INPUT_STREAM:
|
||||||
|
self.current_stream_queue.start(stream_number)
|
||||||
if self.ignore_next_start_event:
|
if self.ignore_next_start_event:
|
||||||
self.ignore_next_start_event = False
|
self.ignore_next_start_event = False
|
||||||
continue
|
continue
|
||||||
|
self.synthesizing = True
|
||||||
|
if not self.current_stream_queue.is_at_start:
|
||||||
|
continue
|
||||||
event = Event(EventType.begin)
|
event = Event(EventType.begin)
|
||||||
self.status = {'synthesizing': True, 'paused': False}
|
|
||||||
elif event_type == SPEI_END_INPUT_STREAM:
|
elif event_type == SPEI_END_INPUT_STREAM:
|
||||||
if self.ignore_next_stop_event is not None and monotonic() - self.ignore_next_stop_event < 2:
|
if self.ignore_next_stop_event is not None and monotonic() - self.ignore_next_stop_event < 2:
|
||||||
self.ignore_next_stop_event = None
|
self.ignore_next_stop_event = None
|
||||||
continue
|
continue
|
||||||
|
self.synthesizing = False
|
||||||
|
if not self.current_stream_queue.is_at_end:
|
||||||
|
continue
|
||||||
event = Event(EventType.end)
|
event = Event(EventType.end)
|
||||||
self.status = {'synthesizing': False, 'paused': False}
|
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
if c is not None and stream_number == self.current_stream_number:
|
if c is not None and stream_number == self.current_stream_queue.current_stream_number:
|
||||||
try:
|
try:
|
||||||
c(event)
|
c(event)
|
||||||
except Exception:
|
except Exception:
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
def speak(self, text, is_xml=False, want_events=True):
|
def speak(self, text, is_xml=False, want_events=True, purge=True):
|
||||||
from calibre_extensions.winsapi import (
|
from calibre_extensions.winsapi import (
|
||||||
SPF_ASYNC, SPF_IS_NOT_XML, SPF_PURGEBEFORESPEAK, SPF_IS_XML
|
SPF_ASYNC, SPF_IS_NOT_XML, SPF_PURGEBEFORESPEAK, SPF_IS_XML
|
||||||
)
|
)
|
||||||
flags = SPF_IS_XML if is_xml else SPF_IS_NOT_XML
|
flags = SPF_IS_XML if is_xml else SPF_IS_NOT_XML
|
||||||
self.current_stream_number = self.sp_voice.speak(text, flags | SPF_PURGEBEFORESPEAK | SPF_ASYNC, want_events)
|
if purge:
|
||||||
return self.current_stream_number
|
flags |= SPF_PURGEBEFORESPEAK
|
||||||
|
return self.sp_voice.speak(text, flags | SPF_ASYNC, want_events)
|
||||||
|
|
||||||
|
def purge(self):
|
||||||
|
from calibre_extensions.winsapi import SPF_PURGEBEFORESPEAK
|
||||||
|
self.sp_voice.speak('', SPF_PURGEBEFORESPEAK, False)
|
||||||
|
self.synthesizing = False
|
||||||
|
|
||||||
def speak_simple_text(self, text):
|
def speak_simple_text(self, text):
|
||||||
self.current_callback = None
|
self.current_callback = None
|
||||||
self.current_marked_text = self.last_mark = None
|
self.current_stream_queue.clear()
|
||||||
self.speak(text)
|
number = self.speak(text)
|
||||||
|
self.clear_pauses()
|
||||||
|
self.current_stream_queue.add(number, text)
|
||||||
|
|
||||||
def speak_marked_text(self, text, callback):
|
def speak_marked_text(self, text, callback):
|
||||||
self.current_marked_text = text
|
self.clear_pauses()
|
||||||
self.last_mark = None
|
self.current_stream_queue.clear()
|
||||||
if self.status['synthesizing']:
|
if self.synthesizing:
|
||||||
self.ignore_next_stop_event = monotonic()
|
self.ignore_next_stop_event = monotonic()
|
||||||
self.current_callback = callback
|
self.current_callback = callback
|
||||||
self.speak(text, is_xml=True)
|
for i, chunk in enumerate(add_markup(text, self.mark_template, self.escape_marked_text, self.chunk_size)):
|
||||||
|
number = self.speak(chunk, is_xml=True, purge=i == 0)
|
||||||
|
self.current_stream_queue.add(number, chunk)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
from calibre_extensions.winsapi import SPF_PURGEBEFORESPEAK
|
self.clear_pauses()
|
||||||
if self.status['paused']:
|
self.purge()
|
||||||
self.sp_voice.resume()
|
|
||||||
self.sp_voice.speak('', SPF_PURGEBEFORESPEAK, False)
|
|
||||||
self.status = {'synthesizing': False, 'paused': False}
|
|
||||||
if self.current_callback is not None:
|
if self.current_callback is not None:
|
||||||
self.current_callback(Event(EventType.cancel))
|
self.current_callback(Event(EventType.cancel))
|
||||||
self.current_callback = None
|
self.current_callback = None
|
||||||
|
|
||||||
def pause(self):
|
def pause(self):
|
||||||
if self.status['synthesizing'] and not self.status['paused']:
|
|
||||||
self.sp_voice.pause()
|
self.sp_voice.pause()
|
||||||
self.status = {'synthesizing': True, 'paused': True}
|
self.pause_count += 1
|
||||||
if self.current_callback is not None:
|
if self.current_callback is not None:
|
||||||
self.current_callback(Event(EventType.pause))
|
self.current_callback(Event(EventType.pause))
|
||||||
|
|
||||||
def resume(self):
|
def resume(self):
|
||||||
if self.status['paused']:
|
if self.pause_count:
|
||||||
self.sp_voice.resume()
|
self.clear_pauses()
|
||||||
self.status = {'synthesizing': True, 'paused': False}
|
|
||||||
if self.current_callback is not None:
|
if self.current_callback is not None:
|
||||||
self.current_callback(Event(EventType.resume))
|
self.current_callback(Event(EventType.resume))
|
||||||
|
|
||||||
def resume_after_configure(self):
|
def resume_after_configure(self):
|
||||||
if self.status['paused']:
|
if self.pause_count:
|
||||||
self.resume()
|
self.clear_pauses()
|
||||||
return
|
return
|
||||||
if self.last_mark is None:
|
chunks = tuple(self.current_stream_queue.resume_from_last_mark(self.mark_template))
|
||||||
idx = -1
|
|
||||||
else:
|
|
||||||
mark = self.mark_template.format(self.last_mark)
|
|
||||||
idx = self.current_marked_text.find(mark)
|
|
||||||
if idx == -1:
|
|
||||||
text = self.current_marked_text
|
|
||||||
else:
|
|
||||||
text = self.current_marked_text[idx:]
|
|
||||||
self.ignore_next_start_event = True
|
self.ignore_next_start_event = True
|
||||||
|
self.current_stream_queue.clear(keep_mark=True)
|
||||||
|
self.purge()
|
||||||
|
for chunk in chunks:
|
||||||
|
number = self.speak(chunk, is_xml=True, purge=False)
|
||||||
|
self.current_stream_queue.add(number, chunk)
|
||||||
if self.current_callback is not None:
|
if self.current_callback is not None:
|
||||||
self.current_callback(Event(EventType.resume))
|
self.current_callback(Event(EventType.resume))
|
||||||
self.speak(text, is_xml=True)
|
self.synthesizing = bool(chunks)
|
||||||
self.status = {'synthesizing': True, 'paused': False}
|
|
||||||
|
|
||||||
def get_voice_data(self):
|
def get_voice_data(self):
|
||||||
ans = getattr(self, 'voice_data', None)
|
ans = getattr(self, 'voice_data', None)
|
||||||
@ -184,10 +261,10 @@ class Client:
|
|||||||
rate = max(self.min_rate, min(rate, self.max_rate))
|
rate = max(self.min_rate, min(rate, self.max_rate))
|
||||||
if rate != current_rate:
|
if rate != current_rate:
|
||||||
self.settings['rate'] = rate
|
self.settings['rate'] = rate
|
||||||
prev_state = self.status.copy()
|
was_synthesizing = self.synthesizing
|
||||||
self.pause()
|
self.pause()
|
||||||
self.apply_settings()
|
self.apply_settings()
|
||||||
if prev_state['synthesizing']:
|
if was_synthesizing:
|
||||||
self.status = {'synthesizing': True, 'paused': False}
|
self.synthesizing = True
|
||||||
self.resume_after_configure()
|
self.resume_after_configure()
|
||||||
return self.settings
|
return self.settings
|
||||||
|
@ -38,17 +38,6 @@ class Config(Dialog):
|
|||||||
return super().accept()
|
return super().accept()
|
||||||
|
|
||||||
|
|
||||||
def add_markup(text_parts, mark_template):
|
|
||||||
from calibre.gui2.tts.implementation import Client
|
|
||||||
buf = []
|
|
||||||
for x in text_parts:
|
|
||||||
if isinstance(x, int):
|
|
||||||
buf.append(mark_template.format(x))
|
|
||||||
else:
|
|
||||||
buf.append(Client.escape_marked_text(x))
|
|
||||||
return ''.join(buf)
|
|
||||||
|
|
||||||
|
|
||||||
class TTS(QObject):
|
class TTS(QObject):
|
||||||
|
|
||||||
dispatch_on_main_thread_signal = pyqtSignal(object)
|
dispatch_on_main_thread_signal = pyqtSignal(object)
|
||||||
@ -98,8 +87,7 @@ class TTS(QObject):
|
|||||||
return error_dialog(self.parent(), _('Text-to-Speech unavailable'), str(err), show=True)
|
return error_dialog(self.parent(), _('Text-to-Speech unavailable'), str(err), show=True)
|
||||||
|
|
||||||
def play(self, data):
|
def play(self, data):
|
||||||
marked_text = add_markup(data['marked_text'], self.tts_client_class.mark_template)
|
self.tts_client.speak_marked_text(data['marked_text'], self.callback)
|
||||||
self.tts_client.speak_marked_text(marked_text.strip(), self.callback)
|
|
||||||
|
|
||||||
def pause(self, data):
|
def pause(self, data):
|
||||||
self.tts_client.pause()
|
self.tts_client.pause()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user