mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Content server: Viewer: Read Aloud: Implement word-by-word tracking when reading aloud if the browser and voice used support it.
This commit is contained in:
parent
f3299f5b8f
commit
b51055a00f
@ -115,7 +115,7 @@ class TTS(QObject):
|
|||||||
def callback(self, event):
|
def callback(self, event):
|
||||||
data = event.data
|
data = event.data
|
||||||
if event.type is event.type.mark:
|
if event.type is event.type.mark:
|
||||||
data = int(data)
|
data = {'first': int(data), 'last': int(data)}
|
||||||
self.event_received.emit(event.type.name, data)
|
self.event_received.emit(event.type.name, data)
|
||||||
|
|
||||||
def stop(self, data):
|
def stop(self, data):
|
||||||
|
@ -162,17 +162,24 @@ def select_search_result(sr):
|
|||||||
return select_find_result(match)
|
return select_find_result(match)
|
||||||
|
|
||||||
|
|
||||||
def select_tts_mark(idx_in_flat_text):
|
def find_word_length(idx):
|
||||||
window.getSelection().removeAllRanges()
|
|
||||||
if not cache.text_map:
|
|
||||||
cache.text_map = build_text_map()
|
|
||||||
r = tts_word_regex()
|
r = tts_word_regex()
|
||||||
r.lastIndex = idx_in_flat_text
|
r.lastIndex = idx
|
||||||
match = v'r.exec(cache.text_map.flat_text)'
|
match = v'r.exec(cache.text_map.flat_text)'
|
||||||
word_length = 5
|
word_length = 5
|
||||||
if match:
|
if match:
|
||||||
word_length = match[0]?.length or 5
|
word_length = match[0]?.length or 5
|
||||||
match = get_occurrence_data(cache.text_map.node_list, idx_in_flat_text, idx_in_flat_text + word_length)
|
return word_length
|
||||||
|
|
||||||
|
|
||||||
|
def select_tts_mark(idx_in_flat_text, last_idx_in_flat_text):
|
||||||
|
window.getSelection().removeAllRanges()
|
||||||
|
if not cache.text_map:
|
||||||
|
cache.text_map = build_text_map()
|
||||||
|
if idx_in_flat_text is last_idx_in_flat_text:
|
||||||
|
match = get_occurrence_data(cache.text_map.node_list, idx_in_flat_text, idx_in_flat_text + find_word_length(idx_in_flat_text))
|
||||||
|
else:
|
||||||
|
match = get_occurrence_data(cache.text_map.node_list, idx_in_flat_text, last_idx_in_flat_text + find_word_length(last_idx_in_flat_text))
|
||||||
if not match:
|
if not match:
|
||||||
return False
|
return False
|
||||||
return select_find_result(match)
|
return select_find_result(match)
|
||||||
|
@ -1041,9 +1041,11 @@ class IframeBoss:
|
|||||||
sel.removeAllRanges()
|
sel.removeAllRanges()
|
||||||
self.send_message('tts', type='text-extracted', marked_text=marked_text, pos=data.pos)
|
self.send_message('tts', type='text-extracted', marked_text=marked_text, pos=data.pos)
|
||||||
|
|
||||||
def mark_word_being_spoken(self, occurrence_number):
|
def mark_word_being_spoken(self, x):
|
||||||
|
if jstype(x) is 'number':
|
||||||
|
x = {'first': x, 'last': x}
|
||||||
self.last_search_at = window.performance.now()
|
self.last_search_at = window.performance.now()
|
||||||
if select_tts_mark(occurrence_number):
|
if select_tts_mark(x.first, x.last):
|
||||||
self.ensure_selection_boundary_visible()
|
self.ensure_selection_boundary_visible()
|
||||||
|
|
||||||
def audio_ebook_msg_received(self, data):
|
def audio_ebook_msg_received(self, data):
|
||||||
|
@ -11,15 +11,66 @@ from modals import create_custom_dialog, error_dialog
|
|||||||
from widgets import create_button
|
from widgets import create_button
|
||||||
|
|
||||||
|
|
||||||
def escaper():
|
class Tracker:
|
||||||
doc = document.implementation.createDocument("", "", None)
|
|
||||||
el = doc.createElement("temp")
|
def __init__(self):
|
||||||
el.textContent = "temp"
|
self.clear()
|
||||||
el = el.firstChild
|
|
||||||
ser = new XMLSerializer() # noqa
|
def clear(self):
|
||||||
return def(text):
|
self.positions = v'[]'
|
||||||
el.nodeValue = text
|
self.last_pos = 0
|
||||||
return ser.serializeToString(el)
|
self.queue = v'[]'
|
||||||
|
|
||||||
|
def parse_marked_text(self, marked_text):
|
||||||
|
self.clear()
|
||||||
|
text = v'[]'
|
||||||
|
text_len = chunk_len = index_in_positions = 0
|
||||||
|
limit = 4096
|
||||||
|
for x in marked_text:
|
||||||
|
if jstype(x) is 'number':
|
||||||
|
self.positions.push({'mark': x, 'offset_in_text': text_len})
|
||||||
|
else:
|
||||||
|
text_len += x.length
|
||||||
|
chunk_len += x.length
|
||||||
|
text.push(x)
|
||||||
|
if chunk_len > limit:
|
||||||
|
self.queue.push({'text': ''.join(text), 'index_in_positions': index_in_positions})
|
||||||
|
chunk_len = 0
|
||||||
|
text = v'[]'
|
||||||
|
index_in_positions = self.positions.length - 1
|
||||||
|
if text.length:
|
||||||
|
self.queue.push({'text': ''.join(text), 'index_in_positions': index_in_positions})
|
||||||
|
self.marked_text = marked_text
|
||||||
|
console.log(self.queue)
|
||||||
|
return self.current_text()
|
||||||
|
|
||||||
|
def pop_first(self):
|
||||||
|
self.queue.splice(0, 1)
|
||||||
|
|
||||||
|
def current_text(self):
|
||||||
|
if self.queue.length:
|
||||||
|
return self.queue[0].text
|
||||||
|
return ''
|
||||||
|
|
||||||
|
def resume(self):
|
||||||
|
self.last_pos = 0
|
||||||
|
if self.queue.length:
|
||||||
|
self.last_pos = self.queue[0].index_in_positions
|
||||||
|
return self.current_text()
|
||||||
|
|
||||||
|
def mark_word(self, start, length):
|
||||||
|
end = start + length
|
||||||
|
matches = v'[]'
|
||||||
|
while self.last_pos < self.positions.length:
|
||||||
|
pos = self.positions[self.last_pos]
|
||||||
|
if start <= pos.offset_in_text < end:
|
||||||
|
matches.push(pos)
|
||||||
|
elif pos.offset_in_text >= end:
|
||||||
|
break
|
||||||
|
self.last_pos += 1
|
||||||
|
if matches.length:
|
||||||
|
return matches[0].mark, matches[-1].mark
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class Client:
|
class Client:
|
||||||
@ -30,7 +81,7 @@ class Client:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.stop_requested_at = None
|
self.stop_requested_at = None
|
||||||
self.status = {'synthesizing': False, 'paused': False}
|
self.status = {'synthesizing': False, 'paused': False}
|
||||||
self.queue = v'[]'
|
self.tracker = Tracker(v'[]')
|
||||||
self.last_reached_mark = None
|
self.last_reached_mark = None
|
||||||
self.onevent = def():
|
self.onevent = def():
|
||||||
pass
|
pass
|
||||||
@ -38,19 +89,14 @@ class Client:
|
|||||||
self.current_voice_uri = data.voice or ''
|
self.current_voice_uri = data.voice or ''
|
||||||
self.current_rate = data.rate or None
|
self.current_rate = data.rate or None
|
||||||
|
|
||||||
def create_utterance(self, text_or_ssml, wrap_in_ssml):
|
def create_utterance(self, text):
|
||||||
if wrap_in_ssml:
|
ut = new window.SpeechSynthesisUtterance(text)
|
||||||
text_or_ssml = (
|
|
||||||
'<?xml version="1.0"?>\n<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"><s>' +
|
|
||||||
text_or_ssml +
|
|
||||||
'</s></speak>')
|
|
||||||
ut = new window.SpeechSynthesisUtterance(text_or_ssml)
|
|
||||||
ut.onstart = self.utterance_started
|
ut.onstart = self.utterance_started
|
||||||
ut.onpause = self.utterance_paused
|
ut.onpause = self.utterance_paused
|
||||||
ut.onend = self.utterance_ended
|
ut.onend = self.utterance_ended
|
||||||
ut.onerror = self.utterance_failed
|
ut.onerror = self.utterance_failed
|
||||||
ut.onmark = self.utterance_mark_reached
|
|
||||||
ut.onresume = self.utterance_resumed
|
ut.onresume = self.utterance_resumed
|
||||||
|
ut.addEventListener('boundary', self.utterance_boundary_reached)
|
||||||
if self.current_voice_uri:
|
if self.current_voice_uri:
|
||||||
for voice in window.speechSynthesis.getVoices():
|
for voice in window.speechSynthesis.getVoices():
|
||||||
if voice.voiceURI is self.current_voice_uri:
|
if voice.voiceURI is self.current_voice_uri:
|
||||||
@ -58,7 +104,6 @@ class Client:
|
|||||||
break
|
break
|
||||||
if self.current_rate:
|
if self.current_rate:
|
||||||
ut.rate = self.current_rate
|
ut.rate = self.current_rate
|
||||||
self.queue.push(ut)
|
|
||||||
return ut
|
return ut
|
||||||
|
|
||||||
def utterance_started(self, event):
|
def utterance_started(self, event):
|
||||||
@ -74,22 +119,27 @@ class Client:
|
|||||||
if self.stop_requested_at? and window.performance.now() - self.stop_requested_at < 1000:
|
if self.stop_requested_at? and window.performance.now() - self.stop_requested_at < 1000:
|
||||||
self.stop_requested_at = None
|
self.stop_requested_at = None
|
||||||
return
|
return
|
||||||
self.queue.splice(0, 1)
|
self.tracker.pop_first()
|
||||||
if self.queue.length:
|
text = self.tracker.current_text()
|
||||||
window.speechSynthesis.speak(self.queue[0])
|
if text and text.length:
|
||||||
|
window.speechSynthesis.speak(text)
|
||||||
else:
|
else:
|
||||||
self.onevent('end')
|
self.onevent('end')
|
||||||
|
|
||||||
def utterance_failed(self, event):
|
def utterance_failed(self, event):
|
||||||
self.status = {'synthesizing': False, 'paused': False}
|
self.status = {'synthesizing': False, 'paused': False}
|
||||||
self.queue = v'[]'
|
self.tracker.clear()
|
||||||
error_dialog(_('Speaking failed'), _(
|
if event.error is not 'interrupted':
|
||||||
'An error has occurred with speech synthesis: ' + event.error))
|
error_dialog(_('Speaking failed'), _(
|
||||||
|
'An error has occurred with speech synthesis: ' + event.error))
|
||||||
self.onevent('cancel')
|
self.onevent('cancel')
|
||||||
|
|
||||||
def utterance_mark_reached(self, event):
|
def utterance_boundary_reached(self, event):
|
||||||
self.last_reached_mark = event.name
|
if event.name is 'word':
|
||||||
self.onevent('mark', int(event.name))
|
x = self.tracker.mark_word(event.charIndex, event.charLength)
|
||||||
|
if x:
|
||||||
|
first, last = x[0], x[1]
|
||||||
|
self.onevent('mark', {'first': first, 'last': last})
|
||||||
|
|
||||||
def utterance_resumed(self, event):
|
def utterance_resumed(self, event):
|
||||||
self.status = {'synthesizing': True, 'paused': False}
|
self.status = {'synthesizing': True, 'paused': False}
|
||||||
@ -102,61 +152,28 @@ class Client:
|
|||||||
window.speechSynthesis.resume()
|
window.speechSynthesis.resume()
|
||||||
|
|
||||||
def resume_after_configure(self):
|
def resume_after_configure(self):
|
||||||
if self.queue.length:
|
text = self.tracker.resume()
|
||||||
window.speechSynthesis.speak(self.queue[0])
|
if text and text.length:
|
||||||
|
window.speechSynthesis.speak(text)
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
self.queue = v'[]'
|
self.tracker.clear()
|
||||||
self.stop_requested_at = window.performance.now()
|
self.stop_requested_at = window.performance.now()
|
||||||
window.speechSynthesis.cancel()
|
window.speechSynthesis.cancel()
|
||||||
self.status = {'synthesizing': False, 'paused': False}
|
self.status = {'synthesizing': False, 'paused': False}
|
||||||
|
|
||||||
def speak_simple_text(self, text):
|
def speak_simple_text(self, text):
|
||||||
self.stop()
|
self.stop()
|
||||||
while text.length > 32766:
|
text = self.tracker.parse_marked_text(v'[text]')
|
||||||
self.create_utterance(text[:32766])
|
if text and text.length:
|
||||||
text = text[32766:]
|
window.speechSynthesis.speak(self.create_utterance(text))
|
||||||
if text:
|
|
||||||
self.create_utterance(text)
|
|
||||||
if self.queue.length:
|
|
||||||
window.speechSynthesis.speak(self.queue[0])
|
|
||||||
|
|
||||||
def speak_marked_text(self, text_segments, onevent):
|
def speak_marked_text(self, text_segments, onevent):
|
||||||
self.stop()
|
self.stop()
|
||||||
self.onevent = onevent
|
self.onevent = onevent
|
||||||
buf = v'[]'
|
text = self.tracker.parse_marked_text(text_segments)
|
||||||
size = 0
|
if text and text.length:
|
||||||
limit = 2048
|
window.speechSynthesis.speak(self.create_utterance(text))
|
||||||
|
|
||||||
def commit():
|
|
||||||
nonlocal buf, size
|
|
||||||
text = buf.join('')
|
|
||||||
if text.length:
|
|
||||||
self.create_utterance(text)
|
|
||||||
buf = v'[]'
|
|
||||||
size = 0
|
|
||||||
|
|
||||||
for x in text_segments:
|
|
||||||
if jstype(x) is 'number':
|
|
||||||
# Currently the sad sack browsers dont support SSML
|
|
||||||
# https://github.com/WICG/speech-api/issues/37
|
|
||||||
# buf.push()
|
|
||||||
# markup = '<mark name="' + x + '"/>'
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
if x.length > limit:
|
|
||||||
commit()
|
|
||||||
while x.length:
|
|
||||||
self.create_utterance(x[:limit])
|
|
||||||
x = x[limit:]
|
|
||||||
continue
|
|
||||||
if size + x.length > limit:
|
|
||||||
commit()
|
|
||||||
buf.push(x)
|
|
||||||
size += x.length
|
|
||||||
commit()
|
|
||||||
if self.queue.length:
|
|
||||||
window.speechSynthesis.speak(self.queue[0])
|
|
||||||
|
|
||||||
def faster(self):
|
def faster(self):
|
||||||
self.change_rate(steps=1)
|
self.change_rate(steps=1)
|
||||||
@ -167,13 +184,6 @@ class Client:
|
|||||||
def apply_settings(self):
|
def apply_settings(self):
|
||||||
sd = get_session_data()
|
sd = get_session_data()
|
||||||
sd.set('tts_backend', {'voice': self.current_voice_uri, 'rate': self.current_rate})
|
sd.set('tts_backend', {'voice': self.current_voice_uri, 'rate': self.current_rate})
|
||||||
existing = self.queue
|
|
||||||
if self.queue and self.queue.length:
|
|
||||||
if self.status.paused:
|
|
||||||
window.speechSynthesis.resume()
|
|
||||||
self.stop()
|
|
||||||
for ut in existing:
|
|
||||||
self.create_utterance(ut.text)
|
|
||||||
|
|
||||||
def change_rate(self, steps=1):
|
def change_rate(self, steps=1):
|
||||||
rate = current_rate = (self.current_rate or 1) * 10
|
rate = current_rate = (self.current_rate or 1) * 10
|
||||||
|
Loading…
x
Reference in New Issue
Block a user