Get basic TTS playback working in browser viewer

Sadly, there is no support for marking words, so highlighting the
currently spoken word will require gymnastics.
This commit is contained in:
Kovid Goyal 2020-12-03 19:56:08 +05:30
parent 5c9e597fb3
commit ce0a57b69e
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 148 additions and 8 deletions

View File

@ -173,6 +173,8 @@ class ReadAloud:
self.send_message('mark', num=data)
elif which is 'begin':
self.state = PLAYING
elif which is 'end':
pass
def send_message(self, type, **kw):
self.view.iframe_wrapper.send_message('tts', type=type, **kw)

120
src/pyj/read_book/tts.pyj Normal file
View File

@ -0,0 +1,120 @@
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
from __python__ import bound_methods, hash_literals
from gettext import gettext as _
from modals import error_dialog
def escaper():
    # Build and return a function that XML-escapes a piece of text.
    #
    # A detached XML document supplies one text node that is reused for
    # every call: the text to escape is stored as the node's value and
    # the node is then serialized with an XMLSerializer, whose output is
    # returned.
    xml_doc = document.implementation.createDocument("", "", None)
    holder = xml_doc.createElement("temp")
    # Assigning textContent creates the child text node that is reused below
    holder.textContent = "temp"
    text_node = holder.firstChild
    serializer = new XMLSerializer()  # noqa

    def escape(text):
        text_node.nodeValue = text
        return serializer.serializeToString(text_node)

    return escape


# Module level escaping function, shares a single text node and serializer
escape_for_xml = escaper()
class Client:
    # Wrapper around the browser's window.speechSynthesis API.
    #
    # Text is split into utterances that are kept in self.queue. Only the
    # utterance at the head of the queue is handed to the browser; when it
    # ends, it is removed and the next one is spoken. Progress is reported
    # through self.onevent(event_name, data) with the event names 'begin',
    # 'pause', 'resume', 'mark', 'end' and 'cancel'.

    def __init__(self):
        self.status = {'synthesizing': False, 'paused': False}
        # Utterances waiting to be spoken, the head is the one being spoken
        self.queue = v'[]'
        self.last_reached_mark = None
        # Event callback, replaced by speak_marked_text()
        self.onevent = def():
            pass

    def create_utterance(self, text_or_ssml, wrap_in_ssml):
        # Create a SpeechSynthesisUtterance for the specified text, hook up
        # all its event handlers, append it to the queue and return it.
        # If wrap_in_ssml is truthy the text is wrapped in an SSML <speak>
        # document, in which case it must already be escaped for XML.
        if wrap_in_ssml:
            text_or_ssml = (
                '<?xml version="1.0"?>\n<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"><s>' +
                text_or_ssml +
                '</s></speak>')
        ut = new window.SpeechSynthesisUtterance(text_or_ssml)
        ut.onstart = self.utterance_started
        ut.onpause = self.utterance_paused
        ut.onend = self.utterance_ended
        ut.onerror = self.utterance_failed
        ut.onmark = self.utterance_mark_reached
        ut.onresume = self.utterance_resumed
        self.queue.push(ut)
        return ut

    def utterance_started(self, event):
        self.status = {'synthesizing': True, 'paused': False}
        self.onevent('begin')

    def utterance_paused(self, event):
        self.status = {'synthesizing': True, 'paused': True}
        self.onevent('pause')

    def utterance_ended(self, event):
        # Drop the finished utterance and either start the next queued one
        # or report that everything has been spoken.
        self.status = {'synthesizing': False, 'paused': False}
        self.queue.splice(0, 1)
        if self.queue.length:
            window.speechSynthesis.speak(self.queue[0])
        else:
            self.onevent('end')

    def utterance_failed(self, event):
        self.status = {'synthesizing': False, 'paused': False}
        # speechSynthesis.cancel() reports the utterances it discards via
        # error events with the codes 'canceled' and 'interrupted'. These
        # are the result of our own stop() call, not failures: stop() has
        # already emptied the queue, and by the time the event is delivered
        # the queue may hold the utterances of the next request, so it must
        # not be wiped and no error dialog should be shown.
        was_cancelled = event.error is 'canceled' or event.error is 'interrupted'
        if not was_cancelled:
            self.queue = v'[]'
            # The error code is appended outside the _() call so that the
            # translation lookup uses the constant message.
            error_dialog(_('Speaking failed'), _(
                'An error has occurred with speech synthesis: ') + event.error)
        self.onevent('cancel')

    def utterance_mark_reached(self, event):
        self.last_reached_mark = event.name
        self.onevent('mark', int(event.name))

    def utterance_resumed(self, event):
        self.status = {'synthesizing': True, 'paused': False}
        self.onevent('resume')

    def pause(self):
        window.speechSynthesis.pause()

    def resume(self):
        window.speechSynthesis.resume()

    def stop(self):
        # Cancel all speech in the browser and forget any queued utterances
        window.speechSynthesis.cancel()
        self.queue = v'[]'
        self.status = {'synthesizing': False, 'paused': False}

    def speak_simple_text(self, text):
        # Speak plain text, replacing anything currently being spoken.
        # The text is split into pieces of at most 32766 characters,
        # presumably to stay under a per-utterance length limit in
        # browsers -- TODO confirm.
        self.stop()
        while text.length > 32766:
            self.create_utterance(text[:32766])
            text = text[32766:]
        if text:
            self.create_utterance(text)
        if self.queue.length:
            window.speechSynthesis.speak(self.queue[0])

    def speak_marked_text(self, text_segments, onevent):
        # Speak text_segments, a sequence of strings interleaved with
        # numbers, where the numbers are marks. onevent becomes the
        # callback used to report all subsequent events. Anything
        # currently being spoken is stopped first.
        self.stop()
        self.onevent = onevent
        buf = v'[]'
        size = 0
        for x in text_segments:
            if jstype(x) is 'number':
                # Currently browsers do not support SSML, so marks cannot
                # be emitted
                # https://github.com/WICG/speech-api/issues/37
                # buf.push('<mark name="' + x + '"/>')
                buf.push('')
            else:
                buf.push(escape_for_xml(x))
            size += buf[-1].length
            if size > 24000:
                # Flush the accumulated text *before* resetting the buffer,
                # otherwise the chunk is lost and an empty utterance is
                # queued in its place.
                self.create_utterance(buf.join(''), True)
                buf = v'[]'
                size = 0
        text = buf.join('')
        if text.length:
            # NOTE(review): unlike the chunks flushed in the loop above, the
            # remainder is not wrapped in SSML although it has been escaped
            # for XML -- confirm which of the two is intended.
            self.create_utterance(text)
        if self.queue.length:
            window.speechSynthesis.speak(self.queue[0])

View File

@ -3,19 +3,20 @@
# globals: __RENDER_VERSION__
from __python__ import hash_literals
import traceback
from elementmaker import E
from gettext import gettext as _
import traceback
from ajax import ajax, ajax_send
from book_list.constants import read_book_container_id
from book_list.library_data import current_library_id, library_data
from book_list.router import home, push_state, read_book_mode, update_window_title
from book_list.ui import show_panel
from dom import clear
from gettext import gettext as _
from modals import create_simple_dialog_markup, error_dialog
from read_book.db import get_db
from read_book.globals import ui_operations
from read_book.tts import Client
from read_book.view import View
from utils import debounce, full_screen_element, human_readable, request_full_screen
from widgets import create_button
@ -53,6 +54,7 @@ class ReadUI:
id=self.display_id, style='display:none',
))
self.view = View(container.lastChild)
self.tts_client = Client()
self.windows_to_listen_for_messages_from = []
window.addEventListener('resize', debounce(self.on_resize.bind(self), 250))
window.addEventListener('message', self.message_from_other_window.bind(self))
@ -83,6 +85,8 @@ class ReadUI:
ui_operations.close_book = self.close_book.bind(self)
ui_operations.copy_image = self.copy_image.bind(self)
ui_operations.view_image = self.view_image.bind(self)
ui_operations.speak_simple_text = self.speak_simple_text.bind(self)
ui_operations.tts = self.tts.bind(self)
ui_operations.open_url = def(url):
window.open(url, '_blank')
ui_operations.copy_selection = def(text, html):
@ -115,12 +119,6 @@ class ReadUI:
window.navigator.clipboard.writeText(text or '').then(def (): pass;, def():
error_dialog(_('Could not copy to clipboard'), _('No permission to write to clipboard'))
)
ui_operations.speak_simple_text = def (text):
if not window.speechSynthesis:
return error_dialog(_('No speech support'), _(
'Your browser does not have support for Text-to-Speech'))
ut = new SpeechSynthesisUtterance(text) # noqa
window.speechSynthesis.speak(ut)
def on_resize(self):
self.view.on_resize()
@ -610,3 +608,23 @@ class ReadUI:
w, callback = x
if w is msg.source:
callback(msg)
def check_for_speech_capability(self):
    # Return True if the browser exposes window.speechSynthesis. Otherwise
    # show an error dialog to the user and return False.
    if window.speechSynthesis:
        return True
    error_dialog(_('No speech support'), _(
        'Your browser does not have support for Text-to-Speech'))
    return False
def speak_simple_text(self, text):
    # Hand plain text to the TTS client for speaking. When the speech
    # capability check fails nothing is spoken (the check itself shows
    # the error dialog).
    if self.check_for_speech_capability():
        self.tts_client.speak_simple_text(text)
def tts(self, event, data):
    # Dispatch a TTS request to the TTS client.
    #
    # The 'play' event speaks data.marked_text, with events from the
    # client delivered to the read aloud handler of the view. Any other
    # event name is looked up as a method of the client and called with
    # no arguments.
    if not self.check_for_speech_capability():
        return
    client = self.tts_client
    if event is not 'play':
        getattr(client, event)()
        return
    client.speak_marked_text(data.marked_text, self.view.read_aloud.handle_tts_event)