mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Get basic TTS playback working in browser viewer
Sadly, there is no support for marking words, so highlighting the currently spoken word will require gymnastics.
This commit is contained in:
parent
5c9e597fb3
commit
ce0a57b69e
@ -173,6 +173,8 @@ class ReadAloud:
|
||||
self.send_message('mark', num=data)
|
||||
elif which is 'begin':
|
||||
self.state = PLAYING
|
||||
elif which is 'end':
|
||||
pass
|
||||
|
||||
def send_message(self, type, **kw):
|
||||
self.view.iframe_wrapper.send_message('tts', type=type, **kw)
|
||||
|
120
src/pyj/read_book/tts.pyj
Normal file
120
src/pyj/read_book/tts.pyj
Normal file
@ -0,0 +1,120 @@
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
from __python__ import bound_methods, hash_literals
|
||||
|
||||
from gettext import gettext as _
|
||||
from modals import error_dialog
|
||||
|
||||
|
||||
def escaper():
    # Build and return a function that serializes arbitrary text through an
    # XMLSerializer. A single scratch text node and a single serializer are
    # created once and reused by every call of the returned function.
    scratch_doc = document.implementation.createDocument("", "", None)
    holder = scratch_doc.createElement("temp")
    # Assigning textContent creates the child text node that is reused below
    holder.textContent = "temp"
    text_node = holder.firstChild
    serializer = new XMLSerializer()  # noqa

    def serialize_text(text):
        # Put the text into the shared node and return its serialized form
        text_node.nodeValue = text
        return serializer.serializeToString(text_node)

    return serialize_text


# Module level helper used to make text safe for embedding in XML/SSML markup
escape_for_xml = escaper()
|
||||
|
||||
|
||||
class Client:
    # Text-to-Speech client built on the browser's window.speechSynthesis API.
    #
    # Utterances are kept in self.queue and handed to the browser one at a
    # time: when one ends, the next one in the queue is spoken. Progress is
    # reported by calling self.onevent(event_name, optional_data) with one of
    # 'begin', 'pause', 'resume', 'mark', 'end' or 'cancel'.

    def __init__(self):
        # Updated by the utterance event handlers and by stop()
        self.status = {'synthesizing': False, 'paused': False}
        # Pending SpeechSynthesisUtterance objects, the head is the one being spoken
        self.queue = v'[]'
        # Name of the most recent mark reported by the browser
        self.last_reached_mark = None
        # Default event callback does nothing, replaced by speak_marked_text()
        self.onevent = def():
            pass

    def create_utterance(self, text_or_ssml, wrap_in_ssml):
        # Create an utterance for the specified text, hook up all its event
        # handlers, append it to the queue and return it. If wrap_in_ssml is
        # truthy the text is wrapped in an SSML <speak> document first, in
        # which case the caller is responsible for XML escaping it.
        if wrap_in_ssml:
            text_or_ssml = (
                '<?xml version="1.0"?>\n<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"><s>' +
                text_or_ssml +
                '</s></speak>')
        ut = new window.SpeechSynthesisUtterance(text_or_ssml)
        ut.onstart = self.utterance_started
        ut.onpause = self.utterance_paused
        ut.onend = self.utterance_ended
        ut.onerror = self.utterance_failed
        ut.onmark = self.utterance_mark_reached
        ut.onresume = self.utterance_resumed
        self.queue.push(ut)
        return ut

    def utterance_started(self, event):
        self.status = {'synthesizing': True, 'paused': False}
        self.onevent('begin')

    def utterance_paused(self, event):
        self.status = {'synthesizing': True, 'paused': True}
        self.onevent('pause')

    def utterance_ended(self, event):
        # Drop the finished utterance and either speak the next one or
        # report that everything has been spoken
        self.status = {'synthesizing': False, 'paused': False}
        self.queue.splice(0, 1)
        if self.queue.length:
            window.speechSynthesis.speak(self.queue[0])
        else:
            self.onevent('end')

    def utterance_failed(self, event):
        if event.error is 'canceled' or event.error is 'interrupted':
            # These errors are the result of a call to speechSynthesis.cancel(),
            # which this class only makes from stop(). stop() has already reset
            # the state, and new utterances may have been queued since, so
            # neither clear the queue nor bother the user with an error dialog.
            self.onevent('cancel')
            return
        self.status = {'synthesizing': False, 'paused': False}
        self.queue = v'[]'
        # Only the constant part of the message goes through gettext, so
        # that the translation lookup can actually succeed
        error_dialog(_('Speaking failed'), _(
            'An error has occurred with speech synthesis: ') + event.error)
        self.onevent('cancel')

    def utterance_mark_reached(self, event):
        self.last_reached_mark = event.name
        self.onevent('mark', int(event.name))

    def utterance_resumed(self, event):
        self.status = {'synthesizing': True, 'paused': False}
        self.onevent('resume')

    def pause(self):
        window.speechSynthesis.pause()

    def resume(self):
        window.speechSynthesis.resume()

    def stop(self):
        # Cancel anything being spoken by the browser and forget all queued utterances
        window.speechSynthesis.cancel()
        self.queue = v'[]'
        self.status = {'synthesizing': False, 'paused': False}

    def speak_simple_text(self, text):
        # Speak plain text, replacing anything currently being spoken. The
        # text is split into utterances of at most 32766 characters.
        # NOTE(review): presumably 32766 is a browser limit on utterance length -- confirm
        self.stop()
        while text.length > 32766:
            self.create_utterance(text[:32766])
            text = text[32766:]
        if text:
            self.create_utterance(text)
        if self.queue.length:
            window.speechSynthesis.speak(self.queue[0])

    def speak_marked_text(self, text_segments, onevent):
        # Speak a sequence of segments, replacing anything currently being
        # spoken. Segments that are numbers are mark positions, all other
        # segments are treated as text. onevent becomes the callback used to
        # report all subsequent speech events.
        self.stop()
        self.onevent = onevent
        buf = v'[]'
        size = 0
        for x in text_segments:
            if jstype(x) is 'number':
                # Currently browsers do not support SSML
                # https://github.com/WICG/speech-api/issues/37
                # buf.push('<mark name="' + x + '"/>')
                buf.push('')
            else:
                buf.push(escape_for_xml(x))
            size += buf[-1].length
            if size > 24000:
                # The accumulated text must be queued before the buffer is
                # reset, otherwise the whole chunk is silently lost
                # NOTE(review): this chunk is wrapped in SSML while the final
                # chunk below is not -- confirm which of the two is intended
                self.create_utterance(buf.join(''), True)
                buf = v'[]'
                size = 0
        text = buf.join('')
        if text.length:
            self.create_utterance(text)
        if self.queue.length:
            window.speechSynthesis.speak(self.queue[0])
|
@ -3,19 +3,20 @@
|
||||
# globals: __RENDER_VERSION__
|
||||
from __python__ import hash_literals
|
||||
|
||||
import traceback
|
||||
from elementmaker import E
|
||||
from gettext import gettext as _
|
||||
|
||||
import traceback
|
||||
from ajax import ajax, ajax_send
|
||||
from book_list.constants import read_book_container_id
|
||||
from book_list.library_data import current_library_id, library_data
|
||||
from book_list.router import home, push_state, read_book_mode, update_window_title
|
||||
from book_list.ui import show_panel
|
||||
from dom import clear
|
||||
from gettext import gettext as _
|
||||
from modals import create_simple_dialog_markup, error_dialog
|
||||
from read_book.db import get_db
|
||||
from read_book.globals import ui_operations
|
||||
from read_book.tts import Client
|
||||
from read_book.view import View
|
||||
from utils import debounce, full_screen_element, human_readable, request_full_screen
|
||||
from widgets import create_button
|
||||
@ -53,6 +54,7 @@ class ReadUI:
|
||||
id=self.display_id, style='display:none',
|
||||
))
|
||||
self.view = View(container.lastChild)
|
||||
self.tts_client = Client()
|
||||
self.windows_to_listen_for_messages_from = []
|
||||
window.addEventListener('resize', debounce(self.on_resize.bind(self), 250))
|
||||
window.addEventListener('message', self.message_from_other_window.bind(self))
|
||||
@ -83,6 +85,8 @@ class ReadUI:
|
||||
ui_operations.close_book = self.close_book.bind(self)
|
||||
ui_operations.copy_image = self.copy_image.bind(self)
|
||||
ui_operations.view_image = self.view_image.bind(self)
|
||||
ui_operations.speak_simple_text = self.speak_simple_text.bind(self)
|
||||
ui_operations.tts = self.tts.bind(self)
|
||||
ui_operations.open_url = def(url):
|
||||
window.open(url, '_blank')
|
||||
ui_operations.copy_selection = def(text, html):
|
||||
@ -115,12 +119,6 @@ class ReadUI:
|
||||
window.navigator.clipboard.writeText(text or '').then(def (): pass;, def():
|
||||
error_dialog(_('Could not copy to clipboard'), _('No permission to write to clipboard'))
|
||||
)
|
||||
ui_operations.speak_simple_text = def (text):
|
||||
if not window.speechSynthesis:
|
||||
return error_dialog(_('No speech support'), _(
|
||||
'Your browser does not have support for Text-to-Speech'))
|
||||
ut = new SpeechSynthesisUtterance(text) # noqa
|
||||
window.speechSynthesis.speak(ut)
|
||||
|
||||
def on_resize(self):
|
||||
self.view.on_resize()
|
||||
@ -610,3 +608,23 @@ class ReadUI:
|
||||
w, callback = x
|
||||
if w is msg.source:
|
||||
callback(msg)
|
||||
|
||||
def check_for_speech_capability(self):
    # Return True if the browser provides window.speechSynthesis. Otherwise
    # show an error dialog to the user and return False.
    if window.speechSynthesis:
        return True
    error_dialog(_('No speech support'), _(
        'Your browser does not have support for Text-to-Speech'))
    return False
|
||||
|
||||
def speak_simple_text(self, text):
    # Hand the text to the TTS client, but only when speech is available.
    # check_for_speech_capability() reports the problem to the user otherwise.
    if self.check_for_speech_capability():
        self.tts_client.speak_simple_text(text)
|
||||
|
||||
def tts(self, event, data):
    # Dispatch a TTS request to the TTS client. The 'play' event starts
    # speaking data.marked_text, with speech events reported back to the
    # view's read aloud handler. Any other event name is called as a no
    # argument method of the client.
    if not self.check_for_speech_capability():
        return
    client = self.tts_client
    if event is not 'play':
        getattr(client, event)()
        return
    client.speak_marked_text(data.marked_text, self.view.read_aloud.handle_tts_event)
|
||||
|
Loading…
x
Reference in New Issue
Block a user