mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on speech backends
This commit is contained in:
parent
10971de4b7
commit
05fe8d8ef5
@ -6,11 +6,10 @@
|
|||||||
import re
|
import re
|
||||||
from itertools import count
|
from itertools import count
|
||||||
from PyQt5.Qt import (
|
from PyQt5.Qt import (
|
||||||
QDialogButtonBox, QLabel, QMainWindow, Qt, QVBoxLayout, QWidget, pyqtSignal
|
QDialogButtonBox, QLabel, QMainWindow, Qt, QTimer, QVBoxLayout, QWidget,
|
||||||
|
pyqtSignal
|
||||||
)
|
)
|
||||||
|
|
||||||
from calibre import prepare_string_for_xml
|
|
||||||
from calibre.constants import iswindows
|
|
||||||
from calibre.gui2 import Application
|
from calibre.gui2 import Application
|
||||||
|
|
||||||
from .common import EventType
|
from .common import EventType
|
||||||
@ -23,25 +22,22 @@ def add_markup(text):
|
|||||||
counter = count()
|
counter = count()
|
||||||
pos_map = {}
|
pos_map = {}
|
||||||
last = None
|
last = None
|
||||||
if iswindows:
|
bm = Client.mark_template
|
||||||
bm = '<bookmark mark="{}"/>'
|
|
||||||
else:
|
|
||||||
bm = '<mark name="{}"/>'
|
|
||||||
for m in re.finditer(r'\w+', text):
|
for m in re.finditer(r'\w+', text):
|
||||||
start, end = m.start(), m.end()
|
start, end = m.start(), m.end()
|
||||||
if first:
|
if first:
|
||||||
first = False
|
first = False
|
||||||
if start:
|
if start:
|
||||||
buf.append(prepare_string_for_xml(text[:start]))
|
buf.append(Client.escape_marked_text(text[:start]))
|
||||||
num = next(counter)
|
num = next(counter)
|
||||||
buf.append(bm.format(num))
|
buf.append(bm.format(num))
|
||||||
pos_map[num] = start, end
|
pos_map[num] = start, end
|
||||||
buf.append(prepare_string_for_xml(m.group()))
|
buf.append(Client.escape_marked_text(m.group()))
|
||||||
last = end
|
last = end
|
||||||
if last is None:
|
if last is None:
|
||||||
buf.append(prepare_string_for_xml(text))
|
buf.append(Client.escape_marked_text(text))
|
||||||
else:
|
else:
|
||||||
buf.append(prepare_string_for_xml(text[last:]))
|
buf.append(Client.escape_marked_text(text[last:]))
|
||||||
return ''.join(buf), pos_map
|
return ''.join(buf), pos_map
|
||||||
|
|
||||||
|
|
||||||
@ -135,5 +131,25 @@ def main():
|
|||||||
tts.tts.shutdown()
|
tts.tts.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
def headless():
    """Speak a short marked-up sentence without showing any window.

    Creates an Application and a Client, starts speaking a hard-coded
    string containing ``[[sync ...]]`` marks shortly after the event loop
    starts, prints every mark event as it arrives and quits the event loop
    when an end or cancel event is seen. A five second timer quits the
    event loop regardless, so the function always returns.
    """
    qt_app = Application([])
    client = Client()
    marked = '[[sync 0x123456]]very [[sync 0x80]]good [[sync 0x81]]indeed'
    terminal_types = (EventType.end, EventType.cancel)

    def on_events():
        # Drain everything the client has queued since the last call.
        for event in client.get_events():
            kind = event.type
            if kind is EventType.mark:
                print('mark:', hex(event.data))
            if kind in terminal_types:
                print(kind)
                qt_app.quit()

    def start_speaking():
        client.speak_marked_text(marked, on_events)

    # Speech is started from inside the event loop, not before it runs.
    QTimer.singleShot(10, start_speaking)
    # Safety net: stop after five seconds even if no end event arrives.
    QTimer.singleShot(5000, qt_app.quit)
    qt_app.exec_()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
@ -2,15 +2,24 @@
|
|||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
from calibre import prepare_string_for_xml
|
||||||
|
|
||||||
from .common import Event, EventType
|
from .common import Event, EventType
|
||||||
from .errors import TTSSystemUnavailable
|
from .errors import TTSSystemUnavailable
|
||||||
|
|
||||||
|
|
||||||
class Client:
|
class Client:
|
||||||
|
|
||||||
|
mark_template = '<mark name="{}"/>'
|
||||||
|
|
||||||
|
@classmethod
def escape_marked_text(cls, text):
    """Return *text* escaped for embedding between ``<mark>`` elements.

    The marks produced from ``mark_template`` are XML elements, so the
    plain text around them is passed through prepare_string_for_xml().
    """
    escaped = prepare_string_for_xml(text)
    return escaped
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.create_ssip_client()
|
self.create_ssip_client()
|
||||||
self.pending_events = []
|
self.pending_events = []
|
||||||
|
self.status = {'synthesizing': False, 'paused': False}
|
||||||
|
|
||||||
def create_ssip_client(self):
|
def create_ssip_client(self):
|
||||||
from speechd.client import SpawnError, SSIPClient
|
from speechd.client import SpawnError, SSIPClient
|
||||||
@ -37,12 +46,27 @@ class Client:
|
|||||||
|
|
||||||
def speak_simple_text(self, text):
|
def speak_simple_text(self, text):
|
||||||
self.set_use_ssml(False)
|
self.set_use_ssml(False)
|
||||||
self.ssip_client.speak(text)
|
self.pending_events = []
|
||||||
|
self.ssip_client.speak(text, self.update_status)
|
||||||
|
|
||||||
|
def update_status(self, callback_type, index_mark=None):
    """Track synthesizer state from a speech-dispatcher callback.

    ``self.status`` is replaced with a fresh dict holding the
    ``synthesizing`` and ``paused`` flags that correspond to
    *callback_type*. Callback types not listed below (for example index
    marks) leave the status untouched. *index_mark* is accepted only so
    the method can be used directly as a speechd callback; it is ignored.
    """
    from speechd.client import CallbackType

    # (callback type, synthesizing, paused)
    transitions = (
        (CallbackType.BEGIN, True, False),
        (CallbackType.END, False, False),
        (CallbackType.CANCEL, False, False),
        (CallbackType.PAUSE, True, True),
        (CallbackType.RESUME, True, False),
    )
    for kind, synthesizing, paused in transitions:
        if callback_type is kind:
            self.status = {'synthesizing': synthesizing, 'paused': paused}
            return
|
||||||
|
|
||||||
def speak_marked_text(self, text, callback):
|
def speak_marked_text(self, text, callback):
|
||||||
from speechd.client import CallbackType
|
from speechd.client import CallbackType
|
||||||
|
|
||||||
def callback_wrapper(callback_type, index_mark=None):
|
def callback_wrapper(callback_type, index_mark=None):
|
||||||
|
self.update_status(callback_type, index_mark)
|
||||||
if callback_type is CallbackType.INDEX_MARK:
|
if callback_type is CallbackType.INDEX_MARK:
|
||||||
event = Event(EventType.mark, index_mark)
|
event = Event(EventType.mark, index_mark)
|
||||||
elif callback_type is CallbackType.BEGIN:
|
elif callback_type is CallbackType.BEGIN:
|
||||||
@ -61,6 +85,7 @@ class Client:
|
|||||||
callback()
|
callback()
|
||||||
|
|
||||||
self.set_use_ssml(True)
|
self.set_use_ssml(True)
|
||||||
|
self.pending_events = []
|
||||||
self.ssip_client.speak(text, callback=callback_wrapper)
|
self.ssip_client.speak(text, callback=callback_wrapper)
|
||||||
|
|
||||||
def get_events(self):
|
def get_events(self):
|
||||||
|
@ -2,15 +2,57 @@
|
|||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
from .common import Event, EventType
|
||||||
|
|
||||||
|
|
||||||
class Client:
    """Text-to-speech client built on the ``NSSpeechSynthesizer`` wrapper
    exported by ``calibre_extensions.cocoa``.

    Marked text uses ``[[sync 0x<hex>]]`` commands (see ``mark_template``);
    the synthesizer reports them back through ``handle_message`` and they
    are queued as ``Event`` objects until ``get_events`` is called.
    """

    # Template for one position mark; formatted with an integer.
    mark_template = '[[sync 0x{:x}]]'

    @classmethod
    def escape_marked_text(cls, text):
        """Return *text* with literal ``[[`` / ``]]`` broken apart.

        Used for the plain text placed between marks so that brackets in
        the text itself cannot be read as command delimiters.
        """
        return text.replace('[[', ' [ [ ').replace(']]', ' ] ] ')

    @staticmethod
    def _collapse_delimiters(text):
        """Collapse every run of ``[`` or ``]`` in *text* to a single bracket.

        A single ``str.replace('[[', '[')`` pass is not enough: ``'[[['``
        becomes ``'[['`` again, so the replacement is repeated until no
        doubled bracket is left.
        """
        for doubled, single in (('[[', '['), (']]', ']')):
            while doubled in text:
                text = text.replace(doubled, single)
        return text

    def __init__(self):
        from calibre_extensions.cocoa import NSSpeechSynthesizer
        # The synthesizer calls handle_message(message_type, data).
        self.nsss = NSSpeechSynthesizer(self.handle_message)
        self.current_callback = None
        self.pending_events = []

    def __del__(self):
        # Drop our reference to the native synthesizer object.
        self.nsss = None
    shutdown = __del__

    def handle_message(self, message_type, data):
        """Queue an event for a message from the synthesizer.

        Messages are ignored unless a callback was registered by
        ``speak_marked_text``. ``MARK`` messages become mark events
        carrying *data*; ``END`` messages become an end event when *data*
        is truthy and a cancel event otherwise. Any other message type is
        dropped. After queueing, the registered callback is invoked with
        no arguments.
        """
        from calibre_extensions.cocoa import MARK, END
        if self.current_callback is None:
            return
        if message_type == MARK:
            event = Event(EventType.mark, data)
        elif message_type == END:
            event = Event(EventType.end if data else EventType.cancel)
        else:
            return
        self.pending_events.append(event)
        self.current_callback()

    def speak_simple_text(self, text):
        """Speak *text* with no marks and no event callback.

        Runs of ``[`` and ``]`` are collapsed to a single bracket first so
        that no ``[[`` / ``]]`` delimiter pair can survive in the spoken
        text.
        """
        self.current_callback = None
        self.pending_events = []
        self.nsss.speak(self._collapse_delimiters(text))

    def speak_marked_text(self, text, callback):
        """Speak *text*, which may contain ``mark_template`` marks.

        *callback* is called with no arguments each time a new event has
        been queued; the events themselves are fetched with
        ``get_events``. *text* is passed to the synthesizer unchanged, so
        callers are expected to have escaped the plain-text parts with
        ``escape_marked_text``.
        """
        self.current_callback = callback
        self.pending_events = []
        self.nsss.speak(text)

    def get_events(self):
        """Return the list of queued events and start a new empty queue."""
        events = self.pending_events
        self.pending_events = []
        return events

    @property
    def status(self):
        """Synthesizer status dict; always has ``synthesizing`` and
        ``paused`` keys, defaulting to False when the native call does not
        supply them."""
        ans = self.nsss.status()
        ans.setdefault('synthesizing', False)
        ans.setdefault('paused', False)
        return ans
|
||||||
|
@ -11,30 +11,77 @@
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
NSSpeechSynthesizer *nsss;
|
NSSpeechSynthesizer *nsss;
|
||||||
|
PyObject *callback;
|
||||||
} NSSS;
|
} NSSS;
|
||||||
|
|
||||||
|
typedef enum { MARK, END } MessageType;
|
||||||
|
|
||||||
static PyTypeObject NSSSType = {
|
static PyTypeObject NSSSType = {
|
||||||
PyVarObject_HEAD_INIT(NULL, 0)
|
PyVarObject_HEAD_INIT(NULL, 0)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Call the Python callback stored on `self` as callback(which, val).
// The GIL is acquired for the duration of the call; an exception raised by
// the callback is printed rather than propagated.
static void
dispatch_message(NSSS *self, MessageType which, unsigned long val) {
    PyGILState_STATE gil = PyGILState_Ensure();
    PyObject *result = PyObject_CallFunction(self->callback, "ik", which, val);
    if (result == NULL) PyErr_Print();
    else Py_DECREF(result);
    PyGILState_Release(gil);
}
|
||||||
|
|
||||||
|
// Delegate that forwards NSSpeechSynthesizer notifications to the Python
// callback of the owning NSSS object via dispatch_message().
@interface SynthesizerDelegate : NSObject <NSSpeechSynthesizerDelegate> {
    // The owning Python object. Stored as a plain pointer: no reference is
    // taken on it here.
    NSSS *parent;
}
- (id)initWithNSSS:(NSSS *)x;
@end

@implementation SynthesizerDelegate

// Remember the NSSS object whose callback should receive the messages.
- (id)initWithNSSS:(NSSS *)x {
    self = [super init];
    if (self) parent = x;
    return self;
}

// Speaking stopped: send END with the success flag as the value.
- (void)speechSynthesizer:(NSSpeechSynthesizer *)sender didFinishSpeaking:(BOOL)success {
    dispatch_message(parent, END, success);
}

// A sync command was reached: read the most recent sync value from the
// synthesizer and send it as a MARK. The `message` argument is not used.
// Nothing is sent if the property could not be read.
- (void)speechSynthesizer:(NSSpeechSynthesizer *)sender didEncounterSyncMessage:(NSString *)message {
    NSError *err = nil;
    NSNumber *syncProp = (NSNumber*) [sender objectForProperty: NSSpeechRecentSyncProperty error: &err];
    if (syncProp && !err) dispatch_message(parent, MARK, syncProp.unsignedLongValue);
}

@end
|
||||||
// }}}
|
// }}}
|
||||||
|
|
||||||
// tp_new for the NSSS type: NSSS(callback).
// `callback` must be callable; it is later invoked as
// callback(message_type, value) for synthesizer events.
// Returns a new reference, or NULL with an exception set.
static PyObject *
NSSS_new(PyTypeObject *type, PyObject *args, PyObject *kwds) {
    PyObject *callback;
    if (!PyArg_ParseTuple(args, "O", &callback)) return NULL;
    if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be a callable"); return NULL; }
    NSSS *self = (NSSS *) type->tp_alloc(type, 0);
    if (!self) return NULL;
    self->callback = callback;
    Py_INCREF(callback);
    self->nsss = [[NSSpeechSynthesizer alloc] initWithVoice:nil];
    if (!self->nsss) {
        // Do not leak the half-built object: dropping the only reference
        // runs NSSS_dealloc, which releases the callback reference.
        Py_DECREF(self);
        return PyErr_NoMemory();
    }
    // The delegate is owned by this object and released in NSSS_dealloc.
    self->nsss.delegate = [[SynthesizerDelegate alloc] initWithNSSS:self];
    return (PyObject*)self;
}
|
||||||
|
|
||||||
// tp_dealloc for the NSSS type: release the delegate and the synthesizer,
// drop the callback reference and free the object's memory.
static void
NSSS_dealloc(NSSS *self) {
    if (self->nsss) {
        // The delegate was allocated in NSSS_new and is owned by us.
        if (self->nsss.delegate) [self->nsss.delegate release];
        self->nsss.delegate = nil;
        [self->nsss release];
    }
    self->nsss = nil;
    Py_CLEAR(self->callback);
    // The object was created with tp_alloc, so its memory must be returned
    // with tp_free; without this every NSSS instance is leaked.
    Py_TYPE(self)->tp_free((PyObject *)self);
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
@ -73,6 +120,20 @@ NSSS_get_all_voices(NSSS *self, PyObject *args) {
|
|||||||
return ans;
|
return ans;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// set_command_delimiters(left, right): ask the synthesizer to use `left` and
// `right` as the prefix and suffix of embedded speech commands.
// Returns None, or raises OSError if setting the property reports an error.
static PyObject*
NSSS_set_command_delimiters(NSSS *self, PyObject *args) {
    // this function doesn't actually work
    // https://openradar.appspot.com/6524554
    const char *left, *right;
    if (!PyArg_ParseTuple(args, "ss", &left, &right)) return NULL;
    NSError *err = nil;
    [self->nsss setObject:@{NSSpeechCommandPrefix:@(left), NSSpeechCommandSuffix:@(right)} forProperty:NSSpeechCommandDelimiterProperty error:&err];
    if (err) {
        PyErr_SetString(PyExc_OSError, [[NSString stringWithFormat:@"Failed to set delimiters: %@", err] UTF8String]);
        return NULL;
    }
    Py_RETURN_NONE;
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
NSSS_get_current_voice(NSSS *self, PyObject *args) {
|
NSSS_get_current_voice(NSSS *self, PyObject *args) {
|
||||||
@ -144,10 +205,33 @@ NSSS_start_saving_to_path(NSSS *self, PyObject *args) {
|
|||||||
Py_RETURN_FALSE;
|
Py_RETURN_FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// status(): return a dict describing the synthesizer state.
// The dict has a boolean "synthesizing" entry when the status property
// reports NSSpeechStatusOutputBusy and a boolean "paused" entry when it
// reports NSSpeechStatusOutputPaused; either key may be absent.
// Raises OSError if reading the status property reports an error.
static PyObject*
NSSS_status(NSSS *self, PyObject *args) {
    NSError *err = nil;
    NSNumber *flag = nil;
    NSDictionary *status = [self->nsss objectForProperty:NSSpeechStatusProperty error:&err];
    if (err) {
        PyErr_SetString(PyExc_OSError, [[err localizedDescription] UTF8String]);
        return NULL;
    }
    PyObject *ans = PyDict_New();
    if (!ans) return NULL;

    flag = [status objectForKey:NSSpeechStatusOutputBusy];
    if (flag && PyDict_SetItemString(ans, "synthesizing", [flag boolValue] ? Py_True : Py_False) != 0) goto error;

    flag = [status objectForKey:NSSpeechStatusOutputPaused];
    if (flag && PyDict_SetItemString(ans, "paused", [flag boolValue] ? Py_True : Py_False) != 0) goto error;

    return ans;

error:
    // A dict insertion failed; the exception is already set.
    Py_DECREF(ans);
    return NULL;
}
|
||||||
|
|
||||||
// Boilerplate {{{
|
// Boilerplate {{{
|
||||||
#define M(name, args) { #name, (PyCFunction)NSSS_##name, args, ""}
|
#define M(name, args) { #name, (PyCFunction)NSSS_##name, args, ""}
|
||||||
static PyMethodDef NSSS_methods[] = {
|
static PyMethodDef NSSS_methods[] = {
|
||||||
M(get_all_voices, METH_NOARGS),
|
M(get_all_voices, METH_NOARGS),
|
||||||
|
M(status, METH_NOARGS),
|
||||||
M(speak, METH_VARARGS),
|
M(speak, METH_VARARGS),
|
||||||
M(start_saving_to_path, METH_VARARGS),
|
M(start_saving_to_path, METH_VARARGS),
|
||||||
M(speaking, METH_NOARGS),
|
M(speaking, METH_NOARGS),
|
||||||
@ -159,6 +243,7 @@ static PyMethodDef NSSS_methods[] = {
|
|||||||
M(set_current_volume, METH_VARARGS),
|
M(set_current_volume, METH_VARARGS),
|
||||||
M(get_current_rate, METH_NOARGS),
|
M(get_current_rate, METH_NOARGS),
|
||||||
M(set_current_rate, METH_VARARGS),
|
M(set_current_rate, METH_VARARGS),
|
||||||
|
M(set_command_delimiters, METH_VARARGS),
|
||||||
{NULL, NULL, 0, NULL}
|
{NULL, NULL, 0, NULL}
|
||||||
};
|
};
|
||||||
#undef M
|
#undef M
|
||||||
@ -180,6 +265,8 @@ nsss_init_module(PyObject *module) {
|
|||||||
Py_DECREF(&NSSSType);
|
Py_DECREF(&NSSSType);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
PyModule_AddIntMacro(module, MARK);
|
||||||
|
PyModule_AddIntMacro(module, END);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -4,11 +4,20 @@
|
|||||||
|
|
||||||
|
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
|
||||||
|
from calibre import prepare_string_for_xml
|
||||||
|
|
||||||
from .common import Event, EventType
|
from .common import Event, EventType
|
||||||
|
|
||||||
|
|
||||||
class Client:
|
class Client:
|
||||||
|
|
||||||
|
mark_template = '<bookmark mark="{}"/>'
|
||||||
|
|
||||||
|
@classmethod
def escape_marked_text(cls, text):
    """Return *text* escaped for embedding between ``<bookmark>`` elements.

    The marks produced from ``mark_template`` are XML elements, so the
    plain text around them is passed through prepare_string_for_xml().
    """
    escaped = prepare_string_for_xml(text)
    return escaped
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
from calibre.utils.windows.winsapi import ISpVoice
|
from calibre.utils.windows.winsapi import ISpVoice
|
||||||
self.sp_voice = ISpVoice()
|
self.sp_voice = ISpVoice()
|
||||||
@ -33,7 +42,9 @@ class Client:
|
|||||||
c()
|
c()
|
||||||
|
|
||||||
def get_events(self):
|
def get_events(self):
|
||||||
from calibre_extensions.winsapi import SPEI_TTS_BOOKMARK, SPEI_START_INPUT_STREAM, SPEI_END_INPUT_STREAM
|
from calibre_extensions.winsapi import (
|
||||||
|
SPEI_END_INPUT_STREAM, SPEI_START_INPUT_STREAM, SPEI_TTS_BOOKMARK
|
||||||
|
)
|
||||||
ans = []
|
ans = []
|
||||||
for (stream_number, event_type, event_data) in self.sp_voice.get_events():
|
for (stream_number, event_type, event_data) in self.sp_voice.get_events():
|
||||||
if stream_number == self.current_stream_number:
|
if stream_number == self.current_stream_number:
|
||||||
@ -49,11 +60,15 @@ class Client:
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def speak_simple_text(self, text):
|
def speak_simple_text(self, text):
|
||||||
from calibre_extensions.winsapi import SPF_ASYNC, SPF_PURGEBEFORESPEAK, SPF_IS_NOT_XML
|
from calibre_extensions.winsapi import (
|
||||||
|
SPF_ASYNC, SPF_IS_NOT_XML, SPF_PURGEBEFORESPEAK
|
||||||
|
)
|
||||||
self.current_callback = None
|
self.current_callback = None
|
||||||
self.current_stream_number = self.sp_voice.speak(text, SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_IS_NOT_XML)
|
self.current_stream_number = self.sp_voice.speak(text, SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_IS_NOT_XML)
|
||||||
|
|
||||||
def speak_marked_text(self, text, callback):
|
def speak_marked_text(self, text, callback):
|
||||||
from calibre_extensions.winsapi import SPF_ASYNC, SPF_PURGEBEFORESPEAK, SPF_IS_XML
|
from calibre_extensions.winsapi import (
|
||||||
|
SPF_ASYNC, SPF_IS_XML, SPF_PURGEBEFORESPEAK
|
||||||
|
)
|
||||||
self.current_callback = callback
|
self.current_callback = callback
|
||||||
self.current_stream_number = self.sp_voice.speak(text, SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_IS_XML, True)
|
self.current_stream_number = self.sp_voice.speak(text, SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_IS_XML, True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user