Port config to winspeech

Kovid Goyal 2023-02-02 11:22:46 +05:30
parent f64b9e3e2c
commit c8e9f33736
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 321 additions and 18 deletions

View File

@@ -52,8 +52,16 @@ class Client:
self.synthesizing = False
self.settings = settings or {}
self.clear_chunks()
self.default_system_audio_device = self.backend.get_audio_device().device
self.default_system_voice = self.backend.default_voice().voice
self.apply_settings()
def get_all_voices(self):
return self.backend.all_voices().voices
def get_all_audio_devices(self):
return self.backend.all_audio_devices().devices
def __del__(self):
if self.backend is not None:
self.backend.shutdown()
@@ -63,6 +71,9 @@ class Client:
def dispatch_msg(self, msg):
self.dispatch_on_main_thread(partial(self.handle_event, msg))
def speak_current_chunk(self):
self.backend.speak(self.current_chunks[self.current_chunk_idx], is_cued=True)
def handle_event(self, x):
if isinstance(x, MarkReached) and self.current_chunks:
self.last_mark = x.id
@@ -74,7 +85,7 @@ class Client:
self.callback_ignoring_errors(Event(EventType.end))
else:
self.current_chunk_idx += 1
self.backend.speak(self.current_chunks[self.current_chunk_idx], is_cued=True)
self.speak_current_chunk()
elif x.state is MediaState.failed:
self.clear_chunks()
self.callback_ignoring_errors(Event(EventType.cancel))
@@ -82,7 +93,8 @@ class Client:
e.display_to_user = True
raise e
elif x.state is MediaState.opened:
self.callback_ignoring_errors(Event(EventType.begin))
self.callback_ignoring_errors(Event(EventType.resume if self.next_start_is_resume else EventType.begin))
self.next_start_is_resume = False
elif isinstance(x, Error):
raise x.as_exception(check_for_no_audio_devices=True)
else:
@@ -98,12 +110,11 @@ class Client:
self.clear_chunks()
self.current_callback = callback
self.current_chunks = tuple(split_into_chunks(text, self.chunk_size))
self.current_chunk_idx = 0
self.current_chunk_idx = -100
if self.current_chunks:
self.backend.speak(self.current_chunks[self.current_chunk_idx], is_cued=True)
self.current_chunk_idx = 0
self.speak_current_chunk()
self.synthesizing = True
if self.current_callback is not None:
self.current_callback(Event(EventType.begin))
def callback_ignoring_errors(self, ev):
if self.current_callback is not None:
@@ -115,8 +126,9 @@ class Client:
def clear_chunks(self):
self.synthesizing = False
self.next_start_is_resume = False
self.current_chunk_idx = -100
self.current_chunks = []
self.current_chunks = ()
self.last_mark = -1
def stop(self):
@@ -138,12 +150,52 @@ class Client:
self.current_callback(Event(EventType.resume))
def apply_settings(self, new_settings=None):
pass
if self.synthesizing:
self.stop()
if new_settings is not None:
self.settings = new_settings
try:
self.backend.set_rate(self.settings.get('rate', self.default_system_rate))
except OSError:
self.settings.pop('rate', None)
try:
self.backend.set_voice(self.settings.get('voice'), self.default_system_voice)
except OSError:
self.settings.pop('voice', None)
try:
self.backend.set_audio_device(self.settings.get('sound_output'), self.default_system_audio_device)
except OSError:
self.settings.pop('sound_output', None)
def config_widget(self, backend_settings, parent):
from calibre.gui2.tts.windows_config import Widget
return Widget(self, backend_settings, parent)
def chunks_from_last_mark(self):
for i, chunk in enumerate(self.current_chunks):
for ci, x in enumerate(chunk):
if x == self.last_mark:
chunks = self.current_chunks[i:]
chunk = chunk[ci + 1:]
if chunk:
chunks = (chunk,) + chunks[1:]
else:
chunks = chunks[1:]
return chunks
return ()
def resume_after_configure(self):
if not self.synthesizing:
return
self.current_chunk_idx = -100
self.last_mark = -1
self.current_chunks = self.chunks_from_last_mark()
self.next_start_is_resume = True
self.synthesizing = bool(self.current_chunks)
if self.current_chunks:
self.current_chunk_idx = 0
self.speak_current_chunk()
def change_rate(self, steps=1):
rate = current_rate = self.settings.get('rate', self.default_system_rate)
if rate < 1:

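For reference, a minimal standalone sketch of the resume logic added above, assuming each chunk is a plain sequence of integer mark ids (in the real Client the chunks carry cue data and last_mark comes from MarkReached events). It shows how the chunk containing the last spoken mark is re-sliced so speech resumes at the cue after it.

def chunks_from_last_mark(current_chunks, last_mark):
    # Walk the chunks until the mark id is found, then drop everything up to
    # and including that mark; playback resumes from the next cue.
    for i, chunk in enumerate(current_chunks):
        for ci, x in enumerate(chunk):
            if x == last_mark:
                chunks = current_chunks[i:]
                rest = chunk[ci + 1:]
                return ((rest,) + chunks[1:]) if rest else chunks[1:]
    return ()

# Example: with three chunks of mark ids and last_mark == 4,
# chunks_from_last_mark(((1, 2), (3, 4, 5), (6,)), 4) returns ((5,), (6,)).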
View File

@@ -0,0 +1,196 @@
#!/usr/bin/env python
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
from contextlib import suppress
from qt.core import (
QAbstractItemView, QAbstractTableModel, QByteArray, QComboBox, QFontMetrics,
QFormLayout, QItemSelectionModel, QSlider, QSortFilterProxyModel, Qt, QTableView,
QWidget
)
from calibre.gui2.widgets import BusyCursor
class VoicesModel(QAbstractTableModel):
system_default_voice = '__default__'
def __init__(self, voice_data, parent=None):
super().__init__(parent)
self.voice_data = voice_data
self.current_voices = tuple((x.display_name, x.language, x.gender, x.id) for x in voice_data)
self.column_headers = _('Name'), _('Language'), _('Gender')
def rowCount(self, parent=None):
return len(self.current_voices) + 1
def columnCount(self, parent=None):
return len(self.column_headers)
def headerData(self, section, orientation, role=Qt.ItemDataRole.DisplayRole):
if role == Qt.ItemDataRole.DisplayRole and orientation == Qt.Orientation.Horizontal:
return self.column_headers[section]
return super().headerData(section, orientation, role)
def data(self, index, role=Qt.ItemDataRole.DisplayRole):
if role == Qt.ItemDataRole.DisplayRole:
row = index.row()
with suppress(IndexError):
if row == 0:
return (_('System default'), '', '', '')[index.column()]
data = self.current_voices[row - 1]
col = index.column()
ans = data[col] or ''
return ans
if role == Qt.ItemDataRole.UserRole:
row = index.row()
with suppress(IndexError):
if row == 0:
return self.system_default_voice
return self.current_voices[row - 1][3]
def index_for_voice(self, v):
r = 0
if v != self.system_default_voice:
for i, x in enumerate(self.current_voices):
if x[3] == v:
r = i + 1
break
else:
return
return self.index(r, 0)
class Widget(QWidget):
def __init__(self, tts_client, initial_backend_settings=None, parent=None):
QWidget.__init__(self, parent)
self.l = l = QFormLayout(self)
self.tts_client = tts_client
with BusyCursor():
self.voice_data = self.tts_client.get_all_voices()
self.default_system_rate = self.tts_client.default_system_rate
self.all_sound_outputs = self.tts_client.get_all_audio_devices()
self.default_system_audio_device = self.tts_client.default_system_audio_device
self.speed = s = QSlider(Qt.Orientation.Horizontal, self)
s.setMinimumWidth(200)
l.addRow(_('&Speed of speech:'), s)
s.setRange(int(self.tts_client.min_rate * 100), int(100 * self.tts_client.max_rate))
s.setSingleStep(10)
s.setPageStep(40)
self.voices = v = QTableView(self)
self.voices_model = VoicesModel(self.voice_data, parent=v)
self.proxy_model = p = QSortFilterProxyModel(self)
p.setFilterCaseSensitivity(Qt.CaseSensitivity.CaseInsensitive)
p.setSourceModel(self.voices_model)
v.setModel(p)
v.setSelectionBehavior(QAbstractItemView.SelectionBehavior.SelectRows)
v.setSortingEnabled(True)
v.horizontalHeader().resizeSection(0, QFontMetrics(self.font()).averageCharWidth() * 25)
v.horizontalHeader().resizeSection(1, QFontMetrics(self.font()).averageCharWidth() * 30)
v.verticalHeader().close()
v.verticalHeader().close()
v.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection)
v.sortByColumn(0, Qt.SortOrder.AscendingOrder)
l.addRow(v)
self.sound_outputs = so = QComboBox(self)
so.addItem(_('System default'), ())
for x in self.all_sound_outputs:
so.addItem(x.name, x.spec())
l.addRow(_('Sound output:'), so)
self.backend_settings = initial_backend_settings or {}
def restore_state(self, prefs):
data = prefs.get(f'{self.tts_client.name}-voice-table-state')
if data is not None:
self.voices.horizontalHeader().restoreState(QByteArray(data))
def save_state(self, prefs):
data = bytearray(self.voices.horizontalHeader().saveState())
prefs.set(f'{self.tts_client.name}-voice-table-state', data)
def restore_to_defaults(self):
self.backend_settings = {}
def sizeHint(self):
ans = super().sizeHint()
ans.setHeight(max(ans.height(), 600))
ans.setWidth(max(ans.width(), 500))
return ans
@property
def selected_voice(self):
for x in self.voices.selectedIndexes():
return x.data(Qt.ItemDataRole.UserRole)
@selected_voice.setter
def selected_voice(self, val):
val = val or VoicesModel.system_default_voice
idx = self.voices_model.index_for_voice(val)
if idx is not None:
idx = self.proxy_model.mapFromSource(idx)
self.voices.selectionModel().select(idx, QItemSelectionModel.SelectionFlag.ClearAndSelect | QItemSelectionModel.SelectionFlag.Rows)
self.voices.scrollTo(idx)
@property
def rate(self):
return self.speed.value() / 100
@rate.setter
def rate(self, val):
val = int((val or self.default_system_rate) * 100)
self.speed.setValue(val)
@property
def sound_output(self):
return self.sound_outputs.currentData()
@sound_output.setter
def sound_output(self, val):
idx = 0
if val:
q = self.sound_outputs.findData(val)
if q > -1:
idx = q
self.sound_outputs.setCurrentIndex(idx)
@property
def backend_settings(self):
ans = {}
voice = self.selected_voice
if voice and voice != VoicesModel.system_default_voice:
ans['voice'] = voice
rate = self.rate
if rate and rate != self.default_system_rate:
ans['rate'] = rate
so = self.sound_output
if so:
ans['sound_output'] = so
return ans
@backend_settings.setter
def backend_settings(self, val):
voice = val.get('voice') or VoicesModel.system_default_voice
self.selected_voice = voice
self.rate = val.get('rate', self.default_system_rate)
self.sound_output = val.get('sound_output') or ()
def develop():
from calibre.gui2 import Application
from calibre.gui2.tts.implementation import Client
app = Application([])
c = Client()
w = Widget(c, {})
w.show()
app.exec()
print(w.backend_settings)
if __name__ == '__main__':
develop()

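As a rough illustration (an assumption based on the backend_settings property above, not part of the commit): only values that differ from the system defaults are stored, so an empty dict means "use whatever Windows is configured with". Client.apply_settings() then feeds the dict back to the backend.

# Hypothetical settings produced by the widget; the concrete values are
# placeholders, only the key names and shapes come from the code above.
settings = {
    'voice': 'some-windows-voice-id',      # omitted when the system default voice is selected
    'rate': 1.2,                           # omitted when equal to default_system_rate
    'sound_output': ('render', 'dev-id'),  # (kind, id) pair from DeviceInformation.spec()
}
# apply_settings() consumes it roughly like this:
#   backend.set_rate(settings.get('rate', default_system_rate))
#   backend.set_voice(settings.get('voice'), default_system_voice)
#   backend.set_audio_device(settings.get('sound_output'), default_system_audio_device)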
View File

@@ -756,6 +756,9 @@ static const std::unordered_map<std::string, handler_function> handlers = {
bool found = false;
if (parts.size()) {
auto voice_id = winrt::hstring(parts.at(0));
if (voice_id == L"__default__") {
voice_id = SpeechSynthesizer::DefaultVoice().Id();
}
for (auto const &candidate : SpeechSynthesizer::AllVoices()) {
if (candidate.Id() == voice_id) {
speech_synthesizer.Voice(candidate);
@@ -765,8 +768,8 @@ static const std::unordered_map<std::string, handler_function> handlers = {
}
}
auto x = speech_synthesizer.Voice();
if (x) output(cmd_id, "voice", {{"value", speech_synthesizer.Voice()}, {"found", found}});
else output(cmd_id, "voice", {{"value", ""}, {"found", found}});
if (x) output(cmd_id, "voice", {{"voice", speech_synthesizer.Voice()}, {"found", found}});
else output(cmd_id, "voice", {{"voice", ""}, {"found", found}});
}},
{"volume", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {

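A hedged sketch of why the reply key changes from "value" to "voice": the Python side builds the Voice named tuple directly from the parsed payload, so the wire field must match the tuple field name. The classes below are minimal stand-ins (field names taken from how the config widget reads VoiceInformation), not the real module.

from typing import NamedTuple, Optional

class VoiceInformation(NamedTuple):  # stand-in; the real class lives in winspeech.py
    display_name: str
    language: str
    gender: str
    id: str

class Voice(NamedTuple):
    related_to: int
    voice: Optional[VoiceInformation]
    found: bool = True

ans = {'related_to': 3, 'voice': VoiceInformation('Zira', 'en-US', 'Female', 'x'), 'found': True}
print(Voice(**ans))  # works; with the old 'value' key the same call would raise TypeError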
View File

@@ -12,7 +12,7 @@ from itertools import count
from queue import Empty, Queue
from threading import Thread
from time import monotonic
from typing import NamedTuple, Tuple
from typing import NamedTuple, Tuple, Optional
from calibre.constants import DEBUG
from calibre.utils.ipc.simple_worker import start_pipe_worker
@@ -101,11 +101,12 @@ class SpeechError(OSError):
val += f'{msg}. '
val += err.msg + ': ' + err.error + f'\nFile: {err.file} Line: {err.line}'
if err.hr:
# List of mediaserver errors is here: https://www.hresult.info/FACILITY_MEDIASERVER
val += f' HRESULT: 0x{err.hr:x}'
super().__init__(val)
class NoAudioDevices(Exception):
class NoAudioDevices(OSError):
def __init__(self):
super().__init__(_('No active audio output devices found.'
' Connect headphones or speakers. If you are using Remote Desktop then enable Remote Audio for it.'))
@@ -212,7 +213,7 @@ class DefaultVoice(NamedTuple):
class Voice(NamedTuple):
related_to: int
voice: VoiceInformation
voice: Optional[VoiceInformation]
found: bool = True
@@ -223,13 +224,21 @@ class DeviceInformation(NamedTuple):
is_default: bool
is_enabled: bool
def spec(self) -> Tuple[str, str]:
return self.kind, self.id
class AudioDevice(NamedTuple):
related_to: int
device: DeviceInformation
device: Optional[DeviceInformation]
found: bool = True
class AllAudioDevices(NamedTuple):
related_to: int
devices: Tuple[DeviceInformation, ...]
class AllVoices(NamedTuple):
related_to: int
voices: Tuple[VoiceInformation, ...]
@@ -301,11 +310,18 @@ def parse_message(line):
return AllVoices(**ans)
if msg_type == 'all_audio_devices':
ans['devices'] = tuple(DeviceInformation(**x) for x in ans['devices'])
return AudioDevice(**ans)
return AllAudioDevices(**ans)
if msg_type == 'audio_device':
if ans['device']:
ans['device'] = DeviceInformation(**ans['device'])
else:
ans['device'] = None
return AudioDevice(**ans)
if msg_type == 'voice':
if ans['voice']:
ans['voice'] = VoiceInformation(**ans['voice'])
else:
ans['voice'] = None
return Voice(**ans)
if msg_type == 'volume':
return Volume(**ans)
@@ -357,7 +373,7 @@ class WinSpeech:
line = line.strip()
if DEBUG:
with suppress(Exception):
print('winspeech:', line.decode('utf-8', 'replace'), flush=True)
print('winspeech:\x1b[32m<-\x1b[39m', line.decode('utf-8', 'replace'), flush=True)
send_msg(parse_message(line))
except OSError as e:
send_msg(Error('Failed to read from worker', str(e)))
@@ -367,7 +383,11 @@ class WinSpeech:
def send_command(self, cmd):
cmd_id = next(self.msg_id_counter)
w = self.worker
w.stdin.write(f'{cmd_id} {cmd}\n'.encode('utf-8'))
cmd = f'{cmd_id} {cmd}'
if DEBUG:
with suppress(Exception):
print('winspeech:\x1b[31m->\x1b[39m', cmd, flush=True)
w.stdin.write(f'{cmd}\n'.encode('utf-8'))
w.stdin.flush()
return cmd_id
@@ -410,6 +430,38 @@ class WinSpeech:
def play(self):
self.wait_for('play', Play, related_to=self.send_command('play'))
def set_rate(self, val):
val = float(val)
self.wait_for('Setting the rate', Rate, related_to=self.send_command(f'rate {val}'))
def set_voice(self, spec, default_system_voice):
val = spec or getattr(default_system_voice, 'id', '__default__')
x = self.wait_for('Setting the voice', Voice, related_to=self.send_command(f'voice {val}'))
if not x.found:
raise KeyError(f'Failed to find the voice: {val}')
def set_audio_device(self, spec, default_system_audio_device):
if not spec and not default_system_audio_device:
return
if not spec:
spec = default_system_audio_device.spec()
x = self.wait_for('Setting the audio device', AudioDevice, related_to=self.send_command(f'audio_device {spec[0]} {spec[1]}'))
if not x.found:
raise KeyError(f'Failed to find the audio device: {spec}')
def get_audio_device(self):
return self.wait_for('Audio device', AudioDevice, related_to=self.send_command('audio_device'))
def default_voice(self):
return self.wait_for('Default voice', DefaultVoice, related_to=self.send_command('default_voice'))
def all_voices(self):
return self.wait_for('All voices', AllVoices, related_to=self.send_command('all_voices'))
def all_audio_devices(self):
return self.wait_for('All audio devices', AllAudioDevices, related_to=self.send_command('all_audio_devices'))
# develop {{{
def develop_loop(*commands):
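The setters added above (set_rate, set_voice, set_audio_device) all follow the same request/response pattern: write "<id> <command> <args>" to the worker's stdin, then block until a reply whose related_to matches that id arrives. Below is a minimal sketch of that correlation pattern with a fake in-process transport standing in for the pipe worker; the names and reply shape are illustrative, not the real API.

from itertools import count

class FakeWorkerTransport:
    # Stands in for the pipe worker: records a reply tagged with the command
    # id so the caller can correlate request and response.
    def __init__(self):
        self.replies = []

    def send(self, line):
        cmd_id = int(line.split(None, 1)[0])
        self.replies.append({'related_to': cmd_id, 'ok': True})

counter = count(start=1)
transport = FakeWorkerTransport()

def send_command(cmd):
    cmd_id = next(counter)
    transport.send(f'{cmd_id} {cmd}')
    return cmd_id

def wait_for(related_to):
    # The real WinSpeech.wait_for() also checks the reply type and raises
    # SpeechError on Error messages; here we only match on the id.
    for reply in transport.replies:
        if reply['related_to'] == related_to:
            return reply
    raise TimeoutError(related_to)

print(wait_for(send_command('rate 1.0')))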