mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Port config to winspeech
This commit is contained in:
parent
f64b9e3e2c
commit
c8e9f33736
@ -52,8 +52,16 @@ class Client:
|
||||
self.synthesizing = False
|
||||
self.settings = settings or {}
|
||||
self.clear_chunks()
|
||||
self.default_system_audio_device = self.backend.get_audio_device().device
|
||||
self.default_system_voice = self.backend.default_voice().voice
|
||||
self.apply_settings()
|
||||
|
||||
def get_all_voices(self):
    """Return the ``voices`` field of the backend's ``all_voices()`` reply."""
    reply = self.backend.all_voices()
    return reply.voices
|
||||
|
||||
def get_all_audio_devices(self):
    """Return the ``devices`` field of the backend's ``all_audio_devices()`` reply."""
    reply = self.backend.all_audio_devices()
    return reply.devices
|
||||
|
||||
def __del__(self):
|
||||
if self.backend is not None:
|
||||
self.backend.shutdown()
|
||||
@ -63,6 +71,9 @@ class Client:
|
||||
def dispatch_msg(self, msg):
    """Hand ``msg`` to ``handle_event`` through ``dispatch_on_main_thread``."""
    deliver = partial(self.handle_event, msg)
    self.dispatch_on_main_thread(deliver)
|
||||
|
||||
def speak_current_chunk(self):
    """Ask the backend to speak the chunk at ``current_chunk_idx`` as cued text."""
    chunk = self.current_chunks[self.current_chunk_idx]
    self.backend.speak(chunk, is_cued=True)
|
||||
|
||||
def handle_event(self, x):
|
||||
if isinstance(x, MarkReached) and self.current_chunks:
|
||||
self.last_mark = x.id
|
||||
@ -74,7 +85,7 @@ class Client:
|
||||
self.callback_ignoring_errors(Event(EventType.end))
|
||||
else:
|
||||
self.current_chunk_idx += 1
|
||||
self.backend.speak(self.current_chunks[self.current_chunk_idx], is_cued=True)
|
||||
self.speak_current_chunk()
|
||||
elif x.state is MediaState.failed:
|
||||
self.clear_chunks()
|
||||
self.callback_ignoring_errors(Event(EventType.cancel))
|
||||
@ -82,7 +93,8 @@ class Client:
|
||||
e.display_to_user = True
|
||||
raise e
|
||||
elif x.state is MediaState.opened:
|
||||
self.callback_ignoring_errors(Event(EventType.begin))
|
||||
self.callback_ignoring_errors(Event(EventType.resume if self.next_start_is_resume else EventType.begin))
|
||||
self.next_start_is_resume = False
|
||||
elif isinstance(x, Error):
|
||||
raise x.as_exception(check_for_no_audio_devices=True)
|
||||
else:
|
||||
@ -98,12 +110,11 @@ class Client:
|
||||
self.clear_chunks()
|
||||
self.current_callback = callback
|
||||
self.current_chunks = tuple(split_into_chunks(text, self.chunk_size))
|
||||
self.current_chunk_idx = 0
|
||||
self.current_chunk_idx = -100
|
||||
if self.current_chunks:
|
||||
self.backend.speak(self.current_chunks[self.current_chunk_idx], is_cued=True)
|
||||
self.current_chunk_idx = 0
|
||||
self.speak_current_chunk()
|
||||
self.synthesizing = True
|
||||
if self.current_callback is not None:
|
||||
self.current_callback(Event(EventType.begin))
|
||||
|
||||
def callback_ignoring_errors(self, ev):
|
||||
if self.current_callback is not None:
|
||||
@ -115,8 +126,9 @@ class Client:
|
||||
|
||||
def clear_chunks(self):
    """Reset all chunk-tracking state to its idle values.

    ``current_chunks`` is reset to an empty tuple; the earlier assignment of
    an empty list was a dead store that the tuple assignment immediately
    overwrote, so it has been dropped.
    """
    self.synthesizing = False
    self.next_start_is_resume = False
    # -100 is the "no current chunk" value used throughout this class.
    self.current_chunk_idx = -100
    self.current_chunks = ()
    # -1 is the "no mark reached yet" value.
    self.last_mark = -1
|
||||
|
||||
def stop(self):
|
||||
@ -138,12 +150,52 @@ class Client:
|
||||
self.current_callback(Event(EventType.resume))
|
||||
|
||||
def apply_settings(self, new_settings=None):
    """Push the rate, voice and sound-output settings to the backend.

    If speech is being synthesized it is stopped first. When ``new_settings``
    is not None it replaces ``self.settings`` before anything is applied.

    A setting the backend rejects is removed from ``self.settings`` rather
    than raising. The backend's ``set_voice`` and ``set_audio_device`` signal
    an unknown voice/device with ``KeyError``, which is not an ``OSError``
    subclass, so both exception types are treated as a rejected setting for
    those two calls.
    """
    if self.synthesizing:
        self.stop()
    if new_settings is not None:
        self.settings = new_settings
    try:
        self.backend.set_rate(self.settings.get('rate', self.default_system_rate))
    except OSError:
        self.settings.pop('rate', None)
    try:
        self.backend.set_voice(self.settings.get('voice'), self.default_system_voice)
    except (OSError, KeyError):
        # KeyError: the requested voice no longer exists on this system.
        self.settings.pop('voice', None)
    try:
        self.backend.set_audio_device(self.settings.get('sound_output'), self.default_system_audio_device)
    except (OSError, KeyError):
        # KeyError: the requested audio device no longer exists.
        self.settings.pop('sound_output', None)
|
||||
|
||||
def config_widget(self, backend_settings, parent):
    """Build the configuration widget for this client.

    The widget class is imported inside the method and constructed with this
    client, ``backend_settings`` and ``parent``, in that order.
    """
    from calibre.gui2.tts.windows_config import Widget as ConfigWidget
    return ConfigWidget(self, backend_settings, parent)
|
||||
|
||||
def chunks_from_last_mark(self):
    """Return the chunks that follow ``self.last_mark``.

    The first item equal to ``last_mark`` is located; everything after it in
    the same chunk (if anything) becomes the first returned chunk, followed
    by all later chunks unchanged. An empty tuple is returned when no item
    equals ``last_mark``.
    """
    for chunk_pos, chunk in enumerate(self.current_chunks):
        for item_pos, item in enumerate(chunk):
            if not (item == self.last_mark):
                continue
            later_chunks = self.current_chunks[chunk_pos:][1:]
            tail = chunk[item_pos + 1:]
            if not tail:
                # The mark was the last item of its chunk: skip that chunk.
                return later_chunks
            return (tail,) + later_chunks
    return ()
|
||||
|
||||
def resume_after_configure(self):
    """Restart speech from the last reached mark.

    Does nothing unless ``self.synthesizing`` is true. Otherwise the chunks
    remaining after ``self.last_mark`` become the new ``current_chunks``, the
    next start is flagged as a resume, and speaking restarts from the first
    remaining chunk when there is one.
    """
    if not self.synthesizing:
        return
    # Work out what is left to speak BEFORE resetting last_mark:
    # chunks_from_last_mark() reads self.last_mark, so resetting it first
    # would make the lookup run against the -1 sentinel instead of the mark
    # that was actually reached.
    remaining = self.chunks_from_last_mark()
    self.current_chunk_idx = -100
    self.last_mark = -1
    self.current_chunks = remaining
    self.next_start_is_resume = True
    self.synthesizing = bool(remaining)
    if remaining:
        self.current_chunk_idx = 0
        self.speak_current_chunk()
|
||||
|
||||
def change_rate(self, steps=1):
|
||||
rate = current_rate = self.settings.get('rate', self.default_system_rate)
|
||||
if rate < 1:
|
||||
|
196
src/calibre/gui2/tts/windows_config.py
Normal file
196
src/calibre/gui2/tts/windows_config.py
Normal file
@ -0,0 +1,196 @@
|
||||
#!/usr/bin/env python
|
||||
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
from contextlib import suppress
|
||||
from qt.core import (
|
||||
QAbstractItemView, QAbstractTableModel, QByteArray, QComboBox, QFontMetrics,
|
||||
QFormLayout, QItemSelectionModel, QSlider, QSortFilterProxyModel, Qt, QTableView,
|
||||
QWidget
|
||||
)
|
||||
|
||||
from calibre.gui2.widgets import BusyCursor
|
||||
|
||||
|
||||
class VoicesModel(QAbstractTableModel):
    """Table model of voices with an extra "System default" first row.

    Row 0 is the system default entry; row ``n`` (n >= 1) maps to
    ``current_voices[n - 1]``. The display columns are name, language and
    gender; the voice id is exposed through ``Qt.ItemDataRole.UserRole``.
    """

    system_default_voice = '__default__'

    def __init__(self, voice_data, parent=None):
        super().__init__(parent)
        self.voice_data = voice_data
        # One (display_name, language, gender, id) tuple per voice.
        self.current_voices = tuple(
            (voice.display_name, voice.language, voice.gender, voice.id)
            for voice in voice_data
        )
        self.column_headers = (_('Name'), _('Language'), _('Gender'))

    def rowCount(self, parent=None):
        # One extra row for the "System default" entry.
        return 1 + len(self.current_voices)

    def columnCount(self, parent=None):
        return len(self.column_headers)

    def headerData(self, section, orientation, role=Qt.ItemDataRole.DisplayRole):
        if role == Qt.ItemDataRole.DisplayRole and orientation == Qt.Orientation.Horizontal:
            return self.column_headers[section]
        return super().headerData(section, orientation, role)

    def data(self, index, role=Qt.ItemDataRole.DisplayRole):
        if role == Qt.ItemDataRole.DisplayRole:
            row_number = index.row()
            # Out-of-range rows/columns fall through and yield None.
            with suppress(IndexError):
                if row_number == 0:
                    default_row = (_('System default'), '', '', '')
                    return default_row[index.column()]
                voice_row = self.current_voices[row_number - 1]
                column = index.column()
                return voice_row[column] or ''
        if role == Qt.ItemDataRole.UserRole:
            row_number = index.row()
            with suppress(IndexError):
                if row_number == 0:
                    return self.system_default_voice
                return self.current_voices[row_number - 1][3]

    def index_for_voice(self, v):
        """Return the model index of voice id ``v``, or None if it is unknown."""
        if v == self.system_default_voice:
            return self.index(0, 0)
        for position, entry in enumerate(self.current_voices):
            if entry[3] == v:
                return self.index(position + 1, 0)
        return None
|
||||
|
||||
|
||||
class Widget(QWidget):
    """Configuration widget for the TTS backend settings.

    Presents a speed slider, a sortable table of voices and a combo box of
    sound outputs. The combined state is read and written through the
    ``backend_settings`` property as a dict that may contain the keys
    ``'voice'``, ``'rate'`` and ``'sound_output'``.
    """

    def __init__(self, tts_client, initial_backend_settings=None, parent=None):
        QWidget.__init__(self, parent)
        self.l = l = QFormLayout(self)
        self.tts_client = tts_client

        # Query the client for voices, devices and defaults under a busy cursor.
        with BusyCursor():
            self.voice_data = self.tts_client.get_all_voices()
            self.default_system_rate = self.tts_client.default_system_rate
            self.all_sound_outputs = self.tts_client.get_all_audio_devices()
            self.default_system_audio_device = self.tts_client.default_system_audio_device

        # The slider holds the rate multiplied by 100 (see the ``rate`` property).
        self.speed = s = QSlider(Qt.Orientation.Horizontal, self)
        s.setMinimumWidth(200)
        l.addRow(_('&Speed of speech:'), s)
        s.setRange(int(self.tts_client.min_rate * 100), int(100 * self.tts_client.max_rate))
        s.setSingleStep(10)
        s.setPageStep(40)

        # Voices table: VoicesModel wrapped in a sort/filter proxy.
        self.voices = v = QTableView(self)
        self.voices_model = VoicesModel(self.voice_data, parent=v)
        self.proxy_model = p = QSortFilterProxyModel(self)
        p.setFilterCaseSensitivity(Qt.CaseSensitivity.CaseInsensitive)
        p.setSourceModel(self.voices_model)
        v.setModel(p)
        v.setSelectionBehavior(QAbstractItemView.SelectionBehavior.SelectRows)
        v.setSortingEnabled(True)
        v.horizontalHeader().resizeSection(0, QFontMetrics(self.font()).averageCharWidth() * 25)
        v.horizontalHeader().resizeSection(1, QFontMetrics(self.font()).averageCharWidth() * 30)
        v.verticalHeader().close()
        # NOTE(review): second identical call; looks redundant - confirm and remove.
        v.verticalHeader().close()
        v.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection)
        v.sortByColumn(0, Qt.SortOrder.AscendingOrder)
        l.addRow(v)

        # Sound outputs: the "System default" entry carries an empty tuple as
        # its data; every other entry carries the device's spec().
        self.sound_outputs = so = QComboBox(self)
        so.addItem(_('System default'), ())
        for x in self.all_sound_outputs:
            so.addItem(x.name, x.spec())
        l.addRow(_('Sound output:'), so)

        # Goes through the property setter below, so all widgets must exist by now.
        self.backend_settings = initial_backend_settings or {}

    def restore_state(self, prefs):
        """Restore the voice table's header state from ``prefs`` if one was saved."""
        data = prefs.get(f'{self.tts_client.name}-voice-table-state')
        if data is not None:
            self.voices.horizontalHeader().restoreState(QByteArray(data))

    def save_state(self, prefs):
        """Store the voice table's header state in ``prefs``."""
        data = bytearray(self.voices.horizontalHeader().saveState())
        prefs.set(f'{self.tts_client.name}-voice-table-state', data)

    def restore_to_defaults(self):
        """Reset every control by applying an empty settings dict."""
        self.backend_settings = {}

    def sizeHint(self):
        """Return the base size hint enlarged to at least 500 wide and 600 high."""
        ans = super().sizeHint()
        ans.setHeight(max(ans.height(), 600))
        ans.setWidth(max(ans.width(), 500))
        return ans

    @property
    def selected_voice(self):
        """UserRole data of the first selected index, or None when nothing is selected."""
        for x in self.voices.selectedIndexes():
            return x.data(Qt.ItemDataRole.UserRole)

    @selected_voice.setter
    def selected_voice(self, val):
        # A falsy value selects the "System default" row.
        val = val or VoicesModel.system_default_voice
        idx = self.voices_model.index_for_voice(val)
        # index_for_voice() returns None for an unknown voice; the selection
        # is then left untouched.
        if idx is not None:
            idx = self.proxy_model.mapFromSource(idx)
            self.voices.selectionModel().select(idx, QItemSelectionModel.SelectionFlag.ClearAndSelect | QItemSelectionModel.SelectionFlag.Rows)
            self.voices.scrollTo(idx)

    @property
    def rate(self):
        """The slider value divided by 100."""
        return self.speed.value() / 100

    @rate.setter
    def rate(self, val):
        # A falsy value falls back to the default system rate.
        val = int((val or self.default_system_rate) * 100)
        self.speed.setValue(val)

    @property
    def sound_output(self):
        """Data of the currently selected sound output entry."""
        return self.sound_outputs.currentData()

    @sound_output.setter
    def sound_output(self, val):
        # Index 0 is the "System default" entry; it is also the fallback when
        # ``val`` is falsy or not found among the combo box entries.
        idx = 0
        if val:
            q = self.sound_outputs.findData(val)
            if q > -1:
                idx = q
        self.sound_outputs.setCurrentIndex(idx)

    @property
    def backend_settings(self):
        """Current settings as a dict; values equal to the defaults are omitted."""
        ans = {}
        voice = self.selected_voice
        if voice and voice != VoicesModel.system_default_voice:
            ans['voice'] = voice
        rate = self.rate
        if rate and rate != self.default_system_rate:
            ans['rate'] = rate
        so = self.sound_output
        if so:
            ans['sound_output'] = so
        return ans

    @backend_settings.setter
    def backend_settings(self, val):
        voice = val.get('voice') or VoicesModel.system_default_voice
        self.selected_voice = voice
        self.rate = val.get('rate', self.default_system_rate)
        self.sound_output = val.get('sound_output') or ()
|
||||
|
||||
|
||||
def develop():
    """Show the configuration widget standalone and print the chosen settings.

    Creates an Application and a Client, shows a Widget built with empty
    settings, runs the event loop, and prints ``backend_settings`` once the
    loop exits.
    """
    from calibre.gui2 import Application
    from calibre.gui2.tts.implementation import Client

    application = Application([])
    client = Client()
    widget = Widget(client, {})
    widget.show()
    application.exec()
    print(widget.backend_settings)
|
||||
|
||||
|
||||
# Run the standalone development harness when executed as a script.
if __name__ == '__main__':
    develop()
|
@ -756,6 +756,9 @@ static const std::unordered_map<std::string, handler_function> handlers = {
|
||||
bool found = false;
|
||||
if (parts.size()) {
|
||||
auto voice_id = winrt::hstring(parts.at(0));
|
||||
if (voice_id == L"__default__") {
|
||||
voice_id = SpeechSynthesizer::DefaultVoice().Id();
|
||||
}
|
||||
for (auto const &candidate : SpeechSynthesizer::AllVoices()) {
|
||||
if (candidate.Id() == voice_id) {
|
||||
speech_synthesizer.Voice(candidate);
|
||||
@ -765,8 +768,8 @@ static const std::unordered_map<std::string, handler_function> handlers = {
|
||||
}
|
||||
}
|
||||
auto x = speech_synthesizer.Voice();
|
||||
if (x) output(cmd_id, "voice", {{"value", speech_synthesizer.Voice()}, {"found", found}});
|
||||
else output(cmd_id, "voice", {{"value", ""}, {"found", found}});
|
||||
if (x) output(cmd_id, "voice", {{"voice", speech_synthesizer.Voice()}, {"found", found}});
|
||||
else output(cmd_id, "voice", {{"voice", ""}, {"found", found}});
|
||||
}},
|
||||
|
||||
{"volume", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
|
||||
|
@ -12,7 +12,7 @@ from itertools import count
|
||||
from queue import Empty, Queue
|
||||
from threading import Thread
|
||||
from time import monotonic
|
||||
from typing import NamedTuple, Tuple
|
||||
from typing import NamedTuple, Tuple, Optional
|
||||
|
||||
from calibre.constants import DEBUG
|
||||
from calibre.utils.ipc.simple_worker import start_pipe_worker
|
||||
@ -101,11 +101,12 @@ class SpeechError(OSError):
|
||||
val += f'{msg}. '
|
||||
val += err.msg + ': ' + err.error + f'\nFile: {err.file} Line: {err.line}'
|
||||
if err.hr:
|
||||
# List of mediaserver errors is here: https://www.hresult.info/FACILITY_MEDIASERVER
|
||||
val += f' HRESULT: 0x{err.hr:x}'
|
||||
super().__init__(val)
|
||||
|
||||
|
||||
class NoAudioDevices(Exception):
|
||||
class NoAudioDevices(OSError):
|
||||
def __init__(self):
|
||||
super().__init__(_('No active audio output devices found.'
|
||||
' Connect headphones or speakers. If you are using Remote Desktop then enable Remote Audio for it.'))
|
||||
@ -212,7 +213,7 @@ class DefaultVoice(NamedTuple):
|
||||
|
||||
class Voice(NamedTuple):
    # Reply to a "voice" command.
    # Id of the command this message answers.
    related_to: int
    # Optional because the message parser sets this to None when the worker
    # reports an empty voice. The non-Optional annotation that used to precede
    # this one was a superseded duplicate and has been dropped.
    voice: Optional[VoiceInformation]
    # False when the requested voice could not be found.
    found: bool = True
|
||||
|
||||
|
||||
@ -223,13 +224,21 @@ class DeviceInformation(NamedTuple):
|
||||
is_default: bool
|
||||
is_enabled: bool
|
||||
|
||||
def spec(self) -> Tuple[str, str]:
|
||||
return self.kind, self.id
|
||||
|
||||
|
||||
class AudioDevice(NamedTuple):
    # Reply to an "audio_device" command.
    # Id of the command this message answers.
    related_to: int
    # Optional because the message parser sets this to None when the worker
    # reports an empty device. The non-Optional annotation that used to
    # precede this one was a superseded duplicate and has been dropped.
    device: Optional[DeviceInformation]
    # False when the requested device could not be found.
    found: bool = True
|
||||
|
||||
|
||||
class AllAudioDevices(NamedTuple):
    # Reply to an "all_audio_devices" command.
    # Id of the command this message answers.
    related_to: int
    # One DeviceInformation per device reported by the worker.
    devices: Tuple[DeviceInformation, ...]
|
||||
|
||||
|
||||
class AllVoices(NamedTuple):
|
||||
related_to: int
|
||||
voices: Tuple[VoiceInformation, ...]
|
||||
@ -301,11 +310,18 @@ def parse_message(line):
|
||||
return AllVoices(**ans)
|
||||
if msg_type == 'all_audio_devices':
|
||||
ans['devices'] = tuple(DeviceInformation(**x) for x in ans['devices'])
|
||||
return AudioDevice(**ans)
|
||||
return AllAudioDevices(**ans)
|
||||
if msg_type == 'audio_device':
|
||||
if ans['device']:
|
||||
ans['device'] = DeviceInformation(ans['device'])
|
||||
else:
|
||||
ans['device'] = None
|
||||
return AudioDevice(**ans)
|
||||
if msg_type == 'voice':
|
||||
if ans['voice']:
|
||||
ans['voice'] = VoiceInformation(**ans['voice'])
|
||||
else:
|
||||
ans['voice'] = None
|
||||
return Voice(**ans)
|
||||
if msg_type == 'volume':
|
||||
return Volume(**ans)
|
||||
@ -357,7 +373,7 @@ class WinSpeech:
|
||||
line = line.strip()
|
||||
if DEBUG:
|
||||
with suppress(Exception):
|
||||
print('winspeech:', line.decode('utf-8', 'replace'), flush=True)
|
||||
print('winspeech:\x1b[32m<-\x1b[39m', line.decode('utf-8', 'replace'), flush=True)
|
||||
send_msg(parse_message(line))
|
||||
except OSError as e:
|
||||
send_msg(Error('Failed to read from worker', str(e)))
|
||||
@ -367,7 +383,11 @@ class WinSpeech:
|
||||
def send_command(self, cmd):
    """Send ``cmd`` to the worker process and return the id assigned to it.

    The command is prefixed with a fresh id from ``msg_id_counter`` and
    written to the worker's stdin as one UTF-8 line, exactly once; the
    superseded earlier write of the same command has been dropped so the
    worker does not receive it twice. In DEBUG mode the outgoing line is
    also echoed to stdout.
    """
    cmd_id = next(self.msg_id_counter)
    w = self.worker
    cmd = f'{cmd_id} {cmd}'
    if DEBUG:
        # Debug output is best effort; never let it break command delivery.
        with suppress(Exception):
            print('winspeech:\x1b[31m->\x1b[39m', cmd, flush=True)
    w.stdin.write(f'{cmd}\n'.encode('utf-8'))
    w.stdin.flush()
    return cmd_id
|
||||
|
||||
@ -410,6 +430,38 @@ class WinSpeech:
|
||||
def play(self):
    """Send the ``play`` command and wait for the matching Play reply."""
    request_id = self.send_command('play')
    self.wait_for('play', Play, related_to=request_id)
|
||||
|
||||
def set_rate(self, val):
    """Send a ``rate`` command with ``val`` as a float and wait for the Rate reply."""
    rate = float(val)
    request_id = self.send_command(f'rate {rate}')
    self.wait_for('Setting the rate', Rate, related_to=request_id)
|
||||
|
||||
def set_voice(self, spec, default_system_voice):
    """Select a voice on the worker.

    ``spec`` is used when truthy; otherwise the ``id`` attribute of
    ``default_system_voice`` is used, or ``'__default__'`` when it has none.

    Raises KeyError when the reply reports that the voice was not found.
    """
    voice_id = spec or getattr(default_system_voice, 'id', '__default__')
    request_id = self.send_command(f'voice {voice_id}')
    reply = self.wait_for('Setting the voice', Voice, related_to=request_id)
    if reply.found:
        return
    raise KeyError(f'Failed to find the voice: {voice_id}')
|
||||
|
||||
def set_audio_device(self, spec, default_system_audio_device):
    """Select an audio output device on the worker.

    ``spec`` is indexed as ``(spec[0], spec[1])``. When it is falsy the
    ``spec()`` of ``default_system_audio_device`` is used instead; when both
    are falsy nothing is sent.

    Raises KeyError when the reply reports that the device was not found.
    """
    if not spec:
        if not default_system_audio_device:
            return
        spec = default_system_audio_device.spec()
    command = f'audio_device {spec[0]} {spec[1]}'
    request_id = self.send_command(command)
    reply = self.wait_for('Setting the audio device', AudioDevice, related_to=request_id)
    if reply.found:
        return
    raise KeyError(f'Failed to find the audio device: {spec}')
|
||||
|
||||
def get_audio_device(self):
    """Send a bare ``audio_device`` command and return the AudioDevice reply."""
    request_id = self.send_command('audio_device')
    return self.wait_for('Audio device', AudioDevice, related_to=request_id)
|
||||
|
||||
def default_voice(self):
    """Send the ``default_voice`` command and return the DefaultVoice reply."""
    request_id = self.send_command('default_voice')
    return self.wait_for('Default voice', DefaultVoice, related_to=request_id)
|
||||
|
||||
def all_voices(self):
    """Send the ``all_voices`` command and return the AllVoices reply."""
    request_id = self.send_command('all_voices')
    return self.wait_for('All voices', AllVoices, related_to=request_id)
|
||||
|
||||
def all_audio_devices(self):
    """Send the ``all_audio_devices`` command and return the AllAudioDevices reply."""
    request_id = self.send_command('all_audio_devices')
    return self.wait_for('All audio devices', AllAudioDevices, related_to=request_id)
|
||||
|
||||
|
||||
|
||||
# develop {{{
|
||||
def develop_loop(*commands):
|
||||
|
Loading…
x
Reference in New Issue
Block a user