Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-08 10:44:09 -04:00)
Start work on new Qt based TTS backend
This commit is contained in:
parent 79b4990b5d
commit 151f208dd4
src/calibre/gui2/tts2/__init__.py (new file, +2 lines)
@@ -0,0 +1,2 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
src/calibre/gui2/tts2/qt.py (new file, +19 lines)
@@ -0,0 +1,19 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>

from qt.core import QMediaDevices, QObject, QTextToSpeech

from .types import EngineSpecificSettings


class QtTTSBackend(QObject):

    def __init__(self, engine_name: str = '', settings: EngineSpecificSettings = EngineSpecificSettings(), parent: QObject | None = None):
        super().__init__(parent)
        s = {}
        if settings.audio_device_id:
            for x in QMediaDevices.audioOutputs():
                if bytes(x.id()) == settings.audio_device_id.id:
                    s['audioDevice'] = x
                    break
        self.tts = QTextToSpeech(engine_name, s, self)
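For context, a rough usage sketch (not part of the commit) of how this backend might be constructed with a specific output device; the device-selection code below is illustrative and simply feeds the matching loop above:

from qt.core import QCoreApplication, QMediaDevices

from calibre.gui2.tts2.qt import QtTTSBackend
from calibre.gui2.tts2.types import AudioDeviceId, EngineSpecificSettings

app = QCoreApplication([])  # QTextToSpeech needs a running Qt application object

# Wrap the default output device in the AudioDeviceId tuple the backend expects
dev = QMediaDevices.defaultAudioOutput()
settings = EngineSpecificSettings(audio_device_id=AudioDeviceId(bytes(dev.id()), dev.description()))

backend = QtTTSBackend(settings=settings)
backend.tts.say('Hello from the new TTS backend')
app.exec()  # run the event loop so the engine can actually produce audio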
src/calibre/gui2/tts2/types.py (new file, +91 lines)
@@ -0,0 +1,91 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>

from enum import Enum, auto
from functools import lru_cache
from typing import Literal, NamedTuple

from qt.core import QLocale, QTextToSpeech, QVoice

from calibre.utils.localization import canonicalize_lang


class TrackingCapability(Enum):
    NoTracking: int = auto()
    WordByWord: int = auto()
    Sentence: int = auto()


class EngineMetadata(NamedTuple):
    name: Literal['winrt', 'darwin', 'sapi', 'flite', 'speechd']
    tracking_capability: TrackingCapability = TrackingCapability.NoTracking
    allows_choosing_audio_device: bool = True
    can_synthesize_audio_data: bool = True


class Quality(Enum):
    High: int = auto()
    Medium: int = auto()
    Low: int = auto()


class Voice(NamedTuple):
    name: str
    language_code: str
    country_code: str

    human_name: str = ''
    notes: str = ''
    gender: QVoice.Gender = QVoice.Gender.Unknown
    age: QVoice.Age = QVoice.Age.Other
    quality: Quality = Quality.High


def qvoice_to_voice(v: QVoice) -> Voice:
    lang = canonicalize_lang(QLocale.languageToCode(v.language())) or 'und'
    country = QLocale.territoryToString(v.locale().territory())
    return Voice(v.name(), lang, country, gender=v.gender(), age=v.age())


class AudioDeviceId(NamedTuple):
    id: bytes
    description: str


class EngineSpecificSettings(NamedTuple):
    audio_device_id: AudioDeviceId | None = None
    voice_name: str = ''
    rate: float = 0  # -1 to 1, 0 is normal speech
    pitch: float = 0  # -1 to 1, 0 is normal speech
    volume: float | None = None  # 0 to 1, None is platform default volume


@lru_cache(2)
def available_engines() -> dict[str, EngineMetadata]:
    ans = {}
    e = QTextToSpeech()

    def qt_engine_metadata(name: str, allows_choosing_audio_device: bool = False) -> EngineMetadata:
        e.setEngine(name)
        cap = e.engineCapabilities()
        return EngineMetadata(
            name, TrackingCapability.WordByWord if cap & QTextToSpeech.Capability.WordByWordProgress else TrackingCapability.NoTracking,
            allows_choosing_audio_device, cap & QTextToSpeech.Capability.Synthesize)

    for x in QTextToSpeech.availableEngines():
        if x == 'winrt':
            ans[x] = qt_engine_metadata(x, True)
        elif x == 'darwin':
            ans[x] = qt_engine_metadata(x)
        elif x == 'sapi':
            ans[x] = qt_engine_metadata(x)
        elif x == 'macos':
            # this is slated for removal in Qt 6.8 so skip it
            continue
        elif x == 'flite':
            ans[x] = qt_engine_metadata(x, True)
        elif x == 'speechd':
            # TODO: Replace this with our own speechd client that supports word tracking
            ans[x] = qt_engine_metadata(x)
    return ans
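As a rough sketch (again, not part of the commit) of how these helpers might be exercised, the loop below enumerates the detected engines and converts each engine's QVoice objects into the Voice tuples defined above:

from qt.core import QCoreApplication, QTextToSpeech

from calibre.gui2.tts2.types import available_engines, qvoice_to_voice

app = QCoreApplication([])  # engine probing needs a Qt application object

for name, metadata in available_engines().items():
    print(name, metadata.tracking_capability.name)
    tts = QTextToSpeech(name)
    # availableVoices() lists the voices for the engine's current locale
    for voice in map(qvoice_to_voice, tts.availableVoices()):
        print('  ', voice.name, voice.language_code, voice.country_code)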