From 151f208dd4c0b0b032ff71384d55985f3939ea8c Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 24 Aug 2024 16:16:28 +0530
Subject: [PATCH] Start work on new Qt based TTS backend

---
Amended in review: docstrings and comments added, QAudioDevice.id() is now
actually called, qvoice_to_voice() is annotated as returning Voice, enum
members are no longer annotated as int and can_synthesize_audio_data is
coerced to bool. The blob ids on the index lines below are those of the
original commit, not of the amended content.

 src/calibre/gui2/tts2/__init__.py |   2 +
 src/calibre/gui2/tts2/qt.py       |  23 +++++++
 src/calibre/gui2/tts2/types.py    | 102 ++++++++++++++++++++++++++++++
 3 files changed, 127 insertions(+)
 create mode 100644 src/calibre/gui2/tts2/__init__.py
 create mode 100644 src/calibre/gui2/tts2/qt.py
 create mode 100644 src/calibre/gui2/tts2/types.py

diff --git a/src/calibre/gui2/tts2/__init__.py b/src/calibre/gui2/tts2/__init__.py
new file mode 100644
index 0000000000..10baddb581
--- /dev/null
+++ b/src/calibre/gui2/tts2/__init__.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python
+# License: GPLv3 Copyright: 2024, Kovid Goyal
diff --git a/src/calibre/gui2/tts2/qt.py b/src/calibre/gui2/tts2/qt.py
new file mode 100644
index 0000000000..723985422f
--- /dev/null
+++ b/src/calibre/gui2/tts2/qt.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# License: GPLv3 Copyright: 2024, Kovid Goyal
+
+from qt.core import QMediaDevices, QObject, QTextToSpeech
+
+from .types import EngineSpecificSettings
+
+
+class QtTTSBackend(QObject):
+    '''TTS backend that owns a QTextToSpeech instance created for the named Qt engine.'''
+
+    def __init__(self, engine_name: str = '', settings: EngineSpecificSettings = EngineSpecificSettings(), parent: QObject|None = None):
+        super().__init__(parent)
+        s = {}
+        if settings.audio_device_id:
+            # Find the currently present output device whose id matches the one in
+            # settings. If none matches, no 'audioDevice' parameter is passed to Qt.
+            for x in QMediaDevices.audioOutputs():
+                # id() is a method returning a QByteArray, so it has to be called
+                if bytes(x.id()) == settings.audio_device_id.id:
+                    s['audioDevice'] = x
+                    break
+        self.tts = QTextToSpeech(engine_name, s, self)
diff --git a/src/calibre/gui2/tts2/types.py b/src/calibre/gui2/tts2/types.py
new file mode 100644
index 0000000000..741eaeafe2
--- /dev/null
+++ b/src/calibre/gui2/tts2/types.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+# License: GPLv3 Copyright: 2024, Kovid Goyal
+
+from enum import Enum, auto
+from functools import lru_cache
+from typing import Literal, NamedTuple
+
+from qt.core import QLocale, QTextToSpeech, QVoice
+
+from calibre.utils.localization import canonicalize_lang
+
+
+class TrackingCapability(Enum):
+    '''How finely an engine can report its progress through the spoken text.'''
+    NoTracking = auto()
+    WordByWord = auto()
+    Sentence = auto()
+
+
+class EngineMetadata(NamedTuple):
+    '''Description of a TTS engine: its name and what it is able to do.'''
+    name: Literal['winrt', 'darwin', 'sapi', 'flite', 'speechd']
+    tracking_capability: TrackingCapability = TrackingCapability.NoTracking
+    allows_choosing_audio_device: bool = True
+    can_synthesize_audio_data: bool = True
+
+
+class Quality(Enum):
+    '''Quality rating attached to a Voice.'''
+    High = auto()
+    Medium = auto()
+    Low = auto()
+
+
+class Voice(NamedTuple):
+    '''Description of a single voice, built from a QVoice by qvoice_to_voice().'''
+    name: str
+    language_code: str
+    country_code: str
+
+    human_name: str = ''
+    notes: str = ''
+    gender: QVoice.Gender = QVoice.Gender.Unknown
+    age: QVoice.Age = QVoice.Age.Other
+    quality: Quality = Quality.High
+
+
+def qvoice_to_voice(v: QVoice) -> Voice:
+    '''Convert a QVoice into a Voice, falling back to 'und' when canonicalize_lang() yields nothing.'''
+    lang = canonicalize_lang(QLocale.languageToCode(v.language())) or 'und'
+    # NOTE(review): territoryToString() gives the territory name rather than a
+    # code, although the value is stored in Voice.country_code -- confirm intent.
+    country = QLocale.territoryToString(v.locale().territory())
+    return Voice(v.name(), lang, country, gender=v.gender(), age=v.age())
+
+
+class AudioDeviceId(NamedTuple):
+    '''An audio output device: its raw id bytes plus a description.'''
+    id: bytes
+    description: str
+
+
+class EngineSpecificSettings(NamedTuple):
+    '''Settings used when creating a TTS backend for a particular engine.'''
+    audio_device_id: AudioDeviceId | None = None
+    voice_name: str = ''
+    rate: float = 0  # -1 to 1 0 is normal speech
+    pitch: float = 0  # -1 to 1 0 is normal speech
+    volume: float | None = None  # 0 to 1, None is platform default volume
+
+
+
+@lru_cache(2)
+def available_engines() -> dict[str, EngineMetadata]:
+    '''Return metadata for the known engines that Qt reports as available, keyed by engine name.'''
+    ans = {}
+    e = QTextToSpeech()
+
+    def qt_engine_metadata(name: str, allows_choosing_audio_device: bool = False) -> EngineMetadata:
+        # Switch the shared QTextToSpeech instance to this engine and query it
+        e.setEngine(name)
+        cap = e.engineCapabilities()
+        return EngineMetadata(
+            name, TrackingCapability.WordByWord if cap & QTextToSpeech.Capability.WordByWordProgress else TrackingCapability.NoTracking,
+            allows_choosing_audio_device, bool(cap & QTextToSpeech.Capability.Synthesize))
+
+    for x in QTextToSpeech.availableEngines():
+        if x == 'winrt':
+            ans[x] = qt_engine_metadata(x, True)
+        elif x == 'darwin':
+            ans[x] = qt_engine_metadata(x)
+        elif x == 'sapi':
+            ans[x] = qt_engine_metadata(x)
+        elif x == 'macos':
+            # this is slated for removal in Qt 6.8 so skip it
+            continue
+        elif x == 'flite':
+            ans[x] = qt_engine_metadata(x, True)
+        elif x == 'speechd':
+            # TODO: Replace this with our own speechd client that supports word tracking
+            ans[x] = qt_engine_metadata(x)
+    return ans