diff --git a/setup/extensions.json b/setup/extensions.json
index 742a6273c3..4e0c7cdf4c 100644
--- a/setup/extensions.json
+++ b/setup/extensions.json
@@ -176,23 +176,6 @@
         "libraries": "shell32 wininet advapi32 gdi32 rstrtmgr",
         "cflags": "/X"
     },
-    {
-        "name": "winsapi",
-        "only": "windows",
-        "headers": "calibre/utils/cpp_binding.h calibre/utils/windows/common.h",
-        "sources": "calibre/utils/windows/winsapi.cpp",
-        "libraries": "SAPI Ole32",
-        "cflags": "/X"
-    },
-    {
-        "name": "winspeech",
-        "only": "windows",
-        "headers": "calibre/utils/cpp_binding.h calibre/utils/windows/common.h",
-        "sources": "calibre/utils/windows/winspeech.cpp",
-        "libraries": "WindowsApp",
-		"needs_c++": "20",
-        "cflags": "/X /Zc:__cplusplus /bigobj /permissive- /WX /Zc:twoPhase-"
-    },
     {
         "name": "wpd",
         "only": "windows",
@@ -217,7 +200,7 @@
     {
         "name": "cocoa",
         "only": "macos",
-        "sources": "calibre/utils/cocoa.m calibre/gui2/tts/nsss.m",
+        "sources": "calibre/utils/cocoa.m",
         "ldflags": "-framework Cocoa -framework UserNotifications"
     },
     {
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index 0f19825b9e..8a9a6f124e 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -275,7 +275,7 @@ class ExtensionsImporter:
             'uchardet',
         )
         if iswindows:
-            extra = ('winutil', 'wpd', 'winfonts', 'winsapi', 'winspeech')
+            extra = ('winutil', 'wpd', 'winfonts',)
         elif ismacos:
             extra = ('usbobserver', 'cocoa', 'libusb', 'libmtp')
         elif isfreebsd or ishaiku or islinux:
diff --git a/src/calibre/gui2/tts/__init__.py b/src/calibre/gui2/tts/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/src/calibre/gui2/tts/common.py b/src/calibre/gui2/tts/common.py
deleted file mode 100644
index 9550908b7d..0000000000
--- a/src/calibre/gui2/tts/common.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-from enum import Enum, auto
-
-
-class EventType(Enum):
-    mark = auto()
-    begin = auto()
-    end = auto()
-    cancel = auto()
-    pause = auto()
-    resume = auto()
-
-
-class Event:
-
-    def __init__(self, etype, data=None):
-        self.type = etype
-        self.data = data
-
-    def __repr__(self):
-        return f'Event(type={self.type}, data={self.data})'
-
-
-def add_markup(text_parts, mark_template, escape_marked_text, chunk_size=0):
-    buf = []
-    size = 0
-    for x in text_parts:
-        if isinstance(x, int):
-            item = mark_template.format(x)
-        else:
-            item = escape_marked_text(x)
-        sz = len(item)
-        if chunk_size and size + sz > chunk_size:
-            yield ''.join(buf).strip()
-            size = 0
-            buf = []
-        size += sz
-        buf.append(item)
-    if size:
-        yield ''.join(buf).strip()
diff --git a/src/calibre/gui2/tts/develop.py b/src/calibre/gui2/tts/develop.py
deleted file mode 100644
index 85ee75f412..0000000000
--- a/src/calibre/gui2/tts/develop.py
+++ /dev/null
@@ -1,186 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-
-import re
-from itertools import count
-
-from qt.core import QDialogButtonBox, QLabel, QMainWindow, Qt, QTimer, QVBoxLayout, QWidget, pyqtSignal
-
-from calibre.gui2 import Application
-
-from .common import EventType
-from .implementation import Client
-
-
-def add_markup(text):
-    buf = []
-    first = True
-    counter = count()
-    pos_map = {}
-    last = None
-    bm = Client.mark_template
-    for m in re.finditer(r'\w+', text):
-        start, end = m.start(), m.end()
-        if first:
-            first = False
-            if start:
-                buf.append(Client.escape_marked_text(text[:start]))
-        elif start > last:
-            buf.append(Client.escape_marked_text(text[last:start]))
-        num = next(counter)
-        buf.append(bm.format(num))
-        pos_map[num] = start, end
-        buf.append(Client.escape_marked_text(m.group()))
-        last = end
-    if last is None:
-        buf.append(Client.escape_marked_text(text))
-    else:
-        buf.append(Client.escape_marked_text(text[last:]))
-    return ''.join(buf), pos_map
-
-
-class TTSWidget(QWidget):
-
-    dispatch_on_main_thread_signal = pyqtSignal(object)
-    mark_changed = pyqtSignal(object)
-    show_message = pyqtSignal(object)
-    show_status = pyqtSignal(object)
-
-    def __init__(self, parent=None):
-        QWidget.__init__(self, parent)
-        self.mark_changed.connect(self.on_mark_change)
-        self.dispatch_on_main_thread_signal.connect(self.dispatch_on_main_thread, type=Qt.ConnectionType.QueuedConnection)
-        self.tts = Client({}, self.dispatch_on_main_thread_signal.emit)
-        self.l = l = QVBoxLayout(self)
-        self.la = la = QLabel(self)
-        la.setTextFormat(Qt.TextFormat.RichText)
-        la.setWordWrap(True)
-        self.text = '''\
-In their duty through weakness of will, which is the
-same as saying through shrinking from toil and pain. These cases are
-perfectly simple and easy to distinguish. In a free hour, when our
-power of choice is untrammelled and when nothing prevents our being
-able to do what we like best, every pleasure is to be welcomed and
-every pain avoided.
-
-But in certain circumstances and owing to the claims of duty or the obligations
-of business it will frequently occur that pleasures have to be repudiated and
-annoyances accepted. The wise man therefore always holds in these matters to
-this.
-
-Born and I will give you a complete account of the system, and expound the
-actual teachings of the great explorer of the truth, the master-builder of
-human happiness. No one rejects, dislikes, or avoids pleasure itself, because
-it is pleasure, but because those who do not know how to pursue pleasure
-rationally encounter consequences that are extremely painful.
-
-Nor again is there anyone who loves or pursues or desires to obtain pain of
-itself, because it is pain, but because occasionally circumstances occur in
-which toil and pain can procure him some great pleasure. To take a trivial
-example, which of.
-'''
-        self.ssml, self.pos_map = add_markup(self.text)
-        self.current_mark = None
-        l.addWidget(la)
-        self.bb = bb = QDialogButtonBox(self)
-        l.addWidget(bb)
-        self.play_button = b = bb.addButton('Play', QDialogButtonBox.ButtonRole.ActionRole)
-        b.clicked.connect(self.play_clicked)
-        self.pause_button = b = bb.addButton('Pause', QDialogButtonBox.ButtonRole.ActionRole)
-        b.clicked.connect(self.pause_clicked)
-        self.resume_button = b = bb.addButton('Resume', QDialogButtonBox.ButtonRole.ActionRole)
-        b.clicked.connect(self.resume_clicked)
-        self.stop_button = b = bb.addButton('Stop', QDialogButtonBox.ButtonRole.ActionRole)
-        b.clicked.connect(self.stop_clicked)
-        self.render_text()
-
-    def render_text(self):
-        text = self.text
-        if self.current_mark is not None:
-            start, end = self.pos_map[self.current_mark]
-            text = text[:end] + '</b>' + text[end:]
-            text = text[:start] + '<b>' + text[start:]
-        lines = ['<p>']
-        for line in text.splitlines():
-            if not line.strip():
-                lines.append('<p>')
-            else:
-                lines.append(line)
-        self.la.setText('\n'.join(lines))
-
-    def play_clicked(self):
-        self.tts.speak_marked_text(self.ssml, self.handle_event)
-
-    def pause_clicked(self):
-        self.tts.pause()
-
-    def resume_clicked(self):
-        self.tts.resume()
-
-    def stop_clicked(self):
-        self.tts.stop()
-
-    def dispatch_on_main_thread(self, func):
-        try:
-            func()
-        except Exception:
-            import traceback
-            traceback.print_exc()
-
-    def handle_event(self, event):
-        status = str(self.tts.status)
-        self.show_status.emit(str(status))
-        if event.type is EventType.mark:
-            try:
-                mark = int(event.data)
-            except Exception:
-                return
-            self.mark_changed.emit(mark)
-        else:
-            self.show_message.emit(f'Got event: {event.type.name}')
-
-    def on_mark_change(self, mark):
-        self.current_mark = mark
-        self.render_text()
-
-
-def main():
-    app = Application([])
-    w = QMainWindow()
-    sb = w.statusBar()
-    la = QLabel(sb)
-    sb.addPermanentWidget(la)
-    tts = TTSWidget(w)
-    tts.show_message.connect(sb.showMessage)
-    tts.show_status.connect(la.setText)
-    w.setCentralWidget(tts)
-    w.show()
-    app.exec()
-    tts.dispatch_on_main_thread_signal.disconnect()
-    tts.mark_changed.disconnect()
-    tts.tts.shutdown()
-
-
-def headless():
-    app = Application([])
-    c = Client()
-    text = '[[sync 0x123456]]very [[sync 0x80]]good [[sync 0x81]]indeed'
-
-    def callback():
-        for ev in c.get_events():
-            if ev.type is EventType.mark:
-                print('mark:', hex(ev.data))
-            if ev.type in (EventType.end, EventType.cancel):
-                print(ev.type)
-                app.quit()
-
-    def run():
-        c.speak_marked_text(text, callback)
-    QTimer.singleShot(10, run)
-    QTimer.singleShot(5000, app.quit)
-    app.exec()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/src/calibre/gui2/tts/errors.py b/src/calibre/gui2/tts/errors.py
deleted file mode 100644
index 42108934ae..0000000000
--- a/src/calibre/gui2/tts/errors.py
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-
-class TTSSystemUnavailable(Exception):
-    def __init__(self, message, details):
-        Exception.__init__(self, message)
-        self.short_msg = message
-        self.details = details
diff --git a/src/calibre/gui2/tts/implementation.py b/src/calibre/gui2/tts/implementation.py
deleted file mode 100644
index 434b764a3b..0000000000
--- a/src/calibre/gui2/tts/implementation.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-from calibre.constants import ismacos, iswindows
-
-if iswindows:
-    from calibre.utils.config_base import tweaks
-    if tweaks.get('prefer_winsapi'):
-        from .windows_sapi import Client
-    else:
-        from .windows import Client
-elif ismacos:
-    from .macos import Client
-else:
-    from .linux import Client
-Client
diff --git a/src/calibre/gui2/tts/linux.py b/src/calibre/gui2/tts/linux.py
deleted file mode 100644
index 1de7449951..0000000000
--- a/src/calibre/gui2/tts/linux.py
+++ /dev/null
@@ -1,220 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-from functools import partial
-
-from calibre import prepare_string_for_xml
-
-from .common import Event, EventType, add_markup
-from .errors import TTSSystemUnavailable
-
-
-def wrap_in_ssml(text):
-    return ('<?xml version="1.0"?>\n<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"><s>' +
-            text + '</s></speak>')
-
-
-class Client:
-
-    mark_template = '<mark name="{}"/>'
-    name = 'speechd'
-    min_rate = -100
-    max_rate = 100
-    chunk_size = 0
-
-    @classmethod
-    def escape_marked_text(cls, text):
-        return prepare_string_for_xml(text)
-
-    def __init__(self, settings=None, dispatch_on_main_thread=lambda f: f()):
-        self.ssip_client = None
-        self.status = {'synthesizing': False, 'paused': False}
-        self.settings = settings or {}
-        self.dispatch_on_main_thread = dispatch_on_main_thread
-        self.current_marked_text = None
-        self.last_mark = None
-        self.next_cancel_is_for_pause = False
-        self.next_begin_is_for_resume = False
-        self.current_callback = None
-        self.settings_applied = False
-        self.system_default_output_module = None
-
-    def create_ssip_client(self):
-        from speechd.client import Priority, SpawnError, SSIPClient, SSIPCommunicationError
-        try:
-            self.ssip_client = SSIPClient('calibre')
-        except SSIPCommunicationError as err:
-            ex = err.additional_exception()
-            if isinstance(ex, SpawnError):
-                raise TTSSystemUnavailable(_('Could not find speech-dispatcher on your system. Please install it.'), str(err))
-            raise
-        except SpawnError as err:
-            raise TTSSystemUnavailable(_('Could not find speech-dispatcher on your system. Please install it.'), str(err))
-        self.ssip_client.set_priority(Priority.TEXT)
-
-    def __del__(self):
-        if self.ssip_client is not None:
-            try:
-                self.ssip_client.cancel()
-            except Exception:
-                pass
-            self.ssip_client.close()
-            self.ssip_client = None
-    shutdown = __del__
-
-    def ensure_state(self, use_ssml=False):
-        if self.ssip_client is None:
-            self.create_ssip_client()
-        if self.system_default_output_module is None:
-            self.system_default_output_module = self.ssip_client.get_output_module()
-            if self.system_default_output_module == '(null)':
-                mods = self.ssip_client.list_output_modules()
-                if not mods:
-                    raise ValueError(_('Speech dispatcher on this system is not configured with any available voices. Install some voices first.'))
-                self.system_default_output_module = mods[0]
-        if not self.settings_applied:
-            self.apply_settings()
-        self.set_use_ssml(use_ssml)
-
-    def apply_settings(self, new_settings=None):
-        if new_settings is not None:
-            self.settings = new_settings
-        if self.settings_applied:
-            self.shutdown()
-            self.settings_applied = False
-            self.ensure_state()
-        om = self.settings.get('output_module')
-        if om:
-            self.ssip_client.set_output_module(om)
-        voice = self.settings.get('voice')
-        if voice:
-            self.ssip_client.set_synthesis_voice(voice[0])
-        rate = self.settings.get('rate')
-        if rate:
-            self.ssip_client.set_rate(rate)
-        self.settings_applied = True
-
-    def set_use_ssml(self, on):
-        from speechd.client import DataMode, SSIPCommunicationError
-        mode = DataMode.SSML if on else DataMode.TEXT
-        try:
-            self.ssip_client.set_data_mode(mode)
-        except SSIPCommunicationError:
-            self.ssip_client.close()
-            self.ssip_client = None
-            self.ensure_state(on)
-
-    def speak_simple_text(self, text):
-        self.stop()
-        self.ensure_state(use_ssml=False)
-        self.current_marked_text = self.last_mark = None
-
-        def callback(callback_type, index_mark=None):
-            self.dispatch_on_main_thread(partial(self.update_status, callback_type, index_mark))
-
-        self.ssip_client.speak(text, callback)
-
-    def update_status(self, callback_type, index_mark=None):
-        from speechd.client import CallbackType
-        event = None
-        if callback_type is CallbackType.INDEX_MARK:
-            self.last_mark = index_mark
-            event = Event(EventType.mark, index_mark)
-        elif callback_type is CallbackType.BEGIN:
-            self.status = {'synthesizing': True, 'paused': False}
-            event = Event(EventType.resume if self.next_begin_is_for_resume else EventType.begin)
-            self.next_begin_is_for_resume = False
-        elif callback_type is CallbackType.END:
-            self.status = {'synthesizing': False, 'paused': False}
-            event = Event(EventType.end)
-        elif callback_type is CallbackType.CANCEL:
-            if self.next_cancel_is_for_pause:
-                self.status = {'synthesizing': True, 'paused': True}
-                event = Event(EventType.pause)
-            else:
-                self.status = {'synthesizing': False, 'paused': False}
-                event = Event(EventType.cancel)
-            self.next_cancel_is_for_pause = False
-        return event
-
-    def speak_marked_text(self, marked_text, callback=lambda ev: None):
-        self.stop()
-        text = ''.join(add_markup(marked_text, self.mark_template, self.escape_marked_text, self.chunk_size))
-        self.current_marked_text = text
-        self.last_mark = None
-
-        def callback_wrapper(callback_type, index_mark=None):
-            event = self.update_status(callback_type, index_mark)
-            if event is not None:
-                try:
-                    callback(event)
-                except Exception:
-                    import traceback
-                    traceback.print_exc()
-
-        def cw(callback_type, index_mark=None):
-            self.dispatch_on_main_thread(partial(callback_wrapper, callback_type, index_mark))
-        self.current_callback = cw
-
-        self.ensure_state(use_ssml=True)
-        self.ssip_client.speak(wrap_in_ssml(text), callback=self.current_callback)
-
-    def pause(self):
-        if self.status['synthesizing'] and not self.status['paused']:
-            self.next_cancel_is_for_pause = True
-            self.ssip_client.stop()
-
-    def resume(self):
-        if self.current_marked_text is None or not self.status['synthesizing'] or not self.status['paused']:
-            return
-        self.next_begin_is_for_resume = True
-        if self.last_mark is None:
-            text = self.current_marked_text
-        else:
-            mark = self.mark_template.format(self.last_mark)
-            idx = self.current_marked_text.find(mark)
-            if idx == -1:
-                text = self.current_marked_text
-            else:
-                text = self.current_marked_text[idx:]
-        self.ensure_state(use_ssml=True)
-        self.ssip_client.speak(wrap_in_ssml(text), callback=self.current_callback)
-    resume_after_configure = resume
-
-    def stop(self):
-        self.current_callback = self.current_marked_text = self.last_mark = None
-        self.next_cancel_is_for_pause = False
-        self.next_begin_is_for_resume = False
-        if self.ssip_client is not None:
-            self.ssip_client.stop()
-
-    def config_widget(self, backend_settings, parent):
-        from calibre.gui2.tts.linux_config import Widget
-        return Widget(self, backend_settings, parent)
-
-    def get_voice_data(self):
-        ans = getattr(self, 'voice_data', None)
-        if ans is None:
-            self.ensure_state()
-            ans = self.voice_data = {}
-            output_module = self.ssip_client.get_output_module()
-            for om in self.ssip_client.list_output_modules():
-                self.ssip_client.set_output_module(om)
-                ans[om] = tuple(self.ssip_client.list_synthesis_voices())
-            self.ssip_client.set_output_module(output_module)
-        return ans
-
-    def change_rate(self, steps=1):
-        rate = current_rate = self.settings.get('rate') or 0
-        step_size = (self.max_rate - self.min_rate) // 10
-        rate += steps * step_size
-        rate = max(self.min_rate, min(rate, self.max_rate))
-        if rate != current_rate:
-            self.settings['rate'] = rate
-            prev_state = self.status.copy()
-            self.apply_settings()
-            if prev_state['synthesizing'] and not prev_state['paused']:
-                self.status['synthesizing'] = True
-                self.status['paused'] = True
-                self.resume_after_configure()
-            return self.settings
diff --git a/src/calibre/gui2/tts/linux_config.py b/src/calibre/gui2/tts/linux_config.py
deleted file mode 100644
index e581430438..0000000000
--- a/src/calibre/gui2/tts/linux_config.py
+++ /dev/null
@@ -1,215 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-from contextlib import suppress
-
-from qt.core import (
-    QAbstractItemView,
-    QAbstractTableModel,
-    QByteArray,
-    QComboBox,
-    QFontMetrics,
-    QFormLayout,
-    QItemSelectionModel,
-    QSlider,
-    QSortFilterProxyModel,
-    Qt,
-    QTableView,
-    QWidget,
-)
-
-from calibre.gui2.widgets import BusyCursor
-
-
-class VoicesModel(QAbstractTableModel):
-
-    system_default_voice = ('', '', '')
-
-    def __init__(self, voice_data, default_output_module, parent=None):
-        super().__init__(parent)
-        self.voice_data = voice_data
-        try:
-            self.current_voices = voice_data[default_output_module]
-        except KeyError as e:
-            raise ValueError(_('Speech dispatcher on this system is not configured with any available voices. Install some voices first.')) from e
-        self.column_headers = (_('Name'), _('Language'), _('Variant'))
-
-    def rowCount(self, parent=None):
-        return len(self.current_voices) + 1
-
-    def columnCount(self, parent=None):
-        return len(self.column_headers)
-
-    def headerData(self, section, orientation, role=Qt.ItemDataRole.DisplayRole):
-        if role == Qt.ItemDataRole.DisplayRole and orientation == Qt.Orientation.Horizontal:
-            return self.column_headers[section]
-        return super().headerData(section, orientation, role)
-
-    def data(self, index, role=Qt.ItemDataRole.DisplayRole):
-        if role == Qt.ItemDataRole.DisplayRole:
-            row = index.row()
-            with suppress(IndexError):
-                if row == 0:
-                    return (_('System default'), '', '')[index.column()]
-                data = self.current_voices[row - 1]
-                ans = data[index.column()]
-                if not ans or ans == 'none':
-                    ans = ''
-                return ans
-        if role == Qt.ItemDataRole.UserRole:
-            row = index.row()
-            with suppress(IndexError):
-                if row == 0:
-                    return self.system_default_voice
-                return self.current_voices[row - 1]
-
-    def change_output_module(self, om):
-        self.beginResetModel()
-        try:
-            self.current_voices = self.voice_data[om]
-        finally:
-            self.endResetModel()
-
-    def index_for_voice(self, v):
-        r = 0
-        if v != self.system_default_voice:
-            try:
-                idx = self.current_voices.index(v)
-            except Exception:
-                return
-            r = idx + 1
-        return self.index(r, 0)
-
-
-class Widget(QWidget):
-
-    def __init__(self, tts_client, initial_backend_settings=None, parent=None):
-        QWidget.__init__(self, parent)
-        self.l = l = QFormLayout(self)
-        self.tts_client = tts_client
-
-        self.speed = s = QSlider(Qt.Orientation.Horizontal, self)
-        s.setTickPosition(QSlider.TickPosition.TicksAbove)
-        s.setMinimumWidth(200)
-        l.addRow(_('&Speed of speech:'), s)
-        s.setRange(self.tts_client.min_rate, self.tts_client.max_rate)
-        s.setSingleStep(10)
-        s.setTickInterval((s.maximum() - s.minimum()) // 2)
-
-        self.output_modules = om = QComboBox(self)
-        with BusyCursor():
-            self.voice_data = self.tts_client.get_voice_data()
-            self.system_default_output_module = self.tts_client.system_default_output_module
-        om.addItem(_('System default'), self.system_default_output_module)
-        for x in self.voice_data:
-            om.addItem(x, x)
-        l.addRow(_('Speech s&ynthesizer:'), om)
-
-        self.voices = v = QTableView(self)
-        self.voices_model = VoicesModel(self.voice_data, self.system_default_output_module, parent=v)
-        self.proxy_model = p = QSortFilterProxyModel(self)
-        p.setFilterCaseSensitivity(Qt.CaseSensitivity.CaseInsensitive)
-        p.setSourceModel(self.voices_model)
-        v.setModel(p)
-        v.setSelectionBehavior(QAbstractItemView.SelectionBehavior.SelectRows)
-        v.setSortingEnabled(True)
-        h = v.horizontalHeader()
-        h.resizeSection(0, QFontMetrics(self.font()).averageCharWidth() * 30)
-        v.verticalHeader().close()
-        v.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection)
-        v.sortByColumn(0, Qt.SortOrder.AscendingOrder)
-        om.currentIndexChanged.connect(self.output_module_changed)
-        l.addRow(v)
-
-        self.backend_settings = initial_backend_settings or {}
-
-    def restore_state(self, prefs):
-        data = prefs.get(f'{self.tts_client.name}-voice-table-state')
-        if data is not None:
-            self.voices.horizontalHeader().restoreState(QByteArray(data))
-
-    def save_state(self, prefs):
-        data = bytearray(self.voices.horizontalHeader().saveState())
-        prefs.set(f'{self.tts_client.name}-voice-table-state', data)
-
-    def restore_to_defaults(self):
-        self.backend_settings = {}
-
-    def sizeHint(self):
-        ans = super().sizeHint()
-        ans.setHeight(max(ans.height(), 600))
-        return ans
-
-    @property
-    def selected_voice(self):
-        for x in self.voices.selectedIndexes():
-            return x.data(Qt.ItemDataRole.UserRole)
-
-    @selected_voice.setter
-    def selected_voice(self, val):
-        val = val or VoicesModel.system_default_voice
-        idx = self.voices_model.index_for_voice(tuple(val))
-        if idx is not None:
-            idx = self.proxy_model.mapFromSource(idx)
-            self.voices.selectionModel().select(idx, QItemSelectionModel.SelectionFlag.ClearAndSelect | QItemSelectionModel.SelectionFlag.Rows)
-            self.voices.scrollTo(idx)
-
-    @property
-    def selected_output_module(self):
-        return self.output_modules.currentData()
-
-    @selected_output_module.setter
-    def selected_output_module(self, val):
-        if not val:
-            self.output_modules.setCurrentIndex(0)
-            return
-        idx = self.output_modules.findData(val)
-        if idx < 0:
-            idx = 0
-        self.output_modules.setCurrentIndex(idx)
-
-    def output_module_changed(self, idx):
-        om = self.selected_output_module
-        self.voices_model.change_output_module(om)
-
-    @property
-    def rate(self):
-        return self.speed.value()
-
-    @rate.setter
-    def rate(self, val):
-        val = int(val or 0)
-        self.speed.setValue(val)
-
-    @property
-    def backend_settings(self):
-        ans = {}
-        om = self.selected_output_module
-        if om != self.system_default_output_module:
-            ans['output_module'] = om
-        voice = self.selected_voice
-        if voice and voice != VoicesModel.system_default_voice:
-            ans['voice'] = voice
-        rate = self.rate
-        if rate:
-            ans['rate'] = rate
-        return ans
-
-    @backend_settings.setter
-    def backend_settings(self, val):
-        om = val.get('output_module') or self.system_default_output_module
-        self.selected_output_module = om
-        voice = val.get('voice') or VoicesModel.system_default_voice
-        self.selected_voice = voice
-        self.rate = val.get('rate') or 0
-
-
-if __name__ == '__main__':
-    from calibre.gui2 import Application
-    from calibre.gui2.tts.implementation import Client
-    app = Application([])
-    c = Client({})
-    w = Widget(c, {})
-    w.show()
-    app.exec()
-    print(w.backend_settings)
diff --git a/src/calibre/gui2/tts/macos.py b/src/calibre/gui2/tts/macos.py
deleted file mode 100644
index 37f3a3cc87..0000000000
--- a/src/calibre/gui2/tts/macos.py
+++ /dev/null
@@ -1,149 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-from .common import Event, EventType, add_markup
-
-
-class Client:
-
-    mark_template = '[[sync 0x{:x}]]'
-    END_MARK = 0xffffffff
-    name = 'nsss'
-    min_rate = 10
-    max_rate = 340
-    chunk_size = 0
-
-    @classmethod
-    def escape_marked_text(cls, text):
-        return text.replace('[[', ' [ [ ').replace(']]', ' ] ] ')
-
-    def __init__(self, settings=None, dispatch_on_main_thread=lambda f: f()):
-        from calibre_extensions.cocoa import NSSpeechSynthesizer
-        self.nsss = NSSpeechSynthesizer(self.handle_message)
-        self.default_system_rate = self.nsss.get_current_rate()
-        self.default_system_voice = self.nsss.get_current_voice()
-        self.current_callback = None
-        self.current_marked_text = self.last_mark = None
-        self.dispatch_on_main_thread = dispatch_on_main_thread
-        self.status = {'synthesizing': False, 'paused': False}
-        self.settings = settings or {}
-        self.ignore_next_stop_event = False
-        self.apply_settings()
-
-    def apply_settings(self, new_settings=None):
-        if self.status['paused']:
-            self.nsss.resume()
-            self.ignore_next_stop_event = True
-            self.status = {'synthesizing': False, 'paused': False}
-        if new_settings is not None:
-            self.settings = new_settings
-        self.nsss.set_current_voice(self.settings.get('voice') or self.default_system_voice)
-        rate = self.settings.get('rate', self.default_system_rate)
-        self.nsss.set_current_rate(rate)
-
-    def __del__(self):
-        self.nsss = None
-    shutdown = __del__
-
-    def handle_message(self, message_type, data):
-        from calibre_extensions.cocoa import END, MARK
-        event = None
-        if message_type == MARK:
-            self.last_mark = data
-            event = Event(EventType.mark, data)
-        elif message_type == END:
-            if self.ignore_next_stop_event:
-                self.ignore_next_stop_event = False
-                return
-            event = Event(EventType.end if data else EventType.cancel)
-            self.status = {'synthesizing': False, 'paused': False}
-        if event is not None and self.current_callback is not None:
-            try:
-                self.current_callback(event)
-            except Exception:
-                import traceback
-                traceback.print_exc()
-
-    def speak_simple_text(self, text):
-        self.current_callback = None
-        self.current_marked_text = self.last_mark = None
-        self.nsss.speak(self.escape_marked_text(text))
-        self.status = {'synthesizing': True, 'paused': False}
-
-    def speak_marked_text(self, marked_text, callback):
-        text = ''.join(add_markup(marked_text, self.mark_template, self.escape_marked_text, self.chunk_size))
-        self.current_callback = callback
-        self.current_marked_text = text
-        self.last_mark = None
-        self.nsss.speak(text)
-        self.status = {'synthesizing': True, 'paused': False}
-        self.current_callback(Event(EventType.begin))
-
-    def pause(self):
-        if self.status['synthesizing']:
-            self.nsss.pause()
-            self.status = {'synthesizing': True, 'paused': True}
-            if self.current_callback is not None:
-                self.current_callback(Event(EventType.pause))
-
-    def resume(self):
-        if self.status['paused']:
-            self.nsss.resume()
-            self.status = {'synthesizing': True, 'paused': False}
-            if self.current_callback is not None:
-                self.current_callback(Event(EventType.resume))
-
-    def resume_after_configure(self):
-        if self.status['paused']:
-            self.resume()
-            return
-        if self.last_mark is None:
-            idx = -1
-        else:
-            mark = self.mark_template.format(self.last_mark)
-            idx = self.current_marked_text.find(mark)
-        if idx == -1:
-            text = self.current_marked_text
-        else:
-            text = self.current_marked_text[idx:]
-        self.nsss.speak(text)
-        self.status = {'synthesizing': True, 'paused': False}
-        if self.current_callback is not None:
-            self.current_callback(Event(EventType.resume))
-
-    def stop(self):
-        self.nsss.stop()
-
-    @property
-    def rate(self):
-        return self.nss.get_current_rate()
-
-    @rate.setter
-    def rate(self, val):
-        val = val or self.default_system_rate
-        self.nss.set_current_rate(float(val))
-
-    def get_voice_data(self):
-        ans = getattr(self, 'voice_data', None)
-        if ans is None:
-            ans = self.voice_data = self.nsss.get_all_voices()
-        return ans
-
-    def config_widget(self, backend_settings, parent):
-        from calibre.gui2.tts.macos_config import Widget
-        return Widget(self, backend_settings, parent)
-
-    def change_rate(self, steps=1):
-        rate = current_rate = self.settings.get('rate', self.default_system_rate)
-        step_size = (self.max_rate - self.min_rate) // 10
-        rate += steps * step_size
-        rate = max(self.min_rate, min(rate, self.max_rate))
-        if rate != current_rate:
-            self.settings['rate'] = rate
-            prev_state = self.status.copy()
-            self.pause()
-            self.apply_settings()
-            if prev_state['synthesizing']:
-                self.status = {'synthesizing': True, 'paused': False}
-                self.resume_after_configure()
-            return self.settings
diff --git a/src/calibre/gui2/tts/macos_config.py b/src/calibre/gui2/tts/macos_config.py
deleted file mode 100644
index fd69fa4464..0000000000
--- a/src/calibre/gui2/tts/macos_config.py
+++ /dev/null
@@ -1,188 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-from contextlib import suppress
-
-from qt.core import (
-    QAbstractItemView,
-    QAbstractTableModel,
-    QByteArray,
-    QFontMetrics,
-    QFormLayout,
-    QItemSelectionModel,
-    QSlider,
-    QSortFilterProxyModel,
-    Qt,
-    QTableView,
-    QWidget,
-)
-
-from calibre.gui2.widgets import BusyCursor
-
-
-class VoicesModel(QAbstractTableModel):
-
-    system_default_voice = ''
-
-    def __init__(self, voice_data, parent=None):
-        super().__init__(parent)
-        self.voice_data = voice_data
-        gmap = {'VoiceGenderNeuter': _('neutral'), 'VoiceGenderFemale': _('female'), 'VoiceGenderMale': _('male')}
-
-        def gender(x):
-            return gmap.get(x, x)
-
-        def language(x):
-            return x.get('language_display_name') or x['locale_id'] or ''
-
-        self.current_voices = tuple((x['name'], language(x), x['age'], gender(x['gender'])) for x in voice_data.values())
-        self.voice_ids = tuple(voice_data)
-        self.column_headers = _('Name'), _('Language'), _('Age'), _('Gender')
-
-    def rowCount(self, parent=None):
-        return len(self.current_voices) + 1
-
-    def columnCount(self, parent=None):
-        return len(self.column_headers)
-
-    def headerData(self, section, orientation, role=Qt.ItemDataRole.DisplayRole):
-        if role == Qt.ItemDataRole.DisplayRole and orientation == Qt.Orientation.Horizontal:
-            return self.column_headers[section]
-        return super().headerData(section, orientation, role)
-
-    def data(self, index, role=Qt.ItemDataRole.DisplayRole):
-        if role == Qt.ItemDataRole.DisplayRole:
-            row = index.row()
-            with suppress(IndexError):
-                if row == 0:
-                    return (_('System default'), '', '', '')[index.column()]
-                data = self.current_voices[row - 1]
-                col = index.column()
-                ans = data[col] or ''
-                return ans
-        if role == Qt.ItemDataRole.UserRole:
-            row = index.row()
-            with suppress(IndexError):
-                if row == 0:
-                    return self.system_default_voice
-                return self.voice_ids[row - 1]
-
-    def index_for_voice(self, v):
-        r = 0
-        if v != self.system_default_voice:
-            try:
-                idx = self.voice_ids.index(v)
-            except Exception:
-                return
-            r = idx + 1
-        return self.index(r, 0)
-
-
-class Widget(QWidget):
-
-    def __init__(self, tts_client, initial_backend_settings=None, parent=None):
-        QWidget.__init__(self, parent)
-        self.l = l = QFormLayout(self)
-        self.tts_client = tts_client
-
-        with BusyCursor():
-            self.voice_data = self.tts_client.get_voice_data()
-            self.default_system_rate = self.tts_client.default_system_rate
-
-        self.speed = s = QSlider(Qt.Orientation.Horizontal, self)
-        s.setMinimumWidth(200)
-        l.addRow(_('&Speed of speech (words per minute):'), s)
-        s.setRange(self.tts_client.min_rate, self.tts_client.max_rate)
-        s.setTickPosition(QSlider.TickPosition.TicksAbove)
-        s.setTickInterval((s.maximum() - s.minimum()) // 2)
-        s.setSingleStep(10)
-
-        self.voices = v = QTableView(self)
-        self.voices_model = VoicesModel(self.voice_data, parent=v)
-        self.proxy_model = p = QSortFilterProxyModel(self)
-        p.setFilterCaseSensitivity(Qt.CaseSensitivity.CaseInsensitive)
-        p.setSourceModel(self.voices_model)
-        v.setModel(p)
-        v.setSelectionBehavior(QAbstractItemView.SelectionBehavior.SelectRows)
-        v.setSortingEnabled(True)
-        v.horizontalHeader().resizeSection(0, QFontMetrics(self.font()).averageCharWidth() * 20)
-        v.horizontalHeader().resizeSection(1, QFontMetrics(self.font()).averageCharWidth() * 30)
-        v.verticalHeader().close()
-        v.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection)
-        v.sortByColumn(0, Qt.SortOrder.AscendingOrder)
-        l.addRow(v)
-
-        self.backend_settings = initial_backend_settings or {}
-
-    def restore_state(self, prefs):
-        data = prefs.get(f'{self.tts_client.name}-voice-table-state')
-        if data is not None:
-            self.voices.horizontalHeader().restoreState(QByteArray(data))
-
-    def save_state(self, prefs):
-        data = bytearray(self.voices.horizontalHeader().saveState())
-        prefs.set(f'{self.tts_client.name}-voice-table-state', data)
-
-    def restore_to_defaults(self):
-        self.backend_settings = {}
-
-    def sizeHint(self):
-        ans = super().sizeHint()
-        ans.setHeight(max(ans.height(), 600))
-        ans.setWidth(max(ans.width(), 500))
-        return ans
-
-    @property
-    def selected_voice(self):
-        for x in self.voices.selectedIndexes():
-            return x.data(Qt.ItemDataRole.UserRole)
-
-    @selected_voice.setter
-    def selected_voice(self, val):
-        val = val or VoicesModel.system_default_voice
-        idx = self.voices_model.index_for_voice(val)
-        if idx is not None:
-            idx = self.proxy_model.mapFromSource(idx)
-            self.voices.selectionModel().select(idx, QItemSelectionModel.SelectionFlag.ClearAndSelect | QItemSelectionModel.SelectionFlag.Rows)
-            self.voices.scrollTo(idx)
-
-    @property
-    def rate(self):
-        return self.speed.value()
-
-    @rate.setter
-    def rate(self, val):
-        val = int(val or self.default_system_rate)
-        self.speed.setValue(val)
-
-    @property
-    def backend_settings(self):
-        ans = {}
-        voice = self.selected_voice
-        if voice and voice != VoicesModel.system_default_voice:
-            ans['voice'] = voice
-        rate = self.rate
-        if rate and rate != self.default_system_rate:
-            ans['rate'] = rate
-        return ans
-
-    @backend_settings.setter
-    def backend_settings(self, val):
-        voice = val.get('voice') or VoicesModel.system_default_voice
-        self.selected_voice = voice
-        self.rate = val.get('rate') or self.default_system_rate
-
-
-def develop():
-    from calibre.gui2 import Application
-    from calibre.gui2.tts.implementation import Client
-    app = Application([])
-    c = Client()
-    w = Widget(c, {})
-    w.show()
-    app.exec()
-    print(w.backend_settings)
-
-
-if __name__ == '__main__':
-    develop()
diff --git a/src/calibre/gui2/tts/nsss.m b/src/calibre/gui2/tts/nsss.m
deleted file mode 100644
index ee19fe5614..0000000000
--- a/src/calibre/gui2/tts/nsss.m
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- * nsss.m
- * Copyright (C) 2020 Kovid Goyal <kovid at kovidgoyal.net>
- *
- * Distributed under terms of the GPL3 license.
- */
-#define PY_SSIZE_T_CLEAN
-
-#include <Python.h>
-#import <AppKit/AppKit.h>
-// Structures {{{
-typedef struct {
-    PyObject_HEAD
-    NSSpeechSynthesizer *nsss;
-	PyObject *callback;
-} NSSS;
-
-typedef enum { MARK, END } MessageType;
-
-static PyTypeObject NSSSType = {
-    PyVarObject_HEAD_INIT(NULL, 0)
-};
-
-static void
-dispatch_message(NSSS *self, MessageType which, unsigned int val) {
-	PyGILState_STATE state = PyGILState_Ensure();
-	PyObject *ret = PyObject_CallFunction(self->callback, "iI", which, val);
-	if (ret) Py_DECREF(ret);
-	else PyErr_Print();
-	PyGILState_Release(state);
-}
-
-@interface SynthesizerDelegate : NSObject <NSSpeechSynthesizerDelegate> {
-	@private
-	NSSS *nsss;
-}
-
-- (id)initWithNSSS:(NSSS *)x;
-@end
-
-@implementation SynthesizerDelegate
-
-- (id)initWithNSSS:(NSSS *)x {
-    self = [super init];
-    nsss = x;
-    return self;
-}
-
-- (void)speechSynthesizer:(NSSpeechSynthesizer *)sender didFinishSpeaking:(BOOL)success {
-	dispatch_message(nsss, END, success);
-}
-
-- (void)speechSynthesizer:(NSSpeechSynthesizer *)sender didEncounterSyncMessage:(NSString *)message {
-	NSError *err = nil;
-	NSNumber *syncProp = (NSNumber*) [sender objectForProperty: NSSpeechRecentSyncProperty error: &err];
-	if (syncProp && !err) dispatch_message(nsss, MARK, syncProp.unsignedIntValue);
-}
-
-@end
-// }}}
-
-static PyObject *
-NSSS_new(PyTypeObject *type, PyObject *args, PyObject *kwds) {
-	PyObject *callback;
-	if (!PyArg_ParseTuple(args, "O", &callback)) return NULL;
-	if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be a callable"); return NULL; }
-	NSSS *self = (NSSS *) type->tp_alloc(type, 0);
-	if (self) {
-		self->callback = callback;
-		Py_INCREF(callback);
-		self->nsss = [[NSSpeechSynthesizer alloc] initWithVoice:nil];
-		if (self->nsss) {
-			self->nsss.delegate = [[SynthesizerDelegate alloc] initWithNSSS:self];
-		} else return PyErr_NoMemory();
-	}
-	return (PyObject*)self;
-}
-
-static void
-NSSS_dealloc(NSSS *self) {
-	if (self->nsss) {
-		if (self->nsss.delegate) [self->nsss.delegate release];
-		self->nsss.delegate = nil;
-		[self->nsss release];
-	}
-	self->nsss = nil;
-	Py_CLEAR(self->callback);
-}
-
-static PyObject*
-as_python(NSObject *x) {
-	if (!x) Py_RETURN_NONE;
-	if ([x isKindOfClass:[NSString class]]) {
-		NSString *s = (NSString*)x;
-		return PyUnicode_FromString([s UTF8String]);
-	}
-	if ([x isKindOfClass:[NSNumber class]]) {
-		NSNumber *n = (NSNumber*)x;
-		return PyFloat_FromDouble([n doubleValue]);
-	}
-	Py_RETURN_NONE;
-}
-
-static PyObject*
-NSSS_get_all_voices(NSSS *self, PyObject *args) {
-	PyObject *ans = PyDict_New();
-	if (!ans) return NULL;
-	NSLocale *locale = [NSLocale autoupdatingCurrentLocale];
-	for (NSSpeechSynthesizerVoiceName voice_id in [NSSpeechSynthesizer availableVoices]) {
-		NSDictionary *attributes = [NSSpeechSynthesizer attributesForVoice:voice_id];
-		if (attributes) {
-			NSObject *lang_key = [attributes objectForKey:NSVoiceLocaleIdentifier];
-			const char *lang_name = NULL;
-			if (lang_key && [lang_key isKindOfClass:[NSString class]]) {
-				NSString *display_name = [locale displayNameForKey:NSLocaleIdentifier value:(NSString*)lang_key];
-				if (display_name) lang_name = [display_name UTF8String];
-			}
-#define E(x, y) #x, as_python([attributes objectForKey:y])
-			PyObject *v = Py_BuildValue("{sN sN sN sN sN sz}",
-					E(name, NSVoiceName), E(age, NSVoiceAge), E(gender, NSVoiceGender),
-					E(demo_text, NSVoiceDemoText), E(locale_id, NSVoiceLocaleIdentifier), "language_display_name", lang_name);
-			if (!v) { Py_DECREF(ans); return NULL; }
-#undef E
-			if (PyDict_SetItemString(ans, [voice_id UTF8String], v) != 0) {
-				Py_DECREF(ans); Py_DECREF(v); return NULL;
-			}
-			Py_DECREF(v);
-		}
-	}
-	return ans;
-}
-
-static PyObject*
-NSSS_set_command_delimiters(NSSS *self, PyObject *args) {
-	// this function doesn't actually work
-	// https://openradar.appspot.com/6524554
-	const char *left, *right;
-	if (!PyArg_ParseTuple(args, "ss", &left, &right)) return NULL;
-	NSError *err = nil;
-	[self->nsss setObject:@{NSSpeechCommandPrefix:@(left), NSSpeechCommandSuffix:@(right)} forProperty:NSSpeechCommandDelimiterProperty error:&err];
-	if (err) {
-		PyErr_SetString(PyExc_OSError, [[NSString stringWithFormat:@"Failed to set delimiters: %@", err] UTF8String]);
-		return NULL;
-	}
-	Py_RETURN_NONE;
-}
-
-static PyObject*
-NSSS_get_current_voice(NSSS *self, PyObject *args) {
-	return Py_BuildValue("s", [[self->nsss voice] UTF8String]);
-}
-
-static PyObject*
-NSSS_set_current_voice(NSSS *self, PyObject *args) {
-	const char *name;
-	if (!PyArg_ParseTuple(args, "s", &name)) return NULL;
-	BOOL ok = [self->nsss setVoice:@(name)];
-	if (ok) Py_RETURN_TRUE;
-	Py_RETURN_FALSE;
-}
-
-static PyObject*
-NSSS_any_application_speaking(NSSS *self, PyObject *args) {
-	return Py_BuildValue("O", NSSpeechSynthesizer.anyApplicationSpeaking ? Py_True : Py_False);
-}
-
-static PyObject*
-NSSS_speaking(NSSS *self, PyObject *args) {
-	return Py_BuildValue("O", self->nsss.speaking ? Py_True : Py_False);
-}
-
-static PyObject*
-NSSS_get_current_volume(NSSS *self, PyObject *args) {
-	return Py_BuildValue("f", self->nsss.volume);
-}
-
-static PyObject*
-NSSS_set_current_volume(NSSS *self, PyObject *args) {
-	float vol;
-	if (!PyArg_ParseTuple(args, "f", &vol)) return NULL;
-	self->nsss.volume = vol;
-	return Py_BuildValue("f", self->nsss.volume);
-}
-
-static PyObject*
-NSSS_get_current_rate(NSSS *self, PyObject *args) {
-	return Py_BuildValue("f", self->nsss.rate);
-}
-
-static PyObject*
-NSSS_set_current_rate(NSSS *self, PyObject *args) {
-	float vol;
-	if (!PyArg_ParseTuple(args, "f", &vol)) return NULL;
-	self->nsss.rate = vol;
-	return Py_BuildValue("f", self->nsss.rate);
-}
-
-static PyObject*
-NSSS_speak(NSSS *self, PyObject *args) {
-	const char *text;
-	if (!PyArg_ParseTuple(args, "s", &text)) return NULL;
-	if ([self->nsss startSpeakingString:@(text)]) Py_RETURN_TRUE;
-	Py_RETURN_FALSE;
-}
-
-
-static PyObject*
-NSSS_start_saving_to_path(NSSS *self, PyObject *args) {
-	const char *text, *path;
-	if (!PyArg_ParseTuple(args, "ss", &text, &path)) return NULL;
-	NSURL *url = [NSURL fileURLWithPath:@(path) isDirectory: NO];
-	BOOL ok = [self->nsss startSpeakingString:@(text) toURL:url];
-	[url release];
-	if (ok) Py_RETURN_TRUE;
-	Py_RETURN_FALSE;
-}
-
-static PyObject*
-NSSS_status(NSSS *self, PyObject *args) {
-	NSError *err = nil;
-	NSDictionary *status = [self->nsss objectForProperty:NSSpeechStatusProperty error:&err];
-	if (err) {
-		PyErr_SetString(PyExc_OSError, [[err localizedDescription] UTF8String]);
-		return NULL;
-	}
-	PyObject *ans = PyDict_New();
-	if (ans) {
-		NSNumber *result = [status objectForKey:NSSpeechStatusOutputBusy];
-		if (result) {
-			if (PyDict_SetItemString(ans, "synthesizing", [result boolValue] ? Py_True : Py_False) != 0) { Py_CLEAR(ans); return NULL; }
-		}
-		result = [status objectForKey:NSSpeechStatusOutputPaused];
-		if (result) {
-			if (PyDict_SetItemString(ans, "paused", [result boolValue] ? Py_True : Py_False) != 0) { Py_CLEAR(ans); return NULL; }
-		}
-	}
-	return ans;
-}
-
-static PyObject*
-NSSS_pause(NSSS *self, PyObject *args) {
-	unsigned int boundary = NSSpeechWordBoundary;
-	if (!PyArg_ParseTuple(args, "|I", &boundary)) return NULL;
-	[self->nsss pauseSpeakingAtBoundary:boundary];
-	Py_RETURN_NONE;
-}
-
-static PyObject*
-NSSS_resume(NSSS *self, PyObject *args) {
-	[self->nsss continueSpeaking];
-	Py_RETURN_NONE;
-}
-
-static PyObject*
-NSSS_stop(NSSS *self, PyObject *args) {
-	[self->nsss stopSpeaking];
-	Py_RETURN_NONE;
-}
-
-
-// Boilerplate {{{
-#define M(name, args) { #name, (PyCFunction)NSSS_##name, args, ""}
-static PyMethodDef NSSS_methods[] = {
-    M(get_all_voices, METH_NOARGS),
-    M(status, METH_NOARGS),
-    M(resume, METH_NOARGS),
-    M(stop, METH_NOARGS),
-    M(speak, METH_VARARGS),
-    M(start_saving_to_path, METH_VARARGS),
-    M(speaking, METH_NOARGS),
-
-    M(any_application_speaking, METH_NOARGS),
-    M(get_current_voice, METH_NOARGS),
-    M(set_current_voice, METH_VARARGS),
-    M(get_current_volume, METH_NOARGS),
-    M(set_current_volume, METH_VARARGS),
-    M(get_current_rate, METH_NOARGS),
-    M(set_current_rate, METH_VARARGS),
-	M(set_command_delimiters, METH_VARARGS),
-	M(pause, METH_VARARGS),
-    {NULL, NULL, 0, NULL}
-};
-#undef M
-
-int
-nsss_init_module(PyObject *module) {
-    NSSSType.tp_name = "cocoa.NSSpeechSynthesizer";
-    NSSSType.tp_doc = "Wrapper for NSSpeechSynthesizer";
-    NSSSType.tp_basicsize = sizeof(NSSS);
-    NSSSType.tp_itemsize = 0;
-    NSSSType.tp_flags = Py_TPFLAGS_DEFAULT;
-    NSSSType.tp_new = NSSS_new;
-    NSSSType.tp_methods = NSSS_methods;
-	NSSSType.tp_dealloc = (destructor)NSSS_dealloc;
-	if (PyType_Ready(&NSSSType) < 0) return -1;
-
-	Py_INCREF(&NSSSType);
-    if (PyModule_AddObject(module, "NSSpeechSynthesizer", (PyObject *) &NSSSType) < 0) {
-        Py_DECREF(&NSSSType);
-        return -1;
-    }
-	PyModule_AddIntMacro(module, MARK);
-	PyModule_AddIntMacro(module, END);
-	PyModule_AddIntMacro(module, NSSpeechImmediateBoundary);
-	PyModule_AddIntMacro(module, NSSpeechWordBoundary);
-	PyModule_AddIntMacro(module, NSSpeechSentenceBoundary);
-
-	return 0;
-}
-
-// }}}
diff --git a/src/calibre/gui2/tts/windows.py b/src/calibre/gui2/tts/windows.py
deleted file mode 100644
index a3e3c78269..0000000000
--- a/src/calibre/gui2/tts/windows.py
+++ /dev/null
@@ -1,239 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-from functools import partial
-
-from calibre.utils.windows.winspeech import Error, MarkReached, MediaState, MediaStateChanged, WinSpeech
-
-from .common import Event, EventType
-
-
-def split_into_chunks(marked_text, chunk_size):
-    chunk = []
-    tlen = 0
-    for x in marked_text:
-        if isinstance(x, int):
-            chunk.append(x)
-        else:
-            sz = len(x)
-            if tlen + sz > chunk_size:
-                mark = None
-                if chunk:
-                    if isinstance(chunk[-1], int):
-                        mark = chunk[-1]
-                        del chunk[-1]
-                    yield chunk
-                chunk = [] if mark is None else [mark]
-                tlen = sz
-                chunk.append(x)
-            else:
-                chunk.append(x)
-                tlen += sz
-    if chunk:
-        yield chunk
-
-
-def chunk_has_text(chunk):
-    for x in chunk:
-        if isinstance(x, str) and x:
-            return True
-    return False
-
-
-class Client:
-
-    mark_template = ''
-    name = 'winspeech'
-    min_rate = 0.5
-    max_rate = 6.0
-    default_system_rate = 1.0
-    chunk_size = 64 * 1024
-
-    @classmethod
-    def escape_marked_text(cls, text):
-        return text
-
-    def __init__(self, settings=None, dispatch_on_main_thread=lambda f: f()):
-        self.backend = WinSpeech(self.dispatch_msg)
-        self.last_mark = -1
-        self.current_callback = None
-        self.dispatch_on_main_thread = dispatch_on_main_thread
-        self.synthesizing = False
-        self.settings = settings or {}
-        self.clear_chunks()
-        self.default_system_audio_device = self.backend.get_audio_device().device
-        self.default_system_voice = self.backend.default_voice().voice
-        self.apply_settings()
-
-    def get_all_voices(self):
-        return self.backend.all_voices().voices
-
-    def get_all_audio_devices(self):
-        return self.backend.all_audio_devices().devices
-
-    def __del__(self):
-        if self.backend is not None:
-            self.backend.shutdown()
-            self.backend = None
-    shutdown = __del__
-
-    def dispatch_msg(self, msg):
-        self.dispatch_on_main_thread(partial(self.handle_event, msg))
-
-    def speak_current_chunk(self):
-        chunk = self.current_chunks[self.current_chunk_idx]
-        if chunk_has_text(chunk):
-            self.backend.speak(chunk, is_cued=True)
-        else:
-            self.handle_end_event()
-
-    def handle_end_event(self):
-        if self.current_chunk_idx >= len(self.current_chunks) - 1:
-            self.clear_chunks()
-            self.callback_ignoring_errors(Event(EventType.end))
-        else:
-            self.current_chunk_idx += 1
-            self.speak_current_chunk()
-
-    def handle_event(self, x):
-        if isinstance(x, MarkReached):
-            if self.current_chunks:
-                self.last_mark = x.id
-                self.callback_ignoring_errors(Event(EventType.mark, x.id))
-        elif isinstance(x, MediaStateChanged):
-            if self.current_chunks:
-                if x.state is MediaState.ended:
-                    self.handle_end_event()
-                elif x.state is MediaState.failed:
-                    self.clear_chunks()
-                    self.callback_ignoring_errors(Event(EventType.cancel))
-                    e = x.as_exception()
-                    e.display_to_user = True
-                    raise e
-                elif x.state is MediaState.opened:
-                    self.callback_ignoring_errors(Event(EventType.resume if self.next_start_is_resume else EventType.begin))
-                    self.next_start_is_resume = False
-        elif isinstance(x, Error):
-            raise x.as_exception(check_for_no_audio_devices=True)
-        else:
-            raise KeyError(f'Unknown event type: {x}')
-
-    def speak_simple_text(self, text):
-        self.backend.pause()
-        self.clear_chunks()
-        self.current_callback = None
-        if text:
-            self.backend.speak(text)
-
-    def speak_marked_text(self, text, callback):
-        self.backend.pause()
-        self.clear_chunks()
-        self.current_callback = callback
-        self.current_chunks = tuple(split_into_chunks(text, self.chunk_size))
-        self.current_chunk_idx = -100
-        if self.current_chunks:
-            self.current_chunk_idx = 0
-            self.speak_current_chunk()
-            self.synthesizing = True
-
-    def callback_ignoring_errors(self, ev):
-        if self.current_callback is not None:
-            try:
-                self.current_callback(ev)
-            except Exception:
-                import traceback
-                traceback.print_exc()
-
-    def clear_chunks(self):
-        self.synthesizing = False
-        self.next_start_is_resume = False
-        self.current_chunk_idx = -100
-        self.current_chunks = ()
-        self.last_mark = -1
-
-    def stop(self):
-        self.backend.pause()
-        self.synthesizing = False
-        self.clear_chunks()
-        if self.current_callback is not None:
-            self.current_callback(Event(EventType.cancel))
-
-    def pause(self):
-        self.backend.pause()
-        self.synthesizing = False
-        if self.current_callback is not None:
-            self.current_callback(Event(EventType.pause))
-
-    def resume(self):
-        self.backend.play()
-        self.synthesizing = True
-        if self.current_callback is not None:
-            self.current_callback(Event(EventType.resume))
-
-    def apply_settings(self, new_settings=None):
-        was_synthesizing = self.synthesizing
-        if self.synthesizing:
-            self.pause()
-        if new_settings is not None:
-            self.settings = new_settings
-        try:
-            self.backend.set_voice(self.settings.get('voice'), self.default_system_voice)
-        except OSError:
-            import traceback
-            traceback.print_exc()
-            self.settings.pop('voice', None)
-        try:
-            self.backend.set_rate(self.settings.get('rate', self.default_system_rate))
-        except OSError:
-            import traceback
-            traceback.print_exc()
-            self.settings.pop('rate', None)
-        try:
-            self.backend.set_audio_device(self.settings.get('sound_output'), self.default_system_audio_device)
-        except OSError:
-            import traceback
-            traceback.print_exc()
-            self.settings.pop('sound_output', None)
-        if was_synthesizing:
-            self.resume_after_configure()
-
-    def config_widget(self, backend_settings, parent):
-        from calibre.gui2.tts.windows_config import Widget
-        return Widget(self, backend_settings, parent)
-
-    def chunks_from_last_mark(self):
-        if self.last_mark > -1:
-            for i, chunk in enumerate(self.current_chunks):
-                for ci, x in enumerate(chunk):
-                    if x == self.last_mark:
-                        chunks = self.current_chunks[i:]
-                        chunk = chunk[ci + 1:]
-                        if chunk:
-                            chunks = (chunk,) + chunks[1:]
-                        else:
-                            chunks = chunks[1:]
-                        return chunks
-        return ()
-
-    def resume_after_configure(self):
-        self.current_chunks = self.chunks_from_last_mark()
-        self.current_chunk_idx = -100
-        self.last_mark = -1
-        self.next_start_is_resume = True
-        self.synthesizing = bool(self.current_chunks)
-        if self.synthesizing:
-            self.current_chunk_idx = 0
-            self.speak_current_chunk()
-
-    def change_rate(self, steps=1):
-        rate = current_rate = self.settings.get('rate', self.default_system_rate)
-        if rate < 1:
-            step_size = 0.1
-        else:
-            step_size = 0.5
-        rate += steps * step_size
-        rate = max(self.min_rate, min(rate, self.max_rate))
-        if rate != current_rate:
-            self.settings['rate'] = rate
-            self.apply_settings()
-            return self.settings
diff --git a/src/calibre/gui2/tts/windows_config.py b/src/calibre/gui2/tts/windows_config.py
deleted file mode 100644
index c044408a8b..0000000000
--- a/src/calibre/gui2/tts/windows_config.py
+++ /dev/null
@@ -1,214 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-from contextlib import suppress
-
-from qt.core import (
-    QAbstractItemView,
-    QAbstractTableModel,
-    QByteArray,
-    QComboBox,
-    QFontMetrics,
-    QFormLayout,
-    QItemSelectionModel,
-    QSlider,
-    QSortFilterProxyModel,
-    Qt,
-    QTableView,
-    QWidget,
-)
-
-from calibre.gui2.widgets import BusyCursor
-
-
-class VoicesModel(QAbstractTableModel):
-
-    system_default_voice = '__default__'
-
-    def __init__(self, voice_data, parent=None):
-        super().__init__(parent)
-        self.voice_data = voice_data
-        self.current_voices = tuple((x.display_name, x.language,  x.gender, x.id) for x in voice_data)
-        self.column_headers = _('Name'), _('Language'), _('Gender')
-
-    def rowCount(self, parent=None):
-        return len(self.current_voices) + 1
-
-    def columnCount(self, parent=None):
-        return len(self.column_headers)
-
-    def headerData(self, section, orientation, role=Qt.ItemDataRole.DisplayRole):
-        if role == Qt.ItemDataRole.DisplayRole and orientation == Qt.Orientation.Horizontal:
-            return self.column_headers[section]
-        return super().headerData(section, orientation, role)
-
-    def data(self, index, role=Qt.ItemDataRole.DisplayRole):
-        if role == Qt.ItemDataRole.DisplayRole:
-            row = index.row()
-            with suppress(IndexError):
-                if row == 0:
-                    return (_('System default'), '', '', '')[index.column()]
-                data = self.current_voices[row - 1]
-                col = index.column()
-                ans = data[col] or ''
-                return ans
-        if role == Qt.ItemDataRole.UserRole:
-            row = index.row()
-            with suppress(IndexError):
-                if row == 0:
-                    return self.system_default_voice
-                return self.current_voices[row - 1][3]
-
-    def index_for_voice(self, v):
-        r = 0
-        if v != self.system_default_voice:
-            for i, x in enumerate(self.current_voices):
-                if x[3] == v:
-                    r = i + 1
-                    break
-            else:
-                return
-        return self.index(r, 0)
-
-
-class Widget(QWidget):
-
-    def __init__(self, tts_client, initial_backend_settings=None, parent=None):
-        QWidget.__init__(self, parent)
-        self.l = l = QFormLayout(self)
-        self.tts_client = tts_client
-
-        with BusyCursor():
-            self.voice_data = self.tts_client.get_all_voices()
-            self.default_system_rate = self.tts_client.default_system_rate
-            self.all_sound_outputs = self.tts_client.get_all_audio_devices()
-            self.default_system_audio_device = self.tts_client.default_system_audio_device
-
-        self.speed = s = QSlider(Qt.Orientation.Horizontal, self)
-        s.setMinimumWidth(200)
-        l.addRow(_('&Speed of speech:'), s)
-        s.setRange(int(self.tts_client.min_rate * 100), int(100 * self.tts_client.max_rate))
-        s.setSingleStep(10)
-        s.setPageStep(40)
-
-        self.voices = v = QTableView(self)
-        self.voices_model = VoicesModel(self.voice_data, parent=v)
-        self.proxy_model = p = QSortFilterProxyModel(self)
-        p.setFilterCaseSensitivity(Qt.CaseSensitivity.CaseInsensitive)
-        p.setSourceModel(self.voices_model)
-        v.setModel(p)
-        v.setSelectionBehavior(QAbstractItemView.SelectionBehavior.SelectRows)
-        v.setSortingEnabled(True)
-        v.horizontalHeader().resizeSection(0, QFontMetrics(self.font()).averageCharWidth() * 25)
-        v.horizontalHeader().resizeSection(1, QFontMetrics(self.font()).averageCharWidth() * 30)
-        v.verticalHeader().close()
-        v.verticalHeader().close()
-        v.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection)
-        v.sortByColumn(0, Qt.SortOrder.AscendingOrder)
-        l.addRow(v)
-
-        self.sound_outputs = so = QComboBox(self)
-        so.addItem(_('System default'), ())
-        for x in self.all_sound_outputs:
-            so.addItem(x.name, x.spec())
-        l.addRow(_('Sound output:'), so)
-
-        self.backend_settings = initial_backend_settings or {}
-
-    def restore_state(self, prefs):
-        data = prefs.get(f'{self.tts_client.name}-voice-table-state')
-        if data is not None:
-            self.voices.horizontalHeader().restoreState(QByteArray(data))
-
-    def save_state(self, prefs):
-        data = bytearray(self.voices.horizontalHeader().saveState())
-        prefs.set(f'{self.tts_client.name}-voice-table-state', data)
-
-    def restore_to_defaults(self):
-        self.backend_settings = {}
-
-    def sizeHint(self):
-        ans = super().sizeHint()
-        ans.setHeight(max(ans.height(), 600))
-        ans.setWidth(max(ans.width(), 500))
-        return ans
-
-    @property
-    def selected_voice(self):
-        for x in self.voices.selectedIndexes():
-            return x.data(Qt.ItemDataRole.UserRole)
-
-    @selected_voice.setter
-    def selected_voice(self, val):
-        val = val or VoicesModel.system_default_voice
-        idx = self.voices_model.index_for_voice(val)
-        if idx is not None:
-            idx = self.proxy_model.mapFromSource(idx)
-            self.voices.selectionModel().select(idx, QItemSelectionModel.SelectionFlag.ClearAndSelect | QItemSelectionModel.SelectionFlag.Rows)
-            self.voices.scrollTo(idx)
-
-    @property
-    def rate(self):
-        return self.speed.value() / 100
-
-    @rate.setter
-    def rate(self, val):
-        val = int((val or self.default_system_rate) * 100)
-        self.speed.setValue(val)
-
-    @property
-    def sound_output(self):
-        return self.sound_outputs.currentData()
-
-    @sound_output.setter
-    def sound_output(self, val):
-        idx = 0
-        if val:
-            val = tuple(val)
-            for q in range(self.sound_outputs.count()):
-                x = self.sound_outputs.itemData(q)
-                if x == val:
-                    idx = q
-                    break
-        self.sound_outputs.setCurrentIndex(idx)
-
-    @property
-    def backend_settings(self):
-        ans = {}
-        voice = self.selected_voice
-        if voice and voice != VoicesModel.system_default_voice:
-            ans['voice'] = voice
-        rate = self.rate
-        if rate and rate != self.default_system_rate:
-            ans['rate'] = rate
-        so = self.sound_output
-        if so:
-            ans['sound_output'] = so
-        return ans
-
-    @backend_settings.setter
-    def backend_settings(self, val):
-        voice = val.get('voice') or VoicesModel.system_default_voice
-        self.selected_voice = voice
-        self.rate = val.get('rate', self.default_system_rate)
-        self.sound_output = val.get('sound_output') or ()
-
-
-def develop():
-    from calibre.gui2 import Application
-    from calibre.gui2.tts.implementation import Client
-    from calibre.gui2.viewer.config import vprefs
-    s = vprefs.get('tts_winspeech') or {}
-    print(s)
-    print(flush=True)
-    app = Application([])
-    c = Client()
-    w = Widget(c, s)
-    w.show()
-    app.exec()
-    print(flush=True)
-    print(w.backend_settings)
-
-
-if __name__ == '__main__':
-    develop()
diff --git a/src/calibre/gui2/tts/windows_sapi.py b/src/calibre/gui2/tts/windows_sapi.py
deleted file mode 100644
index 853271165d..0000000000
--- a/src/calibre/gui2/tts/windows_sapi.py
+++ /dev/null
@@ -1,285 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-
-from threading import Thread
-from time import monotonic
-from typing import NamedTuple
-
-from calibre import prepare_string_for_xml
-
-from .common import Event, EventType, add_markup
-
-
-class QueueEntry(NamedTuple):
-    stream_number: int
-    text: str
-
-
-class SpeechQueue:
-
-    def __init__(self):
-        self.clear()
-
-    def __len__(self):
-        return len(self.items)
-
-    def clear(self, keep_mark=False):
-        self.items = []
-        self.pos = -1
-        if not keep_mark:
-            self.last_mark = None
-
-    def add(self, stream_number, text):
-        self.items.append(QueueEntry(stream_number, text))
-
-    def start(self, stream_number):
-        self.pos = -1
-        for i, x in enumerate(self.items):
-            if x.stream_number == stream_number:
-                self.pos = i
-                break
-
-    @property
-    def is_at_start(self):
-        return self.pos == 0
-
-    @property
-    def is_at_end(self):
-        return self.pos >= len(self.items) - 1
-
-    @property
-    def current_stream_number(self):
-        if -1 < self.pos < len(self.items):
-            return self.items[self.pos].stream_number
-
-    def resume_from_last_mark(self, mark_template):
-        if self.pos < 0 or self.pos >= len(self.items):
-            return
-        item = self.items[self.pos]
-        if self.last_mark is None:
-            idx = -1
-        else:
-            idx = item.text.find(mark_template.format(self.last_mark))
-        if idx == -1:
-            text = item.text
-        else:
-            text = item.text[idx:]
-        yield text
-        for i in range(self.pos + 1, len(self.items)):
-            yield self.items[i].text
-
-
-class Client:
-
-    mark_template = '<bookmark mark="{}"/>'
-    name = 'sapi'
-    min_rate = -10
-    max_rate = 10
-    chunk_size = 128 * 1024
-
-    @classmethod
-    def escape_marked_text(cls, text):
-        return prepare_string_for_xml(text)
-
-    def __init__(self, settings=None, dispatch_on_main_thread=lambda f: f()):
-        self.create_voice()
-        self.ignore_next_stop_event = None
-        self.ignore_next_start_event = False
-        self.default_system_rate = self.sp_voice.get_current_rate()
-        self.default_system_voice = self.sp_voice.get_current_voice()
-        self.default_system_sound_output = self.sp_voice.get_current_sound_output()
-        self.current_stream_queue = SpeechQueue()
-        self.current_callback = None
-        self.dispatch_on_main_thread = dispatch_on_main_thread
-        self.synthesizing = False
-        self.pause_count = 0
-        self.settings = settings or {}
-        self.apply_settings()
-
-    @property
-    def status(self):
-        return {'synthesizing': self.synthesizing, 'paused': self.pause_count > 0}
-
-    def clear_pauses(self):
-        while self.pause_count:
-            self.sp_voice.resume()
-            self.pause_count -= 1
-
-    def create_voice(self):
-        from calibre.utils.windows.winsapi import ISpVoice
-        self.sp_voice = ISpVoice()
-        self.events_thread = Thread(name='SAPIEvents', target=self.wait_for_events, daemon=True)
-        self.events_thread.start()
-
-    def __del__(self):
-        if self.sp_voice is not None:
-            self.sp_voice.shutdown_event_loop()
-            self.events_thread.join(5)
-            self.sp_voice = None
-    shutdown = __del__
-
-    def apply_settings(self, new_settings=None):
-        if self.pause_count:
-            self.clear_pauses()
-            self.ignore_next_stop_event = monotonic()
-            self.synthesizing = False
-        if new_settings is not None:
-            self.settings = new_settings
-        try:
-            self.sp_voice.set_current_rate(self.settings.get('rate', self.default_system_rate))
-        except OSError:
-            self.settings.pop('rate', None)
-        try:
-            self.sp_voice.set_current_voice(self.settings.get('voice') or self.default_system_voice)
-        except OSError:
-            self.settings.pop('voice', None)
-        try:
-            self.sp_voice.set_current_sound_output(self.settings.get('sound_output') or self.default_system_sound_output)
-        except OSError:
-            self.settings.pop('sound_output', None)
-
-    def wait_for_events(self):
-        while True:
-            if self.sp_voice.wait_for_event() is False:
-                break
-            self.dispatch_on_main_thread(self.handle_events)
-
-    def handle_events(self):
-        from calibre_extensions.winsapi import SPEI_END_INPUT_STREAM, SPEI_START_INPUT_STREAM, SPEI_TTS_BOOKMARK
-        c = self.current_callback
-
-        for (stream_number, event_type, event_data) in self.sp_voice.get_events():
-            if event_type == SPEI_TTS_BOOKMARK:
-                self.current_stream_queue.last_mark = event_data
-                event = Event(EventType.mark, event_data)
-            elif event_type == SPEI_START_INPUT_STREAM:
-                self.current_stream_queue.start(stream_number)
-                if self.ignore_next_start_event:
-                    self.ignore_next_start_event = False
-                    continue
-                self.synthesizing = True
-                if not self.current_stream_queue.is_at_start:
-                    continue
-                event = Event(EventType.begin)
-            elif event_type == SPEI_END_INPUT_STREAM:
-                if self.ignore_next_stop_event is not None and monotonic() - self.ignore_next_stop_event < 2:
-                    self.ignore_next_stop_event = None
-                    continue
-                self.synthesizing = False
-                if not self.current_stream_queue.is_at_end:
-                    continue
-                event = Event(EventType.end)
-            else:
-                continue
-            if c is not None and stream_number == self.current_stream_queue.current_stream_number:
-                try:
-                    c(event)
-                except Exception:
-                    import traceback
-                    traceback.print_exc()
-
-    def speak_implementation(self, *args):
-        try:
-            return self.sp_voice.speak(*args)
-        except OSError as err:
-            # see https://docs.microsoft.com/en-us/previous-versions/office/developer/speech-technologies/jj127491(v=msdn.10)
-            import re
-            hr = int(re.search(r'\[hr=(0x\S+)', str(err)).group(1), 16)
-            if hr == 0x8004503a:
-                raise OSError(_('No active audio output devices found. Connect headphones or speakers.')) from err
-            raise
-
-    def speak(self, text, is_xml=False, want_events=True, purge=True):
-        from calibre_extensions.winsapi import SPF_ASYNC, SPF_IS_NOT_XML, SPF_IS_XML, SPF_PURGEBEFORESPEAK
-        flags = SPF_IS_XML if is_xml else SPF_IS_NOT_XML
-        if purge:
-            flags |= SPF_PURGEBEFORESPEAK
-        return self.speak_implementation(text, flags | SPF_ASYNC, want_events)
-
-    def purge(self):
-        from calibre_extensions.winsapi import SPF_PURGEBEFORESPEAK
-        self.speak_implementation('', SPF_PURGEBEFORESPEAK, False)
-        self.synthesizing = False
-
-    def speak_simple_text(self, text):
-        self.current_callback = None
-        self.current_stream_queue.clear()
-        number = self.speak(text)
-        self.clear_pauses()
-        self.current_stream_queue.add(number, text)
-
-    def speak_marked_text(self, text, callback):
-        self.clear_pauses()
-        self.current_stream_queue.clear()
-        if self.synthesizing:
-            self.ignore_next_stop_event = monotonic()
-        self.current_callback = callback
-        for i, chunk in enumerate(add_markup(text, self.mark_template, self.escape_marked_text, self.chunk_size)):
-            number = self.speak(chunk, is_xml=True, purge=i == 0)
-            self.current_stream_queue.add(number, chunk)
-
-    def stop(self):
-        self.clear_pauses()
-        self.purge()
-        if self.current_callback is not None:
-            self.current_callback(Event(EventType.cancel))
-        self.current_callback = None
-
-    def pause(self):
-        self.sp_voice.pause()
-        self.pause_count += 1
-        if self.current_callback is not None:
-            self.current_callback(Event(EventType.pause))
-
-    def resume(self):
-        if self.pause_count:
-            self.clear_pauses()
-            if self.current_callback is not None:
-                self.current_callback(Event(EventType.resume))
-
-    def resume_after_configure(self):
-        if self.pause_count:
-            self.clear_pauses()
-            return
-        chunks = tuple(self.current_stream_queue.resume_from_last_mark(self.mark_template))
-        self.ignore_next_start_event = True
-        self.current_stream_queue.clear(keep_mark=True)
-        self.purge()
-        for chunk in chunks:
-            number = self.speak(chunk, is_xml=True, purge=False)
-            self.current_stream_queue.add(number, chunk)
-        if self.current_callback is not None:
-            self.current_callback(Event(EventType.resume))
-        self.synthesizing = bool(chunks)
-
-    def get_voice_data(self):
-        ans = getattr(self, 'voice_data', None)
-        if ans is None:
-            ans = self.voice_data = self.sp_voice.get_all_voices()
-        return ans
-
-    def get_sound_outputs(self):
-        ans = getattr(self, 'sound_outputs', None)
-        if ans is None:
-            ans = self.sound_outputs = self.sp_voice.get_all_sound_outputs()
-        return ans
-
-    def config_widget(self, backend_settings, parent):
-        from calibre.gui2.tts.windows_sapi_config import Widget
-        return Widget(self, backend_settings, parent)
-
-    def change_rate(self, steps=1):
-        rate = current_rate = self.settings.get('rate', self.default_system_rate)
-        step_size = (self.max_rate - self.min_rate) // 10
-        rate += steps * step_size
-        rate = max(self.min_rate, min(rate, self.max_rate))
-        if rate != current_rate:
-            self.settings['rate'] = rate
-            was_synthesizing = self.synthesizing
-            self.pause()
-            self.apply_settings()
-            if was_synthesizing:
-                self.synthesizing = True
-                self.resume_after_configure()
-            return self.settings
diff --git a/src/calibre/gui2/tts/windows_sapi_config.py b/src/calibre/gui2/tts/windows_sapi_config.py
deleted file mode 100644
index da0ebe6573..0000000000
--- a/src/calibre/gui2/tts/windows_sapi_config.py
+++ /dev/null
@@ -1,210 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-from contextlib import suppress
-
-from qt.core import (
-    QAbstractItemView,
-    QAbstractTableModel,
-    QByteArray,
-    QComboBox,
-    QFontMetrics,
-    QFormLayout,
-    QItemSelectionModel,
-    QSlider,
-    QSortFilterProxyModel,
-    Qt,
-    QTableView,
-    QWidget,
-)
-
-from calibre.gui2.widgets import BusyCursor
-
-
-class VoicesModel(QAbstractTableModel):
-
-    system_default_voice = ''
-
-    def __init__(self, voice_data, parent=None):
-        super().__init__(parent)
-        self.voice_data = voice_data
-
-        def language(x):
-            return x.get('language_display_name') or x.get('language') or ''
-
-        self.current_voices = tuple((x['name'], language(x), x.get('age', ''), x.get('gender', ''), x['id']) for x in voice_data)
-        self.column_headers = _('Name'), _('Language'), _('Age'), _('Gender')
-
-    def rowCount(self, parent=None):
-        return len(self.current_voices) + 1
-
-    def columnCount(self, parent=None):
-        return len(self.column_headers)
-
-    def headerData(self, section, orientation, role=Qt.ItemDataRole.DisplayRole):
-        if role == Qt.ItemDataRole.DisplayRole and orientation == Qt.Orientation.Horizontal:
-            return self.column_headers[section]
-        return super().headerData(section, orientation, role)
-
-    def data(self, index, role=Qt.ItemDataRole.DisplayRole):
-        if role == Qt.ItemDataRole.DisplayRole:
-            row = index.row()
-            with suppress(IndexError):
-                if row == 0:
-                    return (_('System default'), '', '', '')[index.column()]
-                data = self.current_voices[row - 1]
-                col = index.column()
-                ans = data[col] or ''
-                return ans
-        if role == Qt.ItemDataRole.UserRole:
-            row = index.row()
-            with suppress(IndexError):
-                if row == 0:
-                    return self.system_default_voice
-                return self.current_voices[row - 1][4]
-
-    def index_for_voice(self, v):
-        r = 0
-        if v != self.system_default_voice:
-            for i, x in enumerate(self.current_voices):
-                if x[4] == v:
-                    r = i + 1
-                    break
-            else:
-                return
-        return self.index(r, 0)
-
-
-class Widget(QWidget):
-
-    def __init__(self, tts_client, initial_backend_settings=None, parent=None):
-        QWidget.__init__(self, parent)
-        self.l = l = QFormLayout(self)
-        self.tts_client = tts_client
-
-        with BusyCursor():
-            self.voice_data = self.tts_client.get_voice_data()
-            self.default_system_rate = self.tts_client.default_system_rate
-            self.all_sound_outputs = self.tts_client.get_sound_outputs()
-
-        self.speed = s = QSlider(Qt.Orientation.Horizontal, self)
-        s.setMinimumWidth(200)
-        l.addRow(_('&Speed of speech (words per minute):'), s)
-        s.setRange(self.tts_client.min_rate, self.tts_client.max_rate)
-        s.setSingleStep(1)
-        s.setPageStep(2)
-
-        self.voices = v = QTableView(self)
-        self.voices_model = VoicesModel(self.voice_data, parent=v)
-        self.proxy_model = p = QSortFilterProxyModel(self)
-        p.setFilterCaseSensitivity(Qt.CaseSensitivity.CaseInsensitive)
-        p.setSourceModel(self.voices_model)
-        v.setModel(p)
-        v.setSelectionBehavior(QAbstractItemView.SelectionBehavior.SelectRows)
-        v.setSortingEnabled(True)
-        v.horizontalHeader().resizeSection(0, QFontMetrics(self.font()).averageCharWidth() * 25)
-        v.horizontalHeader().resizeSection(1, QFontMetrics(self.font()).averageCharWidth() * 30)
-        v.verticalHeader().close()
-        v.verticalHeader().close()
-        v.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection)
-        v.sortByColumn(0, Qt.SortOrder.AscendingOrder)
-        l.addRow(v)
-
-        self.sound_outputs = so = QComboBox(self)
-        so.addItem(_('System default'), '')
-        for x in self.all_sound_outputs:
-            so.addItem(x.get('description') or x['id'], x['id'])
-        l.addRow(_('Sound output:'), so)
-
-        self.backend_settings = initial_backend_settings or {}
-
-    def restore_state(self, prefs):
-        data = prefs.get(f'{self.tts_client.name}-voice-table-state')
-        if data is not None:
-            self.voices.horizontalHeader().restoreState(QByteArray(data))
-
-    def save_state(self, prefs):
-        data = bytearray(self.voices.horizontalHeader().saveState())
-        prefs.set(f'{self.tts_client.name}-voice-table-state', data)
-
-    def restore_to_defaults(self):
-        self.backend_settings = {}
-
-    def sizeHint(self):
-        ans = super().sizeHint()
-        ans.setHeight(max(ans.height(), 600))
-        ans.setWidth(max(ans.width(), 500))
-        return ans
-
-    @property
-    def selected_voice(self):
-        for x in self.voices.selectedIndexes():
-            return x.data(Qt.ItemDataRole.UserRole)
-
-    @selected_voice.setter
-    def selected_voice(self, val):
-        val = val or VoicesModel.system_default_voice
-        idx = self.voices_model.index_for_voice(val)
-        if idx is not None:
-            idx = self.proxy_model.mapFromSource(idx)
-            self.voices.selectionModel().select(idx, QItemSelectionModel.SelectionFlag.ClearAndSelect | QItemSelectionModel.SelectionFlag.Rows)
-            self.voices.scrollTo(idx)
-
-    @property
-    def rate(self):
-        return self.speed.value()
-
-    @rate.setter
-    def rate(self, val):
-        val = int(val or self.default_system_rate)
-        self.speed.setValue(val)
-
-    @property
-    def sound_output(self):
-        return self.sound_outputs.currentData()
-
-    @sound_output.setter
-    def sound_output(self, val):
-        val = val or ''
-        idx = 0
-        if val:
-            q = self.sound_outputs.findData(val)
-            if q > -1:
-                idx = q
-        self.sound_outputs.setCurrentIndex(idx)
-
-    @property
-    def backend_settings(self):
-        ans = {}
-        voice = self.selected_voice
-        if voice and voice != VoicesModel.system_default_voice:
-            ans['voice'] = voice
-        rate = self.rate
-        if rate and rate != self.default_system_rate:
-            ans['rate'] = rate
-        so = self.sound_output
-        if so:
-            ans['sound_output'] = so
-        return ans
-
-    @backend_settings.setter
-    def backend_settings(self, val):
-        voice = val.get('voice') or VoicesModel.system_default_voice
-        self.selected_voice = voice
-        self.rate = val.get('rate') or self.default_system_rate
-        self.sound_output = val.get('sound_output') or ''
-
-
-def develop():
-    from calibre.gui2 import Application
-    from calibre.gui2.tts.implementation import Client
-    app = Application([])
-    c = Client()
-    w = Widget(c, {})
-    w.show()
-    app.exec()
-    print(w.backend_settings)
-
-
-if __name__ == '__main__':
-    develop()
diff --git a/src/calibre/utils/run_tests.py b/src/calibre/utils/run_tests.py
index 36960d3404..e72402e232 100644
--- a/src/calibre/utils/run_tests.py
+++ b/src/calibre/utils/run_tests.py
@@ -303,8 +303,6 @@ def find_tests(which_tests=None, exclude_tests=None):
         if iswindows:
             from calibre.utils.windows.wintest import find_tests
             a(find_tests())
-            from calibre.utils.windows.winsapi import find_tests
-            a(find_tests())
         a(unittest.defaultTestLoader.loadTestsFromTestCase(TestImports))
     if ok('dbcli'):
         from calibre.db.cli.tests import find_tests
diff --git a/src/calibre/utils/windows/winsapi.cpp b/src/calibre/utils/windows/winsapi.cpp
deleted file mode 100644
index 692ed6928c..0000000000
--- a/src/calibre/utils/windows/winsapi.cpp
+++ /dev/null
@@ -1,619 +0,0 @@
-/*
- * winsapi.cpp
- * Copyright (C) 2020 Kovid Goyal <kovid at kovidgoyal.net>
- *
- * Distributed under terms of the GPL3 license.
- */
-
-#define _ATL_APARTMENT_THREADED
-#include "common.h"
-
-#include <atlbase.h>
-extern CComModule _Module;
-#include <atlcom.h>
-
-#include <sapi.h>
-#pragma warning( push )
-#pragma warning( disable : 4996 )  // sphelper.h uses deprecated GetVersionEx
-#include <sphelper.h>
-#pragma warning( pop )
-
-// Structures {{{
-typedef struct {
-    PyObject_HEAD
-    ISpVoice *voice;
-    HANDLE shutdown_events_thread, events_available;
-} Voice;
-
-
-static PyTypeObject VoiceType = {
-    PyVarObject_HEAD_INIT(NULL, 0)
-};
-
-static const ULONGLONG speak_events = SPFEI(SPEI_START_INPUT_STREAM) | SPFEI(SPEI_END_INPUT_STREAM) | SPFEI(SPEI_TTS_BOOKMARK);
-
-static PyObject *
-Voice_new(PyTypeObject *type, PyObject *args, PyObject *kwds) {
-    HRESULT hr = CoInitialize(NULL);
-    if (hr != S_OK && hr != S_FALSE) {
-        if (hr == RPC_E_CHANGED_MODE) {
-            return error_from_hresult(hr, "COM initialization failed as it was already initialized in multi-threaded mode");
-        }
-        return PyErr_NoMemory();
-    }
-	Voice *self = (Voice *) type->tp_alloc(type, 0);
-    if (self) {
-        if (FAILED(hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&self->voice))) {
-            Py_CLEAR(self);
-            return error_from_hresult(hr, "Failed to create ISpVoice instance");
-        }
-        if (FAILED(hr = self->voice->SetNotifyWin32Event())) {
-            Py_CLEAR(self);
-            return error_from_hresult(hr, "Failed to set event based notify mechanism");
-        }
-        self->events_available = self->voice->GetNotifyEventHandle();
-        if (self->events_available == INVALID_HANDLE_VALUE) {
-            Py_CLEAR(self);
-            PyErr_SetString(PyExc_OSError, "Failed to get events handle for ISpVoice");
-            return NULL;
-        }
-        self->shutdown_events_thread = CreateEventW(NULL, true, false, NULL);
-        if (self->shutdown_events_thread == INVALID_HANDLE_VALUE) {
-            Py_CLEAR(self);
-            PyErr_SetFromWindowsErr(0);
-            return NULL;
-        }
-    }
-    return (PyObject*)self;
-}
-
-static void
-Voice_dealloc(Voice *self) {
-    if (self->voice) { self->voice->Release(); self->voice = NULL; }
-    if (self->shutdown_events_thread != INVALID_HANDLE_VALUE) {
-        CloseHandle(self->shutdown_events_thread);
-        self->shutdown_events_thread = INVALID_HANDLE_VALUE;
-    }
-    CoUninitialize();
-}
-// }}}
-
-// Enumeration {{{
-static PyObject*
-Voice_get_all_sound_outputs(Voice *self, PyObject *args) {
-    HRESULT hr = S_OK;
-    CComPtr<IEnumSpObjectTokens> iterator = NULL;
-    if (FAILED(hr = SpEnumTokens(SPCAT_AUDIOOUT, NULL, NULL, &iterator))) {
-        return error_from_hresult(hr, "Failed to create audio output category iterator");
-    }
-    pyobject_raii ans(PyList_New(0));
-    if (!ans) return NULL;
-    while (true) {
-        CComPtr<ISpObjectToken> token = NULL;
-        if (FAILED(hr = iterator->Next(1, &token, NULL)) || hr == S_FALSE || !token) break;
-        pyobject_raii dict(PyDict_New());
-        if (!dict) return NULL;
-        com_wchar_raii id, description;
-        if (FAILED(hr = token->GetId(id.unsafe_address()))) continue;
-        pyobject_raii idpy(PyUnicode_FromWideChar(id.ptr(), -1));
-        if (!idpy) return NULL;
-        if (PyDict_SetItemString(dict.ptr(), "id", idpy.ptr()) != 0) return NULL;
-
-        if (FAILED(hr = SpGetDescription(token, description.unsafe_address(), NULL))) continue;
-        pyobject_raii descriptionpy(PyUnicode_FromWideChar(description.ptr(), -1));
-        if (!descriptionpy) return NULL;
-        if (PyDict_SetItemString(dict.ptr(), "description", descriptionpy.ptr()) != 0) return NULL;
-
-        if (PyList_Append(ans.ptr(), dict.ptr()) != 0) return NULL;
-    }
-    return PyList_AsTuple(ans.ptr());
-}
-
-static PyObject*
-Voice_get_current_sound_output(Voice *self, PyObject *args) {
-    HRESULT hr = S_OK;
-    CComPtr<ISpObjectToken> token = NULL;
-    if (FAILED(hr = self->voice->GetOutputObjectToken(&token))) return error_from_hresult(hr, "Failed to get current output object token");
-    if (hr == S_FALSE) Py_RETURN_NONE;
-    com_wchar_raii id;
-    if (FAILED(hr = token->GetId(id.unsafe_address()))) return error_from_hresult(hr, "Failed to get ID for current audio output token");
-    return PyUnicode_FromWideChar(id.ptr(), -1);
-}
-
-static PyObject*
-Voice_set_current_sound_output(Voice *self, PyObject *args) {
-    wchar_raii id;
-    int allow_format_changes = 1;
-    if (!PyArg_ParseTuple(args, "|O&p", py_to_wchar, &id, &allow_format_changes)) return NULL;
-    HRESULT hr = S_OK;
-    if (id) {
-        CComPtr<ISpObjectToken> token = NULL;
-        if (FAILED(hr = SpGetTokenFromId(id.ptr(), &token))) {
-            return error_from_hresult(hr, "Failed to find sound output with id", PyTuple_GET_ITEM(args, 0));
-        }
-        if (FAILED(hr = self->voice->SetOutput(token, allow_format_changes))) return error_from_hresult(hr, "Failed to set sound output to", PyTuple_GET_ITEM(args, 0));
-
-    } else {
-        if (FAILED(hr = self->voice->SetOutput(NULL, allow_format_changes))) return error_from_hresult(hr, "Failed to set sound output to default");
-    }
-    Py_RETURN_NONE;
-}
-
-
-static PyObject*
-Voice_get_current_voice(Voice *self, PyObject *args) {
-    HRESULT hr = S_OK;
-    CComPtr<ISpObjectToken> token = NULL;
-    if (FAILED(hr = self->voice->GetVoice(&token))) {
-        return error_from_hresult(hr, "Failed to get current voice");
-    }
-    com_wchar_raii id;
-    if (FAILED(hr = token->GetId(id.unsafe_address()))) return error_from_hresult(hr, "Failed to get ID for current voice");
-    return PyUnicode_FromWideChar(id.ptr(), -1);
-}
-
-static PyObject*
-Voice_set_current_voice(Voice *self, PyObject *args) {
-    wchar_raii id;
-    if (!PyArg_ParseTuple(args, "|O&", py_to_wchar, &id)) return NULL;
-    HRESULT hr = S_OK;
-    if (id) {
-        CComPtr<ISpObjectToken> token = NULL;
-        if (FAILED(hr = SpGetTokenFromId(id.ptr(), &token))) {
-            return error_from_hresult(hr, "Failed to find voice with id", PyTuple_GET_ITEM(args, 0));
-        }
-        if (FAILED(hr = self->voice->SetVoice(token))) return error_from_hresult(hr, "Failed to set voice to", PyTuple_GET_ITEM(args, 0));
-    } else {
-        if (FAILED(hr = self->voice->SetVoice(NULL))) return error_from_hresult(hr, "Failed to set voice to default");
-    }
-    Py_RETURN_NONE;
-}
-
-static PyObject*
-Voice_get_all_voices(Voice *self, PyObject *args) {
-    HRESULT hr = S_OK;
-    CComPtr<IEnumSpObjectTokens> iterator = NULL;
-    if (FAILED(hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &iterator))) {
-        return error_from_hresult(hr, "Failed to create voice category iterator");
-    }
-    pyobject_raii ans(PyList_New(0));
-    if (!ans) return NULL;
-    while (true) {
-        CComPtr<ISpObjectToken> token = NULL;
-        if (FAILED(hr = iterator->Next(1, &token, NULL)) || hr == S_FALSE || !token) break;
-        pyobject_raii dict(PyDict_New());
-        if (!dict) return NULL;
-
-        com_wchar_raii id, description;
-        if (FAILED(hr = token->GetId(id.unsafe_address()))) continue;
-        pyobject_raii idpy(PyUnicode_FromWideChar(id.ptr(), -1));
-        if (!idpy) return NULL;
-        if (PyDict_SetItemString(dict.ptr(), "id", idpy.ptr()) != 0) return NULL;
-
-        if (FAILED(hr = SpGetDescription(token, description.unsafe_address(), NULL))) continue;
-        pyobject_raii descriptionpy(PyUnicode_FromWideChar(description.ptr(), -1));
-        if (!descriptionpy) return NULL;
-        if (PyDict_SetItemString(dict.ptr(), "description", descriptionpy.ptr()) != 0) return NULL;
-        CComPtr<ISpDataKey> attributes = NULL;
-        if (FAILED(hr = token->OpenKey(L"Attributes", &attributes))) continue;
-#define ATTR(name) {\
-    com_wchar_raii val; \
-    if (SUCCEEDED(attributes->GetStringValue(TEXT(#name), val.unsafe_address()))) { \
-        pyobject_raii pyval(PyUnicode_FromWideChar(val.ptr(), -1)); if (!pyval) return NULL; \
-        if (PyDict_SetItemString(dict.ptr(), #name, pyval.ptr()) != 0) return NULL; \
-    }\
-}
-        ATTR(gender); ATTR(name); ATTR(vendor); ATTR(age);
-#undef ATTR
-        com_wchar_raii val;
-        if (SUCCEEDED(attributes->GetStringValue(L"language", val.unsafe_address()))) {
-            int lcid = wcstol(val.ptr(), NULL, 16);
-            wchar_t buf[LOCALE_NAME_MAX_LENGTH];
-            if (LCIDToLocaleName(lcid, buf, LOCALE_NAME_MAX_LENGTH, 0) > 0) {
-                pyobject_raii pyval(PyUnicode_FromWideChar(buf, -1)); if (!pyval) return NULL;
-                if (PyDict_SetItemString(dict.ptr(), "language", pyval.ptr()) != 0) return NULL;
-				wchar_t display_name[1024];
-				int res = GetLocaleInfoEx(buf, LOCALE_SLOCALIZEDDISPLAYNAME, display_name, sizeof(display_name)/sizeof(display_name[0]));
-				if (res > 0) {
-					pyobject_raii pd(PyUnicode_FromWideChar(display_name, -1)); if (!pd) return NULL;
-					if (PyDict_SetItemString(dict.ptr(), "language_display_name", pd.ptr()) != 0) return NULL;
-				}
-            }
-        }
-        if (PyList_Append(ans.ptr(), dict.ptr()) != 0) return NULL;
-    }
-    return PyList_AsTuple(ans.ptr());
-}
-// }}}
-
-// Volume and rate {{{
-static PyObject*
-Voice_get_current_volume(Voice *self, PyObject *args) {
-    HRESULT hr = S_OK;
-    USHORT volume;
-    if (FAILED(hr = self->voice->GetVolume(&volume))) return error_from_hresult(hr);
-    return PyLong_FromUnsignedLong((unsigned long)volume);
-}
-
-static PyObject*
-Voice_get_current_rate(Voice *self, PyObject *args) {
-    HRESULT hr = S_OK;
-    long rate;
-    if (FAILED(hr = self->voice->GetRate(&rate))) return error_from_hresult(hr);
-    return PyLong_FromLong(rate);
-}
-
-static PyObject*
-Voice_set_current_rate(Voice *self, PyObject *args) {
-    HRESULT hr = S_OK;
-    long rate;
-    if (!PyArg_ParseTuple(args, "l", &rate)) return NULL;
-    if (rate < -10 || rate > 10) { PyErr_SetString(PyExc_ValueError, "rate must be between -10 and 10"); return NULL; }
-    if (FAILED(hr = self->voice->SetRate(rate))) return error_from_hresult(hr);
-    Py_RETURN_NONE;
-}
-
-static PyObject*
-Voice_set_current_volume(Voice *self, PyObject *args) {
-    HRESULT hr = S_OK;
-    unsigned short volume;
-    if (!PyArg_ParseTuple(args, "H", &volume)) return NULL;
-    if (FAILED(hr = self->voice->SetVolume(volume))) return error_from_hresult(hr);
-    Py_RETURN_NONE;
-}
-// }}}
-
-static PyObject*
-Voice_speak(Voice *self, PyObject *args) {
-    wchar_raii text_or_path;
-    unsigned long flags = SPF_DEFAULT;
-    int want_events = 0;
-    HRESULT hr = S_OK;
-    if (!PyArg_ParseTuple(args, "O&|kp", py_to_wchar, &text_or_path, &flags, &want_events)) return NULL;
-    ULONGLONG events = want_events ? speak_events : 0;
-    if (FAILED(hr = self->voice->SetInterest(events, events))) {
-        return error_from_hresult(hr, "Failed to ask for events");
-    }
-    ULONG stream_number;
-    Py_BEGIN_ALLOW_THREADS;
-    hr = self->voice->Speak(text_or_path.ptr(), flags, &stream_number);
-    Py_END_ALLOW_THREADS;
-    if (FAILED(hr)) return error_from_hresult(hr, "Failed to speak");
-    return PyLong_FromUnsignedLong(stream_number);
-}
-
-static PyObject*
-Voice_wait_until_done(Voice *self, PyObject *args) {
-    unsigned long timeout = INFINITE;
-    if (!PyArg_ParseTuple(args, "|k", &timeout)) return NULL;
-    HRESULT hr ;
-    Py_BEGIN_ALLOW_THREADS;
-    hr = self->voice->WaitUntilDone(timeout);
-    Py_END_ALLOW_THREADS;
-    if (hr == S_OK) Py_RETURN_TRUE;
-    Py_RETURN_FALSE;
-}
-
-static PyObject*
-Voice_pause(Voice *self, PyObject *args) {
-    HRESULT hr = self->voice->Pause();
-    if (FAILED(hr)) return error_from_hresult(hr);
-    Py_RETURN_NONE;
-}
-
-static PyObject*
-Voice_resume(Voice *self, PyObject *args) {
-    HRESULT hr = self->voice->Resume();
-    if (FAILED(hr)) return error_from_hresult(hr);
-    Py_RETURN_NONE;
-}
-
-static PyObject*
-Voice_create_recording_wav(Voice *self, PyObject *args) {
-    HRESULT hr = S_OK;
-    wchar_raii path, text;
-    int do_events = 0;
-    SPSTREAMFORMAT format = SPSF_22kHz16BitMono;
-    if (!PyArg_ParseTuple(args, "O&O&|ip", py_to_wchar_no_none, &path, py_to_wchar_no_none, &text, &format, &do_events)) return NULL;
-    CComPtr <ISpStream> stream = NULL;
-    CSpStreamFormat audio_fmt;
-    if (FAILED(hr = audio_fmt.AssignFormat(format))) return error_from_hresult(hr, "Invalid Audio format");
-    CComPtr<ISpObjectToken> token = NULL;
-    if (FAILED(hr = self->voice->GetOutputObjectToken(&token))) return error_from_hresult(hr, "Failed to get current output object token");
-    bool uses_default_output = hr == S_FALSE;
-
-    if (FAILED(hr = SPBindToFile(path.ptr(), SPFM_CREATE_ALWAYS, &stream, &audio_fmt.FormatId(), audio_fmt.WaveFormatExPtr())))
-        return error_from_hresult(hr, "Failed to open file", PyTuple_GET_ITEM(args, 0));
-
-    if (FAILED(hr = self->voice->SetOutput(stream, TRUE))) {
-        stream->Close();
-        return error_from_hresult(hr, "Failed to set output to wav file", PyTuple_GET_ITEM(args, 0));
-    }
-    Py_BEGIN_ALLOW_THREADS;
-    hr = self->voice->Speak(text.ptr(), SPF_DEFAULT, NULL);
-    Py_END_ALLOW_THREADS;
-    stream->Close();
-    self->voice->SetOutput(uses_default_output ? NULL: token, TRUE);
-    if (FAILED(hr)) return error_from_hresult(hr, "Failed to speak into wav file", PyTuple_GET_ITEM(args, 0));
-    Py_RETURN_NONE;
-}
-
-
-static PyObject*
-Voice_shutdown_event_loop(Voice *self, PyObject *args) {
-    if (!SetEvent(self->shutdown_events_thread)) return PyErr_SetFromWindowsErr(0);
-    Py_RETURN_NONE;
-}
-
-static PyObject*
-Voice_get_events(Voice *self, PyObject *args) {
-    HRESULT hr;
-    const ULONG asz = 32;
-    ULONG num_events;
-    SPEVENT events[asz];
-    PyObject *ret;
-    long long val;
-    int etype;
-    PyObject *ans = PyList_New(0);
-    if (!ans) return NULL;
-    while (true) {
-        Py_BEGIN_ALLOW_THREADS;
-        hr = self->voice->GetEvents(asz, events, &num_events);
-        Py_END_ALLOW_THREADS;
-        if (hr != S_OK && hr != S_FALSE) break;
-        if (num_events == 0) break;
-        for (ULONG i = 0; i < num_events; i++) {
-            etype = events[i].eEventId;
-            bool ok = false;
-            switch(etype) {
-                case SPEI_TTS_BOOKMARK:
-                    val = events[i].wParam;
-                    ok = true;
-                    break;
-                case SPEI_START_INPUT_STREAM:
-                case SPEI_END_INPUT_STREAM:
-                    val = 0;
-                    ok = true;
-                    break;
-            }
-            if (ok) {
-                ret = Py_BuildValue("kiL", events[i].ulStreamNum, etype, val);
-                if (!ret) { Py_CLEAR(ans); return NULL; }
-                int x = PyList_Append(ans, ret);
-                Py_DECREF(ret);
-                if (x != 0) { Py_CLEAR(ans); return NULL; }
-            }
-        }
-    }
-    return ans;
-}
-
-static PyObject*
-Voice_wait_for_event(Voice *self, PyObject *args) {
-    const HANDLE handles[2] = {self->shutdown_events_thread, self->events_available};
-    DWORD ev;
-    Py_BEGIN_ALLOW_THREADS;
-    ev = WaitForMultipleObjects(2, handles, false, INFINITE);
-    Py_END_ALLOW_THREADS;
-    switch (ev) {
-        case WAIT_OBJECT_0:
-            Py_RETURN_FALSE;
-        case WAIT_OBJECT_0 + 1:
-            Py_RETURN_TRUE;
-    }
-    Py_RETURN_NONE;
-}
-
-// Boilerplate {{{
-#define M(name, args) { #name, (PyCFunction)Voice_##name, args, ""}
-static PyMethodDef Voice_methods[] = {
-    M(get_all_voices, METH_NOARGS),
-    M(get_all_sound_outputs, METH_NOARGS),
-
-    M(speak, METH_VARARGS),
-    M(wait_until_done, METH_VARARGS),
-    M(pause, METH_NOARGS),
-    M(resume, METH_NOARGS),
-    M(create_recording_wav, METH_VARARGS),
-
-    M(get_current_rate, METH_NOARGS),
-    M(get_current_volume, METH_NOARGS),
-    M(get_current_voice, METH_NOARGS),
-    M(get_current_sound_output, METH_NOARGS),
-    M(set_current_voice, METH_VARARGS),
-    M(set_current_rate, METH_VARARGS),
-    M(set_current_volume, METH_VARARGS),
-    M(set_current_sound_output, METH_VARARGS),
-
-    M(shutdown_event_loop, METH_NOARGS),
-    M(wait_for_event, METH_NOARGS),
-    M(get_events, METH_NOARGS),
-    {NULL, NULL, 0, NULL}
-};
-#undef M
-
-#define M(name, args) { #name, name, args, ""}
-static PyMethodDef winsapi_methods[] = {
-    {NULL, NULL, 0, NULL}
-};
-#undef M
-
-static int
-exec_module(PyObject *m) {
-    VoiceType.tp_name = "winsapi.ISpVoice";
-    VoiceType.tp_doc = "Wrapper for ISpVoice";
-    VoiceType.tp_basicsize = sizeof(Voice);
-    VoiceType.tp_itemsize = 0;
-    VoiceType.tp_flags = Py_TPFLAGS_DEFAULT;
-    VoiceType.tp_new = Voice_new;
-    VoiceType.tp_methods = Voice_methods;
-	VoiceType.tp_dealloc = (destructor)Voice_dealloc;
-	if (PyType_Ready(&VoiceType) < 0) return -1;
-
-	Py_INCREF(&VoiceType);
-    if (PyModule_AddObject(m, "ISpVoice", (PyObject *) &VoiceType) < 0) {
-        Py_DECREF(&VoiceType);
-        return -1;
-    }
-#define AI(name) if (PyModule_AddIntMacro(m, name) != 0) { Py_DECREF(&VoiceType); return -1; }
-    AI(SPF_DEFAULT);
-    AI(SPF_ASYNC);
-    AI(SPF_PURGEBEFORESPEAK);
-    AI(SPF_IS_FILENAME);
-    AI(SPF_IS_XML);
-    AI(SPF_IS_NOT_XML);
-    AI(SPF_PERSIST_XML);
-    AI(SPF_NLP_SPEAK_PUNC);
-    AI(SPF_PARSE_SSML);
-    AI(SPF_PARSE_AUTODETECT);
-    AI(SPF_NLP_MASK);
-    AI(SPF_PARSE_MASK);
-    AI(SPF_VOICE_MASK);
-    AI(SPF_UNUSED_FLAGS);
-
-    AI(INFINITE);
-
-    AI(SPSF_Default);
-    AI(SPSF_NoAssignedFormat);
-    AI(SPSF_Text);
-    AI(SPSF_NonStandardFormat);
-    AI(SPSF_ExtendedAudioFormat);
-
-    // Standard PCM wave formats
-    AI(SPSF_8kHz8BitMono);
-    AI(SPSF_8kHz8BitStereo);
-    AI(SPSF_8kHz16BitMono);
-    AI(SPSF_8kHz16BitStereo);
-    AI(SPSF_11kHz8BitMono);
-    AI(SPSF_11kHz8BitStereo);
-    AI(SPSF_11kHz16BitMono);
-    AI(SPSF_11kHz16BitStereo);
-    AI(SPSF_12kHz8BitMono);
-    AI(SPSF_12kHz8BitStereo);
-    AI(SPSF_12kHz16BitMono);
-    AI(SPSF_12kHz16BitStereo);
-    AI(SPSF_16kHz8BitMono);
-    AI(SPSF_16kHz8BitStereo);
-    AI(SPSF_16kHz16BitMono);
-    AI(SPSF_16kHz16BitStereo);
-    AI(SPSF_22kHz8BitMono);
-    AI(SPSF_22kHz8BitStereo);
-    AI(SPSF_22kHz16BitMono);
-    AI(SPSF_22kHz16BitStereo);
-    AI(SPSF_24kHz8BitMono);
-    AI(SPSF_24kHz8BitStereo);
-    AI(SPSF_24kHz16BitMono);
-    AI(SPSF_24kHz16BitStereo);
-    AI(SPSF_32kHz8BitMono);
-    AI(SPSF_32kHz8BitStereo);
-    AI(SPSF_32kHz16BitMono);
-    AI(SPSF_32kHz16BitStereo);
-    AI(SPSF_44kHz8BitMono);
-    AI(SPSF_44kHz8BitStereo);
-    AI(SPSF_44kHz16BitMono);
-    AI(SPSF_44kHz16BitStereo);
-    AI(SPSF_48kHz8BitMono);
-    AI(SPSF_48kHz8BitStereo);
-    AI(SPSF_48kHz16BitMono);
-    AI(SPSF_48kHz16BitStereo);
-
-    // TrueSpeech format
-    AI(SPSF_TrueSpeech_8kHz1BitMono);
-
-    // A-Law formats
-    AI(SPSF_CCITT_ALaw_8kHzMono);
-    AI(SPSF_CCITT_ALaw_8kHzStereo);
-    AI(SPSF_CCITT_ALaw_11kHzMono);
-    AI(SPSF_CCITT_ALaw_11kHzStereo);
-    AI(SPSF_CCITT_ALaw_22kHzMono);
-    AI(SPSF_CCITT_ALaw_22kHzStereo);
-    AI(SPSF_CCITT_ALaw_44kHzMono);
-    AI(SPSF_CCITT_ALaw_44kHzStereo);
-
-    // u-Law formats
-    AI(SPSF_CCITT_uLaw_8kHzMono);
-    AI(SPSF_CCITT_uLaw_8kHzStereo);
-    AI(SPSF_CCITT_uLaw_11kHzMono);
-    AI(SPSF_CCITT_uLaw_11kHzStereo);
-    AI(SPSF_CCITT_uLaw_22kHzMono);
-    AI(SPSF_CCITT_uLaw_22kHzStereo);
-    AI(SPSF_CCITT_uLaw_44kHzMono);
-    AI(SPSF_CCITT_uLaw_44kHzStereo);
-
-    // ADPCM formats
-    AI(SPSF_ADPCM_8kHzMono);
-    AI(SPSF_ADPCM_8kHzStereo);
-    AI(SPSF_ADPCM_11kHzMono);
-    AI(SPSF_ADPCM_11kHzStereo);
-    AI(SPSF_ADPCM_22kHzMono);
-    AI(SPSF_ADPCM_22kHzStereo);
-    AI(SPSF_ADPCM_44kHzMono);
-    AI(SPSF_ADPCM_44kHzStereo);
-
-    // GSM 6.10 formats
-    AI(SPSF_GSM610_8kHzMono);
-    AI(SPSF_GSM610_11kHzMono);
-    AI(SPSF_GSM610_22kHzMono);
-    AI(SPSF_GSM610_44kHzMono);
-
-    AI(SPEI_UNDEFINED);
-
-    //--- TTS engine
-    AI(SPEI_START_INPUT_STREAM);
-    AI(SPEI_END_INPUT_STREAM);
-    AI(SPEI_VOICE_CHANGE);
-    AI(SPEI_TTS_BOOKMARK);
-    AI(SPEI_WORD_BOUNDARY);
-    AI(SPEI_PHONEME);
-    AI(SPEI_SENTENCE_BOUNDARY);
-    AI(SPEI_VISEME);
-    AI(SPEI_TTS_AUDIO_LEVEL);
-
-    //--- Engine vendors use these reserved bits
-    AI(SPEI_TTS_PRIVATE);
-    AI(SPEI_MIN_TTS);
-    AI(SPEI_MAX_TTS);
-
-    //--- Speech Recognition
-    AI(SPEI_END_SR_STREAM);
-    AI(SPEI_SOUND_START);
-    AI(SPEI_SOUND_END);
-    AI(SPEI_PHRASE_START);
-    AI(SPEI_RECOGNITION);
-    AI(SPEI_HYPOTHESIS);
-    AI(SPEI_SR_BOOKMARK);
-    AI(SPEI_PROPERTY_NUM_CHANGE);
-    AI(SPEI_PROPERTY_STRING_CHANGE);
-    AI(SPEI_FALSE_RECOGNITION);
-    AI(SPEI_INTERFERENCE);
-    AI(SPEI_REQUEST_UI);
-    AI(SPEI_RECO_STATE_CHANGE);
-    AI(SPEI_ADAPTATION);
-    AI(SPEI_START_SR_STREAM);
-    AI(SPEI_RECO_OTHER_CONTEXT);
-    AI(SPEI_SR_AUDIO_LEVEL);
-    AI(SPEI_SR_RETAINEDAUDIO);
-
-    //--- Engine vendors use these reserved bits
-    AI(SPEI_SR_PRIVATE);
-    AI(SPEI_MIN_SR);
-    AI(SPEI_MAX_SR);
-
-    //--- Reserved: Do not use
-    AI(SPEI_RESERVED1);
-    AI(SPEI_RESERVED2);
-#undef AI
-    return 0;
-}
-
-static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
-
-static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT};
-
-CALIBRE_MODINIT_FUNC PyInit_winsapi(void) {
-    module_def.m_name     = "winsapi";
-    module_def.m_doc      = "SAPI wrapper";
-    module_def.m_methods  = winsapi_methods;
-    module_def.m_slots    = slots;
-	return PyModuleDef_Init(&module_def);
-}
diff --git a/src/calibre/utils/windows/winsapi.py b/src/calibre/utils/windows/winsapi.py
deleted file mode 100644
index efc865c2a0..0000000000
--- a/src/calibre/utils/windows/winsapi.py
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/env python
-# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
-
-from calibre_extensions.winsapi import ISpVoice
-
-
-def develop():
-    spv = ISpVoice()
-    spv.create_recording_wav('test.wav', 'Hello, world!')
-
-
-def find_tests():
-    import os
-    import unittest
-    is_ci = os.environ.get('CI', '').lower() == 'true'
-
-    class TestSAPI(unittest.TestCase):
-
-        def setUp(self):
-            self.sapi = ISpVoice()
-
-        def tearDown(self):
-            self.sapi = None
-
-        def test_enumeration_of_voices(self):
-            default_voice = self.sapi.get_current_voice()
-            self.assertTrue(default_voice)
-            all_voices = self.sapi.get_all_voices()
-            self.assertTrue(all_voices)
-            self.assertIn(default_voice, {x['id'] for x in all_voices})
-            for voice in all_voices:
-                for key in ('name', 'gender', 'age', 'language', 'description'):
-                    self.assertIn(key, voice)
-                self.sapi.set_current_voice(voice['id'])
-                self.assertEqual(self.sapi.get_current_voice(), voice['id'])
-            self.sapi.set_current_voice()
-            self.assertEqual(self.sapi.get_current_voice(), default_voice)
-
-        @unittest.skipIf(is_ci, 'No sound output on CI')
-        def test_enumeration_of_sound_outputs(self):
-            default_output = self.sapi.get_current_sound_output()
-            self.assertTrue(default_output)
-            all_outputs = self.sapi.get_all_sound_outputs()
-            self.assertTrue(all_outputs)
-            self.assertIn(default_output, {x['id'] for x in all_outputs})
-            for output in all_outputs:
-                for key in ('id', 'description',):
-                    self.assertIn(key, output)
-                self.sapi.set_current_voice(output['id'])
-                self.assertEqual(self.sapi.get_current_sound_output(), output['id'])
-            self.sapi.set_current_sound_output()
-            self.assertEqual(self.sapi.get_current_sound_output(), default_output)
-
-        def test_volume_and_rate(self):
-            dr = self.sapi.get_current_rate()
-            new_rate = dr // 2 + 1
-            self.sapi.set_current_rate(new_rate)
-            self.assertEqual(self.sapi.get_current_rate(), new_rate)
-            self.sapi.set_current_rate(dr)
-
-            dv = self.sapi.get_current_volume()
-            new_vol = dv // 2 + 3
-            self.sapi.set_current_volume(new_vol)
-            self.assertEqual(self.sapi.get_current_volume(), new_vol)
-            self.sapi.set_current_volume(dv)
-
-        def test_record_as_audio_file(self):
-            import tempfile
-            with tempfile.TemporaryDirectory() as tdir:
-                wav_path = os.path.join(tdir, 'test.wav')
-                self.sapi.create_recording_wav(wav_path, 'testing microsoft voices')
-                self.assertGreater(os.path.getsize(wav_path), 256)
-
-    return unittest.defaultTestLoader.loadTestsFromTestCase(TestSAPI)
-
-
-def run_tests():
-    from calibre.utils.run_tests import run_tests
-    run_tests(find_tests)
diff --git a/src/calibre/utils/windows/winspeech.cpp b/src/calibre/utils/windows/winspeech.cpp
deleted file mode 100644
index b12cc1bd67..0000000000
--- a/src/calibre/utils/windows/winspeech.cpp
+++ /dev/null
@@ -1,935 +0,0 @@
-/*
- * winspeech.cpp
- * Copyright (C) 2023 Kovid Goyal <kovid at kovidgoyal.net>
- *
- * Distributed under terms of the GPL3 license.
- */
-#include "common.h"
-
-#include <atomic>
-#include <filesystem>
-#include <string_view>
-#include <fstream>
-#include <mutex>
-#include <filesystem>
-#include <functional>
-#include <iostream>
-#include <unordered_map>
-#include <io.h>
-#include <winrt/base.h>
-#include <winrt/windows.foundation.h>
-#include <winrt/windows.foundation.collections.h>
-#include <winrt/windows.storage.streams.h>
-#include <winrt/windows.media.speechsynthesis.h>
-#include <winrt/windows.media.core.h>
-#include <winrt/windows.media.playback.h>
-#include <winrt/windows.media.devices.h>
-#include <winrt/windows.devices.enumeration.h>
-
-#ifdef max
-#undef max
-#endif
-using namespace winrt::Windows::Foundation;
-using namespace winrt::Windows::Foundation::Collections;
-using namespace winrt::Windows::Media::SpeechSynthesis;
-using namespace winrt::Windows::Media::Playback;
-using namespace winrt::Windows::Media::Core;
-using namespace winrt::Windows::Media::Devices;
-using namespace winrt::Windows::Devices::Enumeration;
-using namespace winrt::Windows::Storage::Streams;
-typedef uint64_t id_type;
-
-static std::mutex output_lock;
-static DWORD main_thread_id;
-
-template<typename T> static void
-__debug_multiple_impl(T x) {
-    if constexpr (std::is_same_v<T, wchar_t*> || std::is_same_v<T, std::wstring> || std::is_same_v<T, winrt::hstring> || std::is_same_v<T, std::wstring_view>) {
-        std::cerr << winrt::to_string(x);
-    } else {
-        std::cerr << x;
-    }
-}
-
-template<typename T> static void
-__debug_multiple(T x) {
-    __debug_multiple_impl(x);
-    std::cerr << std::endl;
-}
-
-template<typename T, typename... Args> static void
-__debug_multiple(T x, Args... args) {
-    __debug_multiple_impl(x);
-    std::cerr << " ";
-    __debug_multiple(args...);
-}
-
-template<typename... Args> static void
-debug(Args... args) {
-    std::scoped_lock _sl_(output_lock);
-    DWORD tid = GetCurrentThreadId();
-    if (tid == main_thread_id) std::cerr << "thread-main"; else std::cerr << "thread-" << tid;
-    std::cerr << ": ";
-    __debug_multiple(args...);
-}
-
-static std::atomic_bool main_loop_is_running;
-enum {
-    STDIN_FAILED = 1,
-    STDIN_MSG,
-    EXIT_REQUESTED
-};
-
-static std::vector<std::wstring_view>
-split(std::wstring_view const &src, std::wstring const &delim = L" ") {
-    size_t pos;
-    std::vector<std::wstring_view> ans; ans.reserve(16);
-    std::wstring_view sv(src);
-    while ((pos = sv.find(delim)) != std::wstring_view::npos) {
-        if (pos > 0) ans.emplace_back(sv.substr(0, pos));
-        sv = sv.substr(pos + 1);
-    }
-    if (sv.size() > 0) ans.emplace_back(sv);
-    return ans;
-}
-
-static std::wstring
-join(std::vector<std::wstring_view> parts, std::wstring const &delim = L" ") {
-    std::wstring ans; ans.reserve(1024);
-    for (auto const &x : parts) {
-        ans.append(x);
-        ans.append(delim);
-    }
-    ans.erase(ans.size() - delim.size());
-    return ans;
-}
-
-static id_type
-parse_id(std::wstring_view const& s) {
-    id_type ans = 0;
-    for (auto ch : s) {
-        auto delta = ch - '0';
-        if (delta < 0 || delta > 9) {
-            throw std::wstring(L"Not a valid id: ") + std::wstring(s);
-        }
-        ans = (ans * 10) + delta;
-    }
-    return ans;
-}
-
-static double
-parse_double(const wchar_t *raw) {
-    std::wistringstream s(raw, std::ios_base::in);
-    s.imbue(std::locale("C"));
-    double ans;
-    s >> ans;
-    return ans;
-}
-
-static void
-serialize_string_for_json(std::string const &src, std::ostream &out) {
-    out << '"';
-    for (auto ch : src) {
-        switch(ch) {
-            case '\\':
-                out << "\\\\"; break;
-            case '"':
-                out << "\\\""; break;
-            case '\n':
-                out << "\\n"; break;
-            case '\r':
-                out << "\\r"; break;
-            default:
-                out << ch; break;
-        }
-    }
-    out << '"';
-}
-
-template<typename T> static void
-serialize_integer(std::ostream &out, T val, int base = 10) {
-    std::array<char, 16> str;
-    if (auto [ptr, ec] = std::to_chars(str.data(), str.data() + str.size(), val, base); ec == std::errc()) {
-        out << std::string_view(str.data(), ptr - str.data());
-    } else {
-        throw std::exception(std::make_error_code(ec).message().c_str());
-    }
-}
-
-template<typename T>static void
-serialize_float(std::ostream &out, T val, std::chars_format fmt = std::chars_format::fixed) {
-    std::array<char, 16> str;
-    if (auto [ptr, ec] = std::to_chars(str.data(), str.data() + str.size(), val, fmt); ec == std::errc()) {
-        out << std::string_view(str.data(), ptr - str.data());
-    } else {
-        throw std::exception(std::make_error_code(ec).message().c_str());
-    }
-}
-
-
-class json_val {  // {{{
-private:
-    enum { DT_INT, DT_UINT, DT_STRING, DT_LIST, DT_OBJECT, DT_NONE, DT_BOOL, DT_FLOAT } type;
-    std::string s;
-    bool b;
-    double f;
-    int64_t i;
-    uint64_t u;
-    std::vector<json_val> list;
-    std::map<std::string, json_val> object;
-
-    void serialize(std::ostream &out) const {
-        switch(type) {
-            case DT_NONE:
-                out << "nil"; break;
-            case DT_BOOL:
-                out << (b ? "true" : "false"); break;
-            case DT_INT:
-                // this is not really correct since JS has various limits on numeric types, but good enough for us
-                serialize_integer(out, i); break;
-            case DT_UINT:
-                // this is not really correct since JS has various limits on numeric types, but good enough for us
-                serialize_integer(out, u); break;
-            case DT_FLOAT:
-                // again not technically correct
-                serialize_float(out, f); break;
-            case DT_STRING:
-                return serialize_string_for_json(s, out);
-            case DT_LIST: {
-                out << '[';
-                bool first = true;
-                for (auto const &i : list) {
-                    if (!first) out << ", ";
-                    first = false;
-                    i.serialize(out);
-                }
-                out << ']';
-                break;
-            }
-            case DT_OBJECT: {
-                out << '{';
-                bool first = true;
-                for (const auto& [key, value]: object) {
-                    if (!first) out << ", ";
-                    first = false;
-                    serialize_string_for_json(key, out);
-                    out << ": ";
-                    value.serialize(out);
-                }
-                out << '}';
-                break;
-            }
-        }
-    }
-
-public:
-    json_val() : type(DT_NONE) {}
-    json_val(std::string &&text) : type(DT_STRING), s(text) {}
-    json_val(const char *ns) : type(DT_STRING), s(ns) {}
-    json_val(winrt::hstring const& text) : type(DT_STRING), s(winrt::to_string(text)) {}
-    json_val(std::wstring const& text) : type(DT_STRING), s(winrt::to_string(text)) {}
-    json_val(std::string_view text) : type(DT_STRING), s(text) {}
-    json_val(std::vector<json_val> &&items) : type(DT_LIST), list(items) {}
-    json_val(std::map<std::string, json_val> &&m) : type(DT_OBJECT), object(m) {}
-    json_val(std::initializer_list<std::pair<const std::string, json_val>> const& vals) : type(DT_OBJECT), object(vals) { }
-
-    static json_val from_hresult(HRESULT hr) {
-        json_val ans; ans.type = DT_STRING;
-        std::array<char, 16> str;
-        str[0] = '0'; str[1] = 'x';
-        if (auto [ptr, ec] = std::to_chars(str.data()+2, str.data() + str.size(), (uint32_t)hr, 16); ec == std::errc()) {
-            ans.s = std::string(str.data(), ptr - str.data());
-        } else {
-            throw std::exception(std::make_error_code(ec).message().c_str());
-        }
-        return ans;
-    }
-
-    json_val(VoiceInformation const& voice) : type(DT_OBJECT) {
-        const char *gender = "";
-        switch (voice.Gender()) {
-            case VoiceGender::Male: gender = "male"; break;
-            case VoiceGender::Female: gender = "female"; break;
-        }
-        object = {
-            {"display_name", voice.DisplayName()},
-            {"description", voice.Description()},
-            {"id", voice.Id()},
-            {"language", voice.Language()},
-            {"gender", gender},
-        };
-    }
-
-    json_val(IVectorView<VoiceInformation> const& voices) : type(DT_LIST) {
-        list.reserve(voices.Size());
-        for(auto const& voice : voices) {
-            list.emplace_back(voice);
-        }
-    }
-
-    json_val(TimedMetadataTrackErrorCode const ec) : type(DT_STRING) {
-        switch(ec) {
-            case TimedMetadataTrackErrorCode::DataFormatError:
-                s = "data_format_error"; break;
-            case TimedMetadataTrackErrorCode::NetworkError:
-                s = "network_error"; break;
-            case TimedMetadataTrackErrorCode::InternalError:
-                s = "internal_error"; break;
-            case TimedMetadataTrackErrorCode::None:
-                s = "none"; break;
-        }
-    }
-
-    json_val(DeviceInformationKind const dev) : type(DT_STRING) {
-        switch(dev) {
-            case DeviceInformationKind::Unknown:
-                s = "unknown"; break;
-            case DeviceInformationKind::AssociationEndpoint:
-                s = "association_endpoint"; break;
-            case DeviceInformationKind::AssociationEndpointContainer:
-                s = "association_endpoint_container"; break;
-            case DeviceInformationKind::AssociationEndpointService:
-                s = "association_endpoint_service"; break;
-            case DeviceInformationKind::Device:
-                s = "device"; break;
-            case DeviceInformationKind::DevicePanel:
-                s = "device_panel"; break;
-            case DeviceInformationKind::DeviceInterface:
-                s = "device_interface"; break;
-            case DeviceInformationKind::DeviceInterfaceClass:
-                s = "device_interface_class"; break;
-            case DeviceInformationKind::DeviceContainer:
-                s = "device_container"; break;
-        }
-    }
-
-    json_val(DeviceInformation const& dev) : type(DT_OBJECT) {
-        object = {
-            {"id", dev.Id()},
-            {"name", dev.Name()},
-            {"kind", dev.Kind()},
-            {"is_default", dev.IsDefault()},
-            {"is_enabled", dev.IsEnabled()},
-        };
-    }
-
-    json_val(DeviceInformationCollection const& devices) : type(DT_LIST) {
-        list.reserve(devices.Size());
-        for(auto const& dev : devices) {
-            list.emplace_back(json_val(dev));
-        }
-    }
-
-    json_val(MediaPlaybackState const& state) : type(DT_STRING) {
-        switch(state) {
-            case MediaPlaybackState::None: s = "none"; break;
-            case MediaPlaybackState::Opening: s = "opening"; break;
-            case MediaPlaybackState::Buffering: s = "buffering"; break;
-            case MediaPlaybackState::Playing: s = "playing"; break;
-            case MediaPlaybackState::Paused: s = "paused"; break;
-        }
-    }
-
-    json_val(MediaPlayerError const& e) : type(DT_STRING) {
-        // https://learn.microsoft.com/en-us/uwp/api/windows.media.playback.mediaplayererror
-        switch(e) {
-            case MediaPlayerError::Unknown: s = "unknown"; break;
-            case MediaPlayerError::Aborted: s = "aborted"; break;
-            case MediaPlayerError::NetworkError: s = "network_error"; break;
-            case MediaPlayerError::DecodingError: s = "decoding_error"; break;
-            case MediaPlayerError::SourceNotSupported: s = "source_not_supported"; break;
-        }
-    }
-
-    json_val(winrt::Windows::Foundation::TimeSpan const &t) : type(DT_INT) {
-        i = std::chrono::nanoseconds(t).count();
-    }
-
-    json_val(winrt::hstring const &label, SpeechCue const &cue) : type(DT_OBJECT) {
-        object = {
-            {"type", label},
-            {"text", cue.Text()},
-            {"start_time", cue.StartTime()},
-            {"start_pos_in_text", cue.StartPositionInInput().Value()},
-            {"end_pos_in_text", cue.EndPositionInInput().Value()},
-        };
-    }
-
-    template<typename T> json_val(T const x) {
-        if constexpr (std::is_same_v<T, bool>) {
-            type = DT_BOOL;
-            b = x;
-        } else if constexpr (std::is_unsigned_v<T>) {
-            type = DT_UINT;
-            u = x;
-        } else if constexpr (std::is_integral_v<T>) {
-            type = DT_INT;
-            i = x;
-        } else if constexpr (std::is_floating_point_v<T>) {
-            type = DT_FLOAT;
-            f = x;
-        } else {
-            static_assert(!sizeof(T), "Unknown type T cannot be converted to JSON");
-        }
-    }
-
-    friend std::ostream& operator<<(std::ostream &os, const json_val &self) {
-        self.serialize(os);
-        return os;
-    }
-
-}; // }}}
-
-static void
-output(id_type cmd_id, std::string_view const &msg_type, json_val const &&msg) {
-    std::scoped_lock sl(output_lock);
-    try {
-        std::cout << cmd_id << " " << msg_type << " " << msg << std::endl;
-    } catch(...) {}
-}
-
-static void
-output_error(id_type cmd_id, std::string_view const &msg, std::string_view const &error, int64_t line, HRESULT hr=S_OK) {
-    std::map<std::string, json_val> m = {{"msg", msg}, {"error", error}, {"file", "winspeech.cpp"}, {"line", line}};
-    if (hr != S_OK) m["hr"] = json_val::from_hresult(hr);
-    output(cmd_id, "error", std::move(m));
-}
-
-static bool
-run_catching_exceptions(std::function<void(void)> f, std::string_view const &msg, int64_t line, id_type cmd_id=0) {
-    bool ok = false;
-    try {
-        f();
-        ok = true;
-    } catch(winrt::hresult_error const& ex) {
-        output_error(cmd_id, msg, winrt::to_string(ex.message()), line, ex.to_abi());
-    } catch(const std::system_error& ex) {
-        output_error(cmd_id, msg, "system_error with code: " + std::to_string(ex.code().value()) + " and meaning: " + ex.what(), line);
-    } catch (std::exception const &ex) {
-        output_error(cmd_id, msg, ex.what(), line);
-    } catch (std::string const &ex) {
-        output_error(cmd_id, msg, ex, line);
-    } catch (std::wstring const &ex) {
-        output_error(cmd_id, msg, winrt::to_string(ex), line);
-    } catch (...) {
-        output_error(cmd_id, msg, "Unknown exception type was raised", line);
-    }
-    return ok;
-}
-
-
-struct Revokers {
-    MediaPlaybackSession::PlaybackStateChanged_revoker playback_state_changed;
-    MediaPlayer::MediaEnded_revoker media_ended; MediaPlayer::MediaOpened_revoker media_opened;
-    MediaPlayer::MediaFailed_revoker media_failed; MediaPlayer::SourceChanged_revoker source_changed;
-
-    MediaPlaybackItem::TimedMetadataTracksChanged_revoker timed_metadata_tracks_changed;
-    std::vector<TimedMetadataTrack::CueEntered_revoker> cue_entered;
-    std::vector<TimedMetadataTrack::CueExited_revoker> cue_exited;
-    std::vector<TimedMetadataTrack::TrackFailed_revoker> track_failed;
-};
-
-struct Mark {
-    uint32_t id, pos_in_text;
-    Mark(uint32_t id, uint32_t pos) : id(id), pos_in_text(pos) {}
-};
-
-struct Marks {
-    std::vector<Mark> entries;
-    int32_t last_reported_mark_index;
-    Marks() : entries(), last_reported_mark_index(-1) {}
-};
-
-static SpeechSynthesizer speech_synthesizer{nullptr};
-static MediaPlayer media_player{nullptr};
-
-static size_t
-decode_into(std::string_view src, std::wstring_view dest) {
-    int n = MultiByteToWideChar(CP_UTF8, 0, src.data(), (int)src.size(), (wchar_t*)dest.data(), (int)dest.size());
-    if (n == 0 && src.size() > 0) {
-        throw std::system_error(GetLastError(), std::system_category(), "Failed to decode cued text");
-    }
-    return n;
-}
-
-static std::wstring_view
-parse_cued_text(std::string_view src, Marks &marks, std::wstring_view dest) {
-    size_t dest_pos = 0;
-    if (dest.size() < src.size()) throw std::exception("Destination buffer for parse_cued_text() too small");
-    while (src.size()) {
-        auto pos = src.find('\0');
-        size_t limit = pos == std::string_view::npos ? src.size() : pos;
-        if (limit) {
-            dest_pos += decode_into(src.substr(0, limit), dest.substr(dest_pos, dest.size() - dest_pos));
-            src = src.substr(limit, src.size() - limit);
-        }
-        if (pos != std::string_view::npos) {
-            src = src.substr(1, src.size() - 1);
-            if (src.size() >= 4) {
-                uint32_t mark = *((uint32_t*)src.data());
-                marks.entries.emplace_back(mark, (uint32_t)dest_pos);
-                src = src.substr(4, src.size() - 4);
-            }
-        }
-    }
-    return dest.substr(0, dest_pos);
-}
-
-static std::wstring_view
-read_from_shm(id_type cmd_id, const std::wstring_view size, const std::wstring &address, std::vector<wchar_t> &buf, Marks &marks, bool is_cued=false) {
-    id_type shm_size = parse_id(size);
-    handle_raii_null handle(OpenFileMappingW(FILE_MAP_READ, false, address.data()));
-    if (!handle) {
-        output_error(cmd_id, "Could not open shared memory at: " + winrt::to_string(address), winrt::to_string(get_last_error()), __LINE__);
-        return {};
-    }
-    mapping_raii mapping(MapViewOfFile(handle.ptr(), FILE_MAP_READ, 0, 0, (SIZE_T)shm_size));
-    if (!mapping) {
-        output_error(cmd_id, "Could not map shared memory", winrt::to_string(get_last_error()), __LINE__);
-        return {};
-    }
-    buf.reserve(shm_size + 2);
-    std::string_view src((const char*)mapping.ptr(), shm_size);
-    std::wstring_view dest(buf.data(), buf.capacity());
-    if (is_cued) return parse_cued_text(src, marks, dest);
-    return std::wstring_view(buf.data(), decode_into(src, dest));
-}
-
-
-// Speak {{{
-static Revokers speak_revoker = {};
-
-static void
-register_metadata_handler_for_track(MediaPlaybackTimedMetadataTrackList const &tracks, uint32_t index, id_type cmd_id, std::shared_ptr<Marks> marks) {
-    TimedMetadataTrack track = tracks.GetAt(index);
-    tracks.SetPresentationMode((unsigned int)index, TimedMetadataTrackPresentationMode::ApplicationPresented);
-
-    speak_revoker.cue_entered.emplace_back(track.CueEntered(winrt::auto_revoke, [cmd_id, marks](auto track, const auto& args) {
-        if (!main_loop_is_running.load()) return;
-        auto label = track.Label();
-        auto cue = args.Cue().template as<SpeechCue>();
-        output(cmd_id, "cue_entered", {label, cue});
-        if (label != L"SpeechWord") return;
-        uint32_t pos = cue.StartPositionInInput().Value();
-        for (int32_t i = std::max(0, marks->last_reported_mark_index); i < (int32_t)marks->entries.size(); i++) {
-            int32_t idx = -1;
-            if (marks->entries[i].pos_in_text > pos) {
-                idx = i-1;
-                if (idx == marks->last_reported_mark_index && marks->entries[i].pos_in_text - pos < 3) idx = i;
-            } else if (marks->entries[i].pos_in_text == pos) idx = i;
-            if (idx > -1) {
-                output(cmd_id, "mark_reached", {{"id", marks->entries[idx].id}});
-                marks->last_reported_mark_index = idx;
-                break;
-            }
-        }
-    }));
-
-    speak_revoker.cue_exited.emplace_back(track.CueExited(winrt::auto_revoke, [cmd_id](auto track, const auto& args) {
-        if (main_loop_is_running.load()) output(
-            cmd_id, "cue_exited", json_val(track.Label(), args.Cue().template as<SpeechCue>()));
-    }));
-
-    speak_revoker.track_failed.emplace_back(track.TrackFailed(winrt::auto_revoke, [cmd_id](auto, const auto& args) {
-        auto error = args.Error();
-        if (main_loop_is_running.load()) output(
-            cmd_id, "track_failed", {{"code", error.ErrorCode()}, {"hr", json_val::from_hresult(error.ExtendedError())}});
-    }));
-};
-
-
-static void
-handle_speak(id_type cmd_id, std::vector<std::wstring_view> &parts) {
-    bool is_ssml = false, is_shm = false, is_cued = false;
-    try {
-        is_ssml = parts.at(0) == L"ssml";
-        is_shm = parts.at(1) == L"shm";
-        is_cued = parts.at(0) == L"cued";
-    } catch (std::exception const&) {
-        throw std::string("Not a well formed speak command");
-    }
-    parts.erase(parts.begin(), parts.begin() + 2);
-    std::wstring address;
-    auto marks = std::make_shared<Marks>();
-    std::vector<wchar_t> buf;
-    std::wstring_view text;
-    if (is_shm) {
-        text = read_from_shm(cmd_id, parts.at(0), std::wstring(parts.at(1)), buf, *marks, is_cued);
-        if (text.size() == 0) return;
-    } else {
-        address = join(parts);
-        if (address.size() == 0) throw std::string("Address missing");
-        buf.reserve(address.size() + 1);
-        text = std::wstring_view(buf.data(), address.size());
-        address.copy(buf.data(), address.size());
-    }
-    *((wchar_t*)text.data() + text.size()) = 0;  // ensure NULL termination
-
-    output(cmd_id, "synthesizing", {{"ssml", is_ssml}, {"num_marks", marks->entries.size()}, {"text_length", text.size()}});
-    SpeechSynthesisStream stream{nullptr};
-    if (!run_catching_exceptions([&]() {
-        speech_synthesizer.Options().IncludeSentenceBoundaryMetadata(true);
-        speech_synthesizer.Options().IncludeWordBoundaryMetadata(true);
-        if (is_ssml) stream = speech_synthesizer.SynthesizeSsmlToStreamAsync(text).get();
-        else stream = speech_synthesizer.SynthesizeTextToStreamAsync(text).get();
-    }, "Failed to synthesize speech", __LINE__, cmd_id)) return;
-
-    speak_revoker = {};  // delete any revokers previously installed
-    MediaSource source(MediaSource::CreateFromStream(stream, stream.ContentType()));
-
-    speak_revoker.playback_state_changed = media_player.PlaybackSession().PlaybackStateChanged(
-            winrt::auto_revoke, [cmd_id](auto session, auto const&) {
-        if (main_loop_is_running.load()) output(
-            cmd_id, "playback_state_changed", {{"state", session.PlaybackState()}});
-    });
-    speak_revoker.media_opened = media_player.MediaOpened(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
-        if (main_loop_is_running.load()) output(
-            cmd_id, "media_state_changed", {{"state", "opened"}});
-    });
-    speak_revoker.media_ended = media_player.MediaEnded(winrt::auto_revoke, [cmd_id](auto player, auto const&) {
-        if (main_loop_is_running.load()) output(
-            cmd_id, "media_state_changed", {{"state", "ended"}});
-    });
-    speak_revoker.media_failed = media_player.MediaFailed(winrt::auto_revoke, [cmd_id](auto player, auto const& args) {
-        if (main_loop_is_running.load()) output(
-            cmd_id, "media_state_changed", {{"state", "failed"}, {"error", args.ErrorMessage()}, {"hr", json_val::from_hresult(args.ExtendedErrorCode())}, {"code", args.Error()}});
-    });
-    auto playback_item = std::make_shared<MediaPlaybackItem>(source);
-
-    speak_revoker.timed_metadata_tracks_changed = playback_item->TimedMetadataTracksChanged(winrt::auto_revoke,
-        [cmd_id, playback_item_weak_ref = std::weak_ptr(playback_item), marks](auto, auto const &args) {
-        auto change_type = args.CollectionChange();
-        long index;
-        switch (change_type) {
-            case CollectionChange::ItemInserted: index = args.Index(); break;
-            case CollectionChange::Reset: index = -1; break;
-            default: index = -2; break;
-        }
-        auto pi{ playback_item_weak_ref.lock() };
-        if (index > -2 && pi && main_loop_is_running.load()) register_metadata_handler_for_track(pi->TimedMetadataTracks(), index, cmd_id, marks);
-    });
-
-    for (uint32_t i = 0; i < playback_item->TimedMetadataTracks().Size(); i++) {
-        register_metadata_handler_for_track(playback_item->TimedMetadataTracks(), i, cmd_id, marks);
-    }
-    media_player.Source(*playback_item);
-}
-// }}}
-
-// Save {{{
-static void
-save_stream(SpeechSynthesisStream const &&stream, std::filesystem::path path, id_type cmd_id) {
-    unsigned long long stream_size = stream.Size(), bytes_read = 0;
-    DataReader reader(stream);
-    unsigned int n;
-    const static unsigned int chunk_size = 16 * 1024;
-    std::array<uint8_t, chunk_size> buf;
-    std::ofstream outfile;
-    if (!run_catching_exceptions([&](){
-        outfile.open(path.string(), std::ios::out | std::ios::trunc);
-    }, "Failed to create file: " + path.string(), __LINE__, cmd_id)) return;
-
-    while (bytes_read < stream_size) {
-        if (!run_catching_exceptions([&]() {
-            n = reader.LoadAsync(chunk_size).get();
-        }, "Failed to load data from DataReader", __LINE__, cmd_id)) return;
-        if (n > 0) {
-            bytes_read += n;
-            if (!run_catching_exceptions([&]() {
-                reader.ReadBytes(winrt::array_view(buf.data(), buf.data() + n));
-                outfile.write((const char*)buf.data(), n);
-                if (!outfile.good()) throw "Failed to write to output file";
-            }, "Failed to save bytes from DataReader to file", __LINE__, cmd_id)) return;
-        }
-    }
-    output(cmd_id, "saved", {{"size", bytes_read}});
-}
-
-static void
-handle_save(id_type cmd_id, std::vector<std::wstring_view> &parts) {
-    bool is_ssml;
-    try {
-        is_ssml = parts.at(0) == L"ssml";
-    } catch (std::exception const&) {
-        throw "Not a well formed save command"s;
-    }
-    std::vector<wchar_t> buf;
-    std::wstring address;
-    Marks marks;
-    std::wstring_view text = read_from_shm(cmd_id, parts.at(1), std::wstring(parts.at(2)), buf, marks);
-    if (text.size() == 0) return;
-    parts.erase(parts.begin(), parts.begin() + 3);
-    *((wchar_t*)text.data() + text.size()) = 0;  // ensure NULL termination
-    auto filename = join(parts);
-    auto path = std::filesystem::absolute(filename);
-    output(cmd_id, "saving", {{"ssml", is_ssml}, {"output_path", path.string()}});
-    SpeechSynthesisStream stream{nullptr};
-    speech_synthesizer.Options().IncludeSentenceBoundaryMetadata(false);
-    speech_synthesizer.Options().IncludeWordBoundaryMetadata(false);
-    if (!run_catching_exceptions([&]() {
-        if (is_ssml) stream = speech_synthesizer.SynthesizeSsmlToStreamAsync(text).get();
-        else stream = speech_synthesizer.SynthesizeTextToStreamAsync(text).get();
-    }, "Failed to synthesize speech", __LINE__, cmd_id)) return;
-    save_stream(std::move(stream), path, cmd_id);
-}
-// }}}
-
-
-typedef std::function<void(id_type, std::vector<std::wstring_view>, int64_t*)> handler_function;
-
-static DeviceInformationKind
-get_device_kind(const std::wstring x) {
-    if (x == L"device") return DeviceInformationKind::Device;
-    if (x == L"association_endpoint") return DeviceInformationKind::AssociationEndpoint;
-    if (x == L"association_endpoint_container") return DeviceInformationKind::AssociationEndpointContainer;
-    if (x == L"association_endpoint_service") return DeviceInformationKind::AssociationEndpointService;
-    if (x == L"device_container") return DeviceInformationKind::DeviceContainer;
-    if (x == L"device_interface") return DeviceInformationKind::DeviceInterface;
-    if (x == L"device_interface_class") return DeviceInformationKind::DeviceInterfaceClass;
-    if (x == L"device_panel") return DeviceInformationKind::DevicePanel;
-    return DeviceInformationKind::Unknown;
-}
-
-static const std::unordered_map<std::string, handler_function> handlers = {
-
-    {"exit", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t* exit_code) {
-        try {
-            *exit_code = parse_id(parts.at(0));
-        } catch(...) { }
-        *exit_code = 0;
-    }},
-
-    {"echo", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        output(cmd_id, "echo", {{"msg", join(parts)}});
-    }},
-
-    {"play", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        media_player.Play();
-        output(cmd_id, "play", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
-    }},
-
-    {"pause", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        media_player.Pause();
-        output(cmd_id, "pause", {{"playback_state", media_player.PlaybackSession().PlaybackState()}});
-    }},
-
-    {"state", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        auto ps = media_player.PlaybackSession();
-        if (ps) output(cmd_id, "state", {{"playback_state", ps.PlaybackState()}});
-        else output(cmd_id, "state", {{"playback_state", ""}});
-    }},
-
-    {"default_voice", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        output(cmd_id, "default_voice", {{"voice", SpeechSynthesizer::DefaultVoice()}});
-    }},
-
-    {"all_voices", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        output(cmd_id, "all_voices", {{"voices", SpeechSynthesizer::AllVoices()}});
-    }},
-
-    {"all_audio_devices", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        output(cmd_id, "all_audio_devices", {{"devices", DeviceInformation::FindAllAsync(MediaDevice::GetAudioRenderSelector()).get()}});
-    }},
-
-    {"speak", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        handle_speak(cmd_id, parts);
-    }},
-
-    {"audio_device", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        bool found = false;
-        if (parts.size()) {
-            auto device_kind = std::wstring(parts.at(0));
-            parts.erase(parts.begin(), parts.begin() + 1);
-            auto device_id = join(parts);
-            auto di = DeviceInformation::CreateFromIdAsync(device_id, {}, get_device_kind(device_kind)).get();
-            if (di) {
-                media_player.AudioDevice(di);
-                found = true;
-            }
-        }
-        auto x = media_player.AudioDevice();
-        if (x) output(cmd_id, "audio_device", {{"device", x}, {"found", found}});
-        else output(cmd_id, "audio_device", {{"device", ""}, {"found", found}});
-    }},
-
-    {"voice", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        bool found = false;
-        if (parts.size()) {
-            auto voice_id = winrt::hstring(parts.at(0));
-            if (voice_id == L"__default__") {
-                voice_id = SpeechSynthesizer::DefaultVoice().Id();
-            }
-            for (auto const &candidate : SpeechSynthesizer::AllVoices()) {
-                if (candidate.Id() == voice_id) {
-                    speech_synthesizer.Voice(candidate);
-                    found = true;
-                    break;
-                }
-            }
-        }
-        auto x = speech_synthesizer.Voice();
-        if (x) output(cmd_id, "voice", {{"voice", speech_synthesizer.Voice()}, {"found", found}});
-        else output(cmd_id, "voice", {{"voice", ""}, {"found", found}});
-    }},
-
-    {"volume", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        if (parts.size()) {
-            auto vol = parse_double(parts.at(0).data());
-            if (vol < 0 || vol > 1) throw std::out_of_range("Invalid volume value must be between 0 and 1");
-            speech_synthesizer.Options().AudioVolume(vol);
-        }
-        output(cmd_id, "volume", {{"value", speech_synthesizer.Options().AudioVolume()}});
-    }},
-
-    {"rate", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        if (parts.size()) {
-            auto rate = parse_double(parts.at(0).data());
-            if (rate < 0.5 || rate > 6.0) throw std::out_of_range("Invalid rate value must be between 0.5 and 6");
-            speech_synthesizer.Options().SpeakingRate(rate);
-        }
-        output(cmd_id, "rate", {{"value", speech_synthesizer.Options().SpeakingRate()}});
-    }},
-
-    {"pitch", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        if (parts.size()) {
-            auto pitch = parse_double(parts.at(0).data());
-            if (pitch < 0 || pitch > 2) throw std::out_of_range("Invalid pitch value must be between 0 and 2");
-            speech_synthesizer.Options().AudioPitch(pitch);
-        }
-        output(cmd_id, "pitch", {{"value", speech_synthesizer.Options().AudioPitch()}});
-    }},
-
-    {"save", [](id_type cmd_id, std::vector<std::wstring_view> parts, int64_t*) {
-        handle_save(cmd_id, parts);
-    }},
-};
-
-
-static int64_t
-handle_stdin_message(winrt::hstring const &&msg) {
-    if (msg == L"exit") {
-        return 0;
-    }
-    id_type cmd_id;
-    std::wstring_view command;
-    bool ok = false;
-    std::vector<std::wstring_view> parts;
-    int64_t exit_code = -1;
-    if (!run_catching_exceptions([&]() {
-        parts = split(msg);
-        command = parts.at(1); cmd_id = parse_id(parts.at(0));
-        if (cmd_id == 0) {
-            throw std::exception("Command id of zero is not allowed");
-        }
-        parts.erase(parts.begin(), parts.begin() + 2);
-        ok = true;
-    }, "Invalid input message: " + winrt::to_string(msg), __LINE__)) return exit_code;
-    handler_function handler;
-    std::string cmd(winrt::to_string(command));
-    try {
-        handler = handlers.at(cmd.c_str());
-    } catch (std::out_of_range) {
-        output_error(cmd_id, "Unknown command", cmd, __LINE__);
-        return exit_code;
-    }
-    run_catching_exceptions([&]() {
-        handler(cmd_id, parts, &exit_code);
-    }, "Error handling input message", __LINE__, cmd_id);
-    return exit_code;
-}
-
-#define INITIALIZE_FAILURE_MESSAGE  "Failed to initialize SpeechSynthesizer and MediaPlayer"
-
-static PyObject*
-run_main_loop(PyObject*, PyObject*) {
-    if (!run_catching_exceptions([]() {
-        std::cout.imbue(std::locale("C"));
-        std::cin.imbue(std::locale("C"));
-        std::cerr.imbue(std::locale("C"));
-        std::wcin.imbue(std::locale("C"));
-        std::wcout.imbue(std::locale("C"));
-        std::wcerr.imbue(std::locale("C"));
-    }, "Failed to set stdio locales to C", __LINE__)) {
-        return PyLong_FromLongLong(1);
-    }
-
-    if (!run_catching_exceptions([]() {
-    winrt::init_apartment(winrt::apartment_type::multi_threaded);
-    }, "Failed to initialize COM", __LINE__)) {
-        return PyLong_FromLongLong(1);
-    }
-
-    main_thread_id = GetCurrentThreadId();
-
-    if (!run_catching_exceptions([]() {
-        speech_synthesizer = SpeechSynthesizer();
-        media_player = MediaPlayer();
-        media_player.AudioCategory(MediaPlayerAudioCategory::Speech);
-        media_player.AutoPlay(true);
-    }, INITIALIZE_FAILURE_MESSAGE, __LINE__)) {
-        return PyLong_FromLongLong(1);
-    }
-
-    if (_isatty(_fileno(stdin))) {
-        std::cout << "Welcome to winspeech. Type exit to quit." << std::endl;
-    }
-    int64_t exit_code = -1;
-    main_loop_is_running.store(true);
-
-    Py_BEGIN_ALLOW_THREADS;
-    std::string input_buffer;
-    while (exit_code < 0) {
-        try {
-            if (!std::getline(std::cin, input_buffer)) {
-                if (!std::cin.eof()) exit_code = 1;
-                break;
-            }
-            rtrim(input_buffer);
-            if (input_buffer.size() > 0) {
-                run_catching_exceptions([&]() {
-                    exit_code = handle_stdin_message(std::move(winrt::to_hstring(input_buffer)));
-                }, "Error handling STDIN message", __LINE__);
-                if (exit_code >= 0) break;
-            }
-        } catch(...) {
-            exit_code = 1;
-            output_error(0, "Unknown exception type reading and handling line of input", "", __LINE__);
-            break;
-        }
-    }
-    Py_END_ALLOW_THREADS;
-
-    main_loop_is_running.store(false);
-    try {
-        speak_revoker = {};
-        speech_synthesizer = SpeechSynthesizer{nullptr};
-        media_player = MediaPlayer{nullptr};
-    } catch(...) {}
-
-    return PyLong_FromLongLong(exit_code);
-}
-
-#define M(name, args) { #name, name, args, ""}
-static PyMethodDef methods[] = {
-    M(run_main_loop, METH_NOARGS),
-    {NULL, NULL, 0, NULL}
-};
-#undef M
-
-static int
-exec_module(PyObject *m) {
-    PyModule_AddStringMacro(m, INITIALIZE_FAILURE_MESSAGE);
-    return 0;
-}
-
-static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} };
-
-static struct PyModuleDef module_def = {PyModuleDef_HEAD_INIT};
-
-PyMODINIT_FUNC PyInit_winspeech(void) {
-    module_def.m_name     = "winspeech";
-    module_def.m_doc      = "Windows Speech API wrapper";
-    module_def.m_methods  = methods;
-    module_def.m_slots    = slots;
-	return PyModuleDef_Init(&module_def);
-}
diff --git a/src/calibre/utils/windows/winspeech.py b/src/calibre/utils/windows/winspeech.py
deleted file mode 100644
index 0045361630..0000000000
--- a/src/calibre/utils/windows/winspeech.py
+++ /dev/null
@@ -1,573 +0,0 @@
-#!/usr/bin/env python
-# License: GPLv3 Copyright: 2023, Kovid Goyal <kovid at kovidgoyal.net>
-
-
-import json
-import os
-import struct
-import sys
-from contextlib import closing, suppress
-from enum import Enum, auto
-from itertools import count
-from queue import Empty, Queue
-from threading import Thread
-from time import monotonic
-from typing import NamedTuple, Optional, Tuple
-
-from calibre.constants import DEBUG
-from calibre.utils.ipc.simple_worker import start_pipe_worker
-from calibre.utils.shm import SharedMemory
-
-SSML_SAMPLE = '''
-<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US">
-    <voice name="en-US-AriaNeural">
-        We are selling <bookmark mark='flower_1'/>roses and <bookmark mark='flower_2'/>daisies.
-    </voice>
-</speak>
-'''
-
-def start_worker():
-    return start_pipe_worker('from calibre_extensions.winspeech import run_main_loop; raise SystemExit(run_main_loop())')
-
-
-def max_buffer_size(text) -> int:
-    if isinstance(text, str):
-        text = [text]
-    ans = 0
-    for x in text:
-        if isinstance(x, int):
-            ans += 5
-        else:
-            ans += 4 * len(x)
-    return ans
-
-
-def encode_to_file_object(text, output) -> int:
-    if isinstance(text, str):
-        text = [text]
-    p = struct.pack
-    sz = 0
-    for x in text:
-        if isinstance(x, int):
-            output.write(b'\0')
-            output.write(p('=I', x))
-            sz += 5
-        else:
-            b = x.encode('utf-8')
-            output.write(b)
-            sz += len(b)
-    return sz
-
-
-# message decoding {{{
-class Saving(NamedTuple):
-    related_to: int
-    ssml: bool
-    output_path: str
-
-
-class Saved(NamedTuple):
-    related_to: int
-    size: int
-
-
-class CueEntered(NamedTuple):
-    related_to: int
-    start_pos_in_text: int
-    end_pos_in_text: int
-    start_time: int
-    type: str
-    text: str
-
-
-class CueExited(CueEntered):
-    related_to: int
-    start_pos_in_text: int
-    end_pos_in_text: int
-    start_time: int
-    type: str
-
-
-class MarkReached(NamedTuple):
-    related_to: int
-    id: int
-
-
-class SpeechError(OSError):
-
-    def __init__(self, err, msg=''):
-        val = 'There was an error in the Windows Speech subsystem. '
-        if msg:
-            val += f'{msg}. '
-        val += err.msg + ': ' + err.error + f'\nFile: {err.file} Line: {err.line}'
-        if err.hr:
-            # List of mediaserver errors is here: https://www.hresult.info/FACILITY_MEDIASERVER
-            val += f' HRESULT: 0x{err.hr:x}'
-        super().__init__(val)
-
-
-class NoAudioDevices(OSError):
-    display_to_user = True
-    def __init__(self):
-        super().__init__(_('No active audio output devices found.'
-                           ' Connect headphones or speakers. If you are using Remote Desktop then enable Remote Audio for it.'))
-
-
-class NoMediaPack(OSError):
-    display_to_user = True
-
-    def __init__(self):
-        super().__init__(_('This computer is missing the Windows MediaPack, or the DLLs are corrupted. This is needed for Read aloud. Instructions'
-                           ' for installing it are available at {}').format(
-
-            'https://support.medal.tv/support/solutions/articles/48001157311-windows-is-missing-media-pack'))
-
-
-class Error(NamedTuple):
-    msg: str
-    error: str = ''
-    line: int = 0
-    file: str = 'winspeech.py'
-    hr: str = 0
-    related_to: int = 0
-
-    def as_exception(self, msg='', check_for_no_audio_devices=False):
-        from calibre_extensions.winspeech import INITIALIZE_FAILURE_MESSAGE
-        if check_for_no_audio_devices and self.hr == 0xc00d36fa:
-            return NoAudioDevices()
-        if check_for_no_audio_devices and self.hr == 0x80070002 and self.msg == INITIALIZE_FAILURE_MESSAGE:
-            return NoMediaPack()
-        return SpeechError(self, msg)
-
-
-class Synthesizing(NamedTuple):
-    related_to: int
-    ssml: bool
-    num_marks: int
-    text_length: int
-
-
-class TrackFailed(NamedTuple):
-    related_to: int
-    code: str
-    hr: str
-
-
-class PlaybackState(Enum):
-    none = auto()
-    opening = auto()
-    buffering = auto()
-    playing = auto()
-    paused = auto()
-
-
-class PlaybackStateChanged(NamedTuple):
-    related_to: int
-    state: PlaybackState
-
-
-class MediaState(Enum):
-    opened = auto()
-    ended = auto()
-    failed = auto()
-
-
-class MediaPlayerError(Enum):
-    unknown = auto()
-    aborted = auto()
-    network_error = auto()
-    decoding_error = auto()
-    source_not_supported = auto()
-
-
-class MediaStateChanged(NamedTuple):
-    related_to: int
-    state: MediaState
-    error: str = ""
-    code: MediaPlayerError = MediaPlayerError.unknown
-    hr: int = 0
-
-    def as_exception(self):
-        err = Error("Playback of speech stream failed", self.error + f' ({self.code})', hr=self.hr)
-        return err.as_exception(check_for_no_audio_devices=True)
-
-
-class Echo(NamedTuple):
-    related_to: int
-    msg: str
-
-
-class Play(NamedTuple):
-    related_to: int
-    playback_state: PlaybackState
-
-
-class Pause(NamedTuple):
-    related_to: int
-    playback_state: PlaybackState
-
-
-class State(NamedTuple):
-    related_to: int
-    playback_state: PlaybackState
-
-
-class VoiceInformation(NamedTuple):
-    display_name: str
-    description: str
-    id: str
-    language: str
-    gender: str
-
-
-class DefaultVoice(NamedTuple):
-    related_to: int
-    voice: VoiceInformation
-
-
-class Voice(NamedTuple):
-    related_to: int
-    voice: Optional[VoiceInformation]
-    found: bool = True
-
-
-class DeviceInformation(NamedTuple):
-    id: str
-    name: str
-    kind: str
-    is_default: bool
-    is_enabled: bool
-
-    def spec(self) -> Tuple[str, str]:
-        return self.kind, self.id
-
-
-class AudioDevice(NamedTuple):
-    related_to: int
-    device: Optional[DeviceInformation]
-    found: bool = True
-
-
-class AllAudioDevices(NamedTuple):
-    related_to: int
-    devices: Tuple[DeviceInformation, ...]
-
-
-class AllVoices(NamedTuple):
-    related_to: int
-    voices: Tuple[VoiceInformation, ...]
-
-
-class Volume(NamedTuple):
-    related_to: int
-    value: float
-
-
-class Rate(NamedTuple):
-    related_to: int
-    value: float
-
-
-class Pitch(NamedTuple):
-    related_to: int
-    value: float
-
-
-def parse_message(line):
-    parts = line.strip().split(b' ', 2)
-    msg_id, msg_type, ans = int(parts[0]), parts[1].decode(), json.loads(parts[2])
-    ans['related_to'] = msg_id
-    if msg_type == 'cue_entered':
-        return CueEntered(**ans)
-    if msg_type == 'cue_exited':
-        return CueExited(**ans)
-    if msg_type == 'mark_reached':
-        return MarkReached(**ans)
-    if msg_type == 'playback_state_changed':
-        ans['state'] = getattr(PlaybackState, ans['state'])
-        return PlaybackStateChanged(**ans)
-    if msg_type == 'media_state_changed':
-        ans['state'] = getattr(MediaState, ans['state'])
-        if 'code' in ans:
-            ans['code'] = getattr(MediaPlayerError, ans['code'])
-        if 'hr' in ans:
-            ans['hr'] = int(ans['hr'], 16)
-        return MediaStateChanged(**ans)
-    if msg_type == 'error':
-        if 'hr' in ans:
-            ans['hr'] = int(ans['hr'], 16)
-        return Error(**ans)
-    if msg_type == 'synthesizing':
-        return Synthesizing(**ans)
-    if msg_type == 'track_failed':
-        return TrackFailed(**ans)
-    if msg_type == 'saving':
-        return Saving(**ans)
-    if msg_type == 'saved':
-        return Saved(**ans)
-    if msg_type == 'echo':
-        return Echo(**ans)
-    if msg_type == 'play':
-        ans['playback_state'] = getattr(PlaybackState, ans['playback_state'])
-        return Play(**ans)
-    if msg_type == 'pause':
-        ans['playback_state'] = getattr(PlaybackState, ans['playback_state'])
-        return Pause(**ans)
-    if msg_type == 'state':
-        ans['playback_state'] = getattr(PlaybackState, ans['playback_state'])
-        return State(**ans)
-    if msg_type == 'default_voice':
-        ans['voice'] = VoiceInformation(**ans['voice'])
-        return DefaultVoice(**ans)
-    if msg_type == 'all_voices':
-        ans['voices'] = tuple(VoiceInformation(**x) for x in ans['voices'])
-        return AllVoices(**ans)
-    if msg_type == 'all_audio_devices':
-        ans['devices'] = tuple(DeviceInformation(**x) for x in ans['devices'])
-        return AllAudioDevices(**ans)
-    if msg_type == 'audio_device':
-        if ans['device']:
-            ans['device'] = DeviceInformation(**ans['device'])
-        else:
-            ans['device'] = None
-        return AudioDevice(**ans)
-    if msg_type == 'voice':
-        if ans['voice']:
-            ans['voice'] = VoiceInformation(**ans['voice'])
-        else:
-            ans['voice'] = None
-        return Voice(**ans)
-    if msg_type == 'volume':
-        return Volume(**ans)
-    if msg_type == 'rate':
-        return Rate(**ans)
-    if msg_type == 'Pitch':
-        return Pitch(**ans)
-    return Error(f'Unknown message type: {msg_type}')
-# }}}
-
-
-class WinSpeech:
-
-    def __init__(self, event_dispatcher=print):
-        self._worker = None
-        self.queue = Queue()
-        self.msg_id_counter = count()
-        next(self.msg_id_counter)
-        self.pending_messages = []
-        self.current_speak_cmd_id = 0
-        self.waiting_for = -1
-        self.event_dispatcher = event_dispatcher
-
-    @property
-    def worker(self):
-        if self._worker is None:
-            self._worker = start_worker()
-            Thread(name='WinspeechQueue', target=self._get_messages, args=(self._worker, self.queue), daemon=True).start()
-        return self._worker
-
-    def __del__(self):
-        if self._worker is not None:
-            self.send_command('exit')
-            with suppress(Exception):
-                self._worker.wait(0.3)
-            if self._worker.poll() is None:
-                self._worker.kill()
-            self._worker = None
-    shutdown = __del__
-
-    def _get_messages(self, worker, queue):
-        def send_msg(msg):
-            if self.waiting_for == msg.related_to:
-                self.queue.put(msg)
-            else:
-                self.dispatch_message(msg)
-        try:
-            for line in worker.stdout:
-                line = line.strip()
-                if DEBUG:
-                    with suppress(Exception):
-                        print('winspeech:\x1b[32m<-\x1b[39m', line.decode('utf-8', 'replace'), flush=True)
-                send_msg(parse_message(line))
-        except OSError as e:
-            send_msg(Error('Failed to read from worker', str(e)))
-        except Exception as e:
-            send_msg(Error('Failed to parse message from worker', str(e)))
-
-    def send_command(self, cmd):
-        cmd_id = next(self.msg_id_counter)
-        w = self.worker
-        cmd = f'{cmd_id} {cmd}'
-        if DEBUG:
-            with suppress(Exception):
-                print('winspeech:\x1b[31m->\x1b[39m', cmd, flush=True)
-        w.stdin.write(f'{cmd}\n'.encode())
-        w.stdin.flush()
-        return cmd_id
-
-    def wait_for(self, error_msg, *classes, related_to=-1, timeout=4):
-        orig, self.waiting_for = self.waiting_for, related_to
-        try:
-            limit = monotonic() + timeout
-            while True:
-                left = limit - monotonic()
-                if left <= 0:
-                    break
-                try:
-                    x = self.queue.get(True, left)
-                except Empty:
-                    break
-                if (not classes or isinstance(x, *classes)) and (not related_to or x.related_to == related_to):
-                    return x
-                if isinstance(x, Error) and (not related_to or x.related_to == related_to):
-                    raise x.as_exception(error_msg)
-            raise TimeoutError('Timed out waiting for: ' + error_msg)
-        finally:
-            self.waiting_for = orig
-
-    def speak(self, text, is_cued=False, is_xml=False):
-        with SharedMemory(size=max_buffer_size(text)) as shm:
-            st = 'cued' if is_cued else ('ssml' if is_xml else 'text')
-            sz = encode_to_file_object(text, shm)
-            self.current_speak_cmd_id = self.send_command(f'speak {st} shm {sz} {shm.name}')
-            self.wait_for('speech synthesis to start', Synthesizing, related_to=self.current_speak_cmd_id, timeout=8)
-        return self.current_speak_cmd_id
-
-    def dispatch_message(self, x):
-        if x.related_to == self.current_speak_cmd_id:
-            if isinstance(x, (Error, MediaStateChanged, MarkReached)):
-                self.event_dispatcher(x)
-
-    def pause(self):
-        self.wait_for('pause', Pause, related_to=self.send_command('pause'))
-
-    def play(self):
-        self.wait_for('play', Play, related_to=self.send_command('play'))
-
-    def set_rate(self, val):
-        val = float(val)
-        self.wait_for('Setting the rate', Rate, related_to=self.send_command(f'rate {val}'))
-
-    def set_voice(self, spec, default_system_voice):
-        val = spec or getattr(default_system_voice, 'id', '__default__')
-        x = self.wait_for('Setting the voice', Voice, related_to=self.send_command(f'voice {val}'))
-        if not x.found:
-            raise SpeechError(f'Failed to find the voice: {val}')
-
-    def set_audio_device(self, spec, default_system_audio_device):
-        if not spec and not default_system_audio_device:
-            return
-        if not spec:
-            spec = default_system_audio_device.spec()
-        x = self.wait_for('Setting the audio device', AudioDevice, related_to=self.send_command(f'audio_device {spec[0]} {spec[1]}'))
-        if not x.found:
-            raise SpeechError(f'Failed to find the audio device: {spec}')
-
-    def get_audio_device(self):
-        return self.wait_for('Audio device', AudioDevice, related_to=self.send_command('audio_device'))
-
-    def default_voice(self):
-        return self.wait_for('Default voice', DefaultVoice, related_to=self.send_command('default_voice'))
-
-    def all_voices(self):
-        return self.wait_for('All voices', AllVoices, related_to=self.send_command('all_voices'))
-
-    def all_audio_devices(self):
-        return self.wait_for('All audio devices', AllAudioDevices, related_to=self.send_command('all_audio_devices'))
-
-
-
-# develop {{{
-def develop_loop(*commands):
-    p = start_worker()
-    q = Queue()
-
-    def echo_output(p):
-        for line in p.stdout:
-            sys.stdout.buffer.write(b'\x1b[33m' + line + b'\x1b[39m]]'[:-2])
-            sys.stdout.buffer.flush()
-            q.put(parse_message(line))
-
-    def send(*a):
-        cmd = ' '.join(map(str, a)) + '\n'
-        p.stdin.write(cmd.encode())
-        p.stdin.flush()
-
-    Thread(name='Echo', target=echo_output, args=(p,), daemon=True).start()
-    exit_code = 0
-    with closing(p.stdin), closing(p.stdout):
-        try:
-            send('1 echo Synthesizer started')
-            send('1 volume 0.1')
-            for command in commands:
-                if isinstance(command, str):
-                    send(command)
-                else:
-                    while True:
-                        m = q.get()
-                        if m.related_to != command:
-                            continue
-                        if isinstance(m, MediaStateChanged) and m.state in (MediaState.ended, MediaState.failed):
-                            break
-                        if isinstance(m, Saved):
-                            break
-                        if isinstance(m, Error):
-                            exit_code = 1
-                            break
-            send(f'333 echo Synthesizer exiting with exit code: {exit_code}')
-            send(f'334 exit {exit_code}')
-            ec = p.wait(1)
-            print(f'Worker exited with code: {os.waitstatus_to_exitcode(p.wait(1))}', file=sys.stderr, flush=True)
-            raise SystemExit(ec)
-        finally:
-            if p.poll() is None:
-                p.kill()
-                raise SystemExit(1)
-
-
-def develop_speech(text='Lucca Brazzi sleeps with the fishes.', mark_words=True):
-    print('\x1b[32mSpeaking', text, '\x1b[39m]]'[:-2], flush=True)
-    st = 'ssml' if '<speak' in text else 'text'
-    if mark_words:
-        st = 'cued'
-        words = text.split()
-        text = []
-        for i, w in enumerate(words):
-            text.append(i+1)
-            text.append(w)
-            if w is not words[-1]:
-                text.append(' ')
-
-    with SharedMemory(size=max_buffer_size(text)) as shm:
-        sz = encode_to_file_object(text, shm)
-        develop_loop(f'2 speak {st} shm {sz} {shm.name}', 2)
-
-
-def develop_save(text='Lucca Brazzi sleeps with the fishes.', filename="speech.wav"):
-    print('\x1b[32mSaving', text, '\x1b[39m]]'[:-2], flush=True)
-    st = 'ssml' if '<speak' in text else 'text'
-    with SharedMemory(size=max_buffer_size(text)) as shm:
-        sz = encode_to_file_object(text, shm)
-        develop_loop(f'2 save {st} {sz} {shm.name} {filename}', 2)
-
-
-def develop_interactive():
-    import subprocess
-
-    from calibre.debug import run_calibre_debug
-    print('\x1b[32mInteractive winspeech', '\x1b[39m]]'[:-2], flush=True)
-    p = run_calibre_debug('-c', 'from calibre_extensions.winspeech import run_main_loop; raise SystemExit(run_main_loop())',
-                          stdin=subprocess.PIPE)
-    try:
-        while True:
-            line = input()
-            if p.poll() is not None:
-                raise SystemExit(p.returncode)
-            p.stdin.write((line + '\n').encode())
-            p.stdin.flush()
-    except KeyboardInterrupt:
-        print('Exiting on interrupt', flush=True)
-    finally:
-        if p.poll() is None:
-            p.kill()
-# }}}