From 661499e391cc212964fe5ab1bb027e467b0ee0bd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 29 Jul 2025 13:14:40 +0530 Subject: [PATCH] Move piper backend to use new code --- src/calibre/gui2/tts/piper.py | 384 +++++++-------------------------- src/calibre/gui2/tts/types.py | 13 +- src/calibre/test_build.py | 9 +- src/calibre/utils/tts/piper.py | 21 +- 4 files changed, 105 insertions(+), 322 deletions(-) diff --git a/src/calibre/gui2/tts/piper.py b/src/calibre/gui2/tts/piper.py index 3033ba6e92..ed25a33c38 100644 --- a/src/calibre/gui2/tts/piper.py +++ b/src/calibre/gui2/tts/piper.py @@ -2,44 +2,26 @@ # License: GPLv3 Copyright: 2024, Kovid Goyal import atexit -import io import json import os -import re import sys from collections import deque from collections.abc import Iterable, Iterator from contextlib import suppress from dataclasses import dataclass -from functools import lru_cache from itertools import count from time import monotonic -from typing import BinaryIO -from qt.core import ( - QAudio, - QAudioFormat, - QAudioSink, - QByteArray, - QIODevice, - QIODeviceBase, - QMediaDevices, - QObject, - QProcess, - Qt, - QTextToSpeech, - QWidget, - pyqtSignal, - sip, -) +from qt.core import QAudio, QAudioFormat, QAudioSink, QByteArray, QIODevice, QIODeviceBase, QMediaDevices, QObject, Qt, QTextToSpeech, QWidget, pyqtSignal, sip -from calibre.constants import cache_dir, is_debugging, iswindows, piper_cmdline +from calibre.constants import cache_dir, is_debugging, iswindows from calibre.gui2 import error_dialog from calibre.gui2.tts.types import TTS_EMBEDED_CONFIG, EngineSpecificSettings, Quality, TTSBackend, Voice, widget_parent from calibre.spell.break_iterator import PARAGRAPH_SEPARATOR, split_into_sentences_for_tts from calibre.utils.filenames import ascii_text from calibre.utils.localization import canonicalize_lang, get_lang from calibre.utils.resources import get_path as P +from calibre.utils.tts.piper import SynthesisResult, global_piper_instance, global_piper_instance_if_exists, play_pcm_data HIGH_QUALITY_SAMPLE_RATE = 22050 @@ -60,20 +42,14 @@ def audio_format(audio_rate: int = HIGH_QUALITY_SAMPLE_RATE) -> QAudioFormat: return fmt -def piper_process_metadata(model_path, config_path, s: EngineSpecificSettings, voice: Voice) -> tuple[int, list[str]]: +def piper_process_metadata(callback, model_path, config_path, s: EngineSpecificSettings, voice: Voice) -> int: if not model_path: raise Exception('Could not download voice data') if 'metadata' not in voice.engine_data: with open(config_path) as f: voice.engine_data['metadata'] = json.load(f) - audio_rate = voice.engine_data['metadata']['audio']['sample_rate'] - length_scale = max(0.1, 1 + -1 * s.rate) # maps -1 to 1 to 2 to 0.1 - cmdline = list(piper_cmdline()) + [ - '--model', model_path, '--config', config_path, '--output-raw', '--json-input', - '--sentence-silence', str(s.sentence_delay), '--length_scale', str(length_scale)] - if is_debugging(): - cmdline.append('--debug') - return audio_rate, cmdline + return global_piper_instance().set_voice( + callback, config_path, model_path, length_scale_multiplier=s.rate, sentence_delay=s.sentence_delay) def piper_cache_dir() -> str: @@ -149,8 +125,7 @@ class Utterance: id: int start: int length: int - payload_size: int - left_to_write: QByteArray + sentence: str audio_data: QByteArray started: bool = False @@ -241,47 +216,19 @@ class UtteranceAudioQueue(QIODevice): def split_into_utterances(text: str, counter: count, lang: str = 'en'): for start, sentence in split_into_sentences_for_tts(text, lang): - payload = json.dumps({'text': sentence}).encode('utf-8') - ba = QByteArray() - ba.reserve(len(payload) + 1) - ba.append(payload) - ba.append(UTTERANCE_SEPARATOR) - u = Utterance(id=next(counter), payload_size=len(ba), audio_data=QByteArray(), - left_to_write=ba, start=start, length=len(sentence)) + u = Utterance(id=next(counter), audio_data=QByteArray(), sentence=sentence, start=start, length=len(sentence)) debug(f'Utterance created {u.id} {start=}: {sentence!r}') yield u -@lru_cache(2) -def stderr_pat(): - return re.compile(rb'\[piper\] \[([a-zA-Z0-9_]+?)\] (.+)') - - -def detect_end_of_data(data: bytes, callback): - lines = data.split(b'\n') - for line in lines[:-1]: - if m := stderr_pat().search(line): - which, payload = m.group(1), m.group(2) - if which == b'info': - debug(f'[piper-info] {payload.decode("utf-8", "replace")}') - if payload.startswith(b'Real-time factor:'): - callback(True, None) - elif which == b'error': - callback(False, payload.decode('utf-8', 'replace')) - elif which == b'debug': - debug(f'[piper-debug] {payload.decode("utf-8", "replace")}') - return lines[-1] - - class Piper(TTSBackend): engine_name: str = 'piper' filler_char: str = PARAGRAPH_SEPARATOR - _synthesis_done = pyqtSignal() + _synthesis_done = pyqtSignal(object, object, object) def __init__(self, engine_name: str = '', parent: QObject | None = None): super().__init__(parent) - self._process: QProcess | None = None self._audio_sink: QAudioSink | None = None self._current_voice: Voice | None = None @@ -296,7 +243,7 @@ class Piper(TTSBackend): self._errors_from_piper: list[str] = [] self._pending_stderr_data = b'' - self._synthesis_done.connect(self._utterance_synthesized, type=Qt.ConnectionType.QueuedConnection) + self._synthesis_done.connect(self._on_synthesis_done, type=Qt.ConnectionType.QueuedConnection) atexit.register(self.shutdown) @property @@ -304,27 +251,16 @@ class Piper(TTSBackend): self._load_voice_metadata() return {'': self._voices} - def _wait_for_process_to_start(self) -> bool: - if not self.process.waitForStarted(): - cmdline = [self.process.program()] + self.process.arguments() - if self.process.error() is QProcess.ProcessError.TimedOut: - self._set_error(f'Timed out waiting for piper process {cmdline} to start') - else: - self._set_error(f'Failed to start piper process: {cmdline}') - return False - return True - def say(self, text: str) -> None: if self._last_error: return self.stop() - if not self._wait_for_process_to_start(): - return + self.ensure_started() lang = 'en' if self._current_voice and self._current_voice.language_code: lang = self._current_voice.language_code self._utterances_being_synthesized.extend(split_into_utterances(text, self._utterance_counter, lang)) - self._write_current_utterance() + self._queue_current_utterance() def pause(self) -> None: if self._audio_sink is not None: @@ -335,29 +271,27 @@ class Piper(TTSBackend): self._audio_sink.resume() def stop(self) -> None: - if self._process is not None: + if self._audio_sink is not None: if self._state is not QTextToSpeech.State.Ready or self._utterances_being_synthesized or self._utterances_being_spoken: self.shutdown() - # We cannot re-create self.process here as that will cause the + # We cannot call ensure_started() here as that will cause the # audio device to go to active state which will cause a # speaking event to be generated def shutdown(self) -> None: - if self._process is not None: + if self._audio_sink is not None: + gp = global_piper_instance_if_exists() + if gp is not None: + gp.cancel() self._audio_sink.stateChanged.disconnect() - self._process.readyReadStandardError.disconnect() - self._process.bytesWritten.disconnect() - self._process.readyReadStandardOutput.disconnect() - self._process.stateChanged.disconnect() - self._process.kill() - self._process.waitForFinished(-1) # this dance is needed otherwise stop() is very slow on Linux self._audio_sink.suspend() self._audio_sink.reset() self._audio_sink.stop() sip.delete(self._audio_sink) - sip.delete(self._process) - self._process = self._audio_sink = None + self._audio_sink = None + self._utterances_being_synthesized.clear() + self._utterances_being_spoken.clear() self._set_state(QTextToSpeech.State.Ready) def reload_after_configure(self) -> None: @@ -379,9 +313,8 @@ class Piper(TTSBackend): self._last_error = msg self._set_state(QTextToSpeech.State.Error) - @property - def process(self) -> QProcess: - if self._process is None: + def ensure_started(self) -> None: + if self._audio_sink is None: model_path = config_path = '' try: self._load_voice_metadata() @@ -394,18 +327,9 @@ class Piper(TTSBackend): self._utterances_being_spoken.clear() self._utterances_being_synthesized.clear() self._errors_from_piper.clear() - self._process = QProcess(self) - self._pending_stderr_data = b'' self._set_state(QTextToSpeech.State.Ready) - audio_rate, cmdline = piper_process_metadata(model_path, config_path, s, voice) - self._process.setProgram(cmdline[0]) - self._process.setArguments(cmdline[1:]) - debug('Running piper:', cmdline) - self._process.readyReadStandardError.connect(self.piper_stderr_available) - self._process.readyReadStandardOutput.connect(self.piper_stdout_available) - self._process.bytesWritten.connect(self.bytes_written) - self._process.stateChanged.connect(self._update_status) + audio_rate = piper_process_metadata(self.on_synthesis_done, model_path, config_path, s, voice) fmt = audio_format(audio_rate) dev = None if s.audio_device_id: @@ -420,47 +344,37 @@ class Piper(TTSBackend): if s.volume is not None: self._audio_sink.setVolume(s.volume) self._audio_sink.stateChanged.connect(self._utterances_being_spoken.audio_state_changed) - self._process.start() self._audio_sink.start(self._utterances_being_spoken) - return self._process - def piper_stdout_available(self) -> None: - if self._utterances_being_synthesized: + def on_synthesis_done(self, sr, err, tb): + self._synthesis_done.emit(sr, err, tb) + + def _on_synthesis_done(self, sr: SynthesisResult, err: Exception, tb: str): + if self._audio_sink is None: + return + if err is not None: + self._errors_from_piper.append(str(err)) + self._errors_from_piper.append(tb) + else: u = self._utterances_being_synthesized[0] - while True: - ba = self.process.readAll() - if not len(ba): - break - debug('Synthesized data read:', len(ba), 'bytes') - u.audio_data.append(ba) - - def piper_stderr_available(self) -> None: - if self._process is not None: - def callback(ok, payload): - if ok: - if self._utterances_being_synthesized: - self._synthesis_done.emit() + if u.id == sr.utterance_id: + u.audio_data.append(sr.audio_data) + if sr.is_last: + debug(f'Utterance {u.id} got {len(sr.audio_data)} bytes of audio data from piper') + self._utterances_being_synthesized.popleft() + u.synthesized = True + if len(u.audio_data): + self._utterances_being_spoken.add_utterance(u) else: - self._errors_from_piper.append(payload.decode('utf-8', 'replace')) - data = self._pending_stderr_data + bytes(self._process.readAllStandardError()) - self._pending_stderr_data = detect_end_of_data(data, callback) - - def _utterance_synthesized(self): - self.piper_stdout_available() # just in case - u = self._utterances_being_synthesized.popleft() - u.synthesized = True - debug(f'Utterance {u.id} got {len(u.audio_data)} bytes of audio data from piper') - if len(u.audio_data): - self._utterances_being_spoken.add_utterance(u) - self._write_current_utterance() + debug(f'Synthesized data read for utterance {u.id}: {len(sr.audio_data)} bytes') + self._queue_current_utterance() self._update_status() def _update_status(self): - if self._process is not None and self._process.state() is QProcess.ProcessState.NotRunning: - if self._process.exitStatus() is not QProcess.ExitStatus.NormalExit or self._process.exitCode(): - m = '\n'.join(self._errors_from_piper) - self._set_error(f'piper process failed with exit code: {self._process.exitCode()} and error messages: {m}') - return + if self._errors_from_piper: + m = '\n'.join(self._errors_from_piper) + self._set_error(f'piper failed with error: {m}') + return if self._state is QTextToSpeech.State.Error: return state = self._utterances_being_spoken.audio_state @@ -478,21 +392,12 @@ class Piper(TTSBackend): if not self._utterances_being_synthesized and not self._utterances_being_spoken: self._set_state(QTextToSpeech.State.Ready) - def bytes_written(self, count: int) -> None: - self._write_current_utterance() - - def _write_current_utterance(self) -> None: + def _queue_current_utterance(self) -> None: if self._utterances_being_synthesized: u = self._utterances_being_synthesized[0] - while len(u.left_to_write): - written = self.process.write(u.left_to_write) - if written < 0: - self._set_error(f'Failed to write to piper process with error: {self.process.errorString()}') - break - if not u.started and written: - u.started = True - debug(f'Utterance {u.id} synthesis started') - u.left_to_write = u.left_to_write.last(len(u.left_to_write) - written) + global_piper_instance().synthesize(u.id, u.sentence) + u.started = True + debug(f'Utterance {u.id} synthesis queued') def audio_sink_state_changed(self, state: QAudio.State) -> None: self._update_status() @@ -564,7 +469,7 @@ class PiperEmbedded: lang = get_lang() lang = canonicalize_lang(lang) or lang self._default_voice = self._voice_for_lang.get(lang) or self._voice_for_lang['eng'] - self._current_voice = self._process = self._pipe_reader = None + self._current_voice = None self._current_audio_rate = 0 def resolve_voice(self, lang: str, voice_name: str) -> Voice: @@ -586,42 +491,27 @@ class PiperEmbedded: if voice is not self._current_voice: self._current_voice = voice self.shutdown() - self.ensure_process_started() - piper_done, errors_from_piper = [], [] + self.ensure_started() needs_conversion = sample_rate != self._current_audio_rate if needs_conversion: from calibre_extensions.ffmpeg import resample_raw_audio_16bit - def callback(ok, payload): - if ok: - piper_done.append(True) - else: - errors_from_piper.append(payload.decode('utf-8', 'replace')) - for text in texts: text = text.strip() if not text: yield b'', 0. continue - payload = json.dumps({'text': text}).encode('utf-8') - self._process.stdin.write(payload) - self._process.stdin.write(UTTERANCE_SEPARATOR) - self._process.stdin.flush() - stderr_data = b'' - buf = io.BytesIO() - piper_done, errors_from_piper = [], [] + all_data = [] + global_piper_instance().synthesize(1, text) + while True: + sr, exc, tb = self._queue.get() + if exc is not None: + raise Exception(f'failed to synthesize text to audio with error: {exc} and traceback: {tb}') + all_data.append(sr.audio_data) + if sr.is_last: + break - def stderr_callback(data: bytes) -> bool: - nonlocal stderr_data - stderr_data = detect_end_of_data(stderr_data + data, callback) - return not piper_done - - try: - self._pipe_reader(buf.write, stderr_callback) - except Exception as e: - raise Exception(f'Reading output from piper process failed with error: {e} and STDERR: ' + '\n'.join(errors_from_piper)) - - raw_data = buf.getvalue() + raw_data = b''.join(all_data) if needs_conversion: raw_data = resample_raw_audio_16bit(raw_data, self._current_audio_rate, sample_rate) yield raw_data, duration_of_raw_audio_data(raw_data, sample_rate) @@ -635,126 +525,23 @@ class PiperEmbedded: return True def shutdown(self): - if self._process is not None: - self._pipe_reader.close() - self._pipe_reader = None - self._process.stdin.close() - self._process.stdout.close() - self._process.stderr.close() - self._process.kill() - self._process.wait() - self._process = None + if self._current_audio_rate != 0: + gp = global_piper_instance_if_exists() + if gp is not None: + gp.cancel() + self._current_audio_rate = 0 __del__ = shutdown - def ensure_process_started(self): - if self._process is not None: - return - model_path, config_path = download_voice(self._current_voice, headless=True) - self._current_audio_rate, cmdline = piper_process_metadata(model_path, config_path, self._embedded_settings, self._current_voice) - import subprocess - self._process = subprocess.Popen(cmdline, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - self._pipe_reader = (ThreadedPipeReader if iswindows else PipeReader)(self._process.stdout, self._process.stderr) + def on_synthesis_done(self, sr: SynthesisResult, exc: Exception, tb: str) -> None: + self._queue.put((sr, exc, tb)) - -class PipeReader: - - TIMEOUT = 30. # seconds - - def __init__(self, stdout: BinaryIO, stderr: BinaryIO): - self.stdout_fd = stdout.fileno() - self.stderr_fd = stderr.fileno() - os.set_blocking(self.stdout_fd, False) - os.set_blocking(self.stderr_fd, False) - - def close(self): - self.stderr_fd = self.stdout_fd = -1 - - def __call__(self, stdout_callback, stderr_callback): - from select import select - out, err = self.stdout_fd, self.stderr_fd - readers = out, err - buf = memoryview(bytearray(io.DEFAULT_BUFFER_SIZE)) - - def readall(fd: int) -> bytes: - output = io.BytesIO() - while True: - try: - num = os.readv(fd, (buf,)) - except BlockingIOError: - break - else: - output.write(buf[:num]) - return output.getvalue() - - def readinto(fd: int, callback) -> None: - while True: - try: - num = os.readv(fd, (buf,)) - except BlockingIOError: - break - else: - callback(buf[:num]) - - while True: - r, _, _ = select(readers, (), (), self.TIMEOUT) - if not r: - raise TimeoutError('Timed out waiting for output from piper process') - if out in r: - readinto(out, stdout_callback) - if err in r: - data = readall(err) - if not stderr_callback(data): - # In case there is new data written to stdout - readinto(out, stdout_callback) - break - - -class ThreadedPipeReader(PipeReader): - - def __init__(self, stdout: BinaryIO, stderr: BinaryIO): - from queue import Queue - from threading import Event, Thread - self.shutting_down = Event() - self.queue = Queue() - Thread(target=self._reader, args=(stdout.fileno(), True), daemon=True).start() - Thread(target=self._reader, args=(stderr.fileno(), False), daemon=True).start() - - def close(self): - self.shutting_down.set() - - def __call__(self, stdout_callback, stderr_callback): - from queue import Empty - while True: - data, is_stdout, err = self.queue.get(True, self.TIMEOUT) - if err is not None: - raise err - if data: - if is_stdout: - stdout_callback(data) - else: - if not stderr_callback(data): - # in case more data was written to stdout - while True: - try: - data, is_stdout, err = self.queue.get_nowait() - except Empty: - break - if err is not None: - raise err - if is_stdout: - stdout_callback(data) - break - - def _reader(self, pipe_fd: int, is_stdout: bool): - while not self.shutting_down.is_set(): - try: - data = os.read(pipe_fd, io.DEFAULT_BUFFER_SIZE) - except OSError as e: - if not self.shutting_down.is_set(): - self.queue.put((b'', is_stdout, e)) - break - else: - self.queue.put((data, is_stdout, None)) + def ensure_started(self): + if self._current_audio_rate == 0: + from queue import Queue + model_path, config_path = download_voice(self._current_voice, headless=True) + self._queue = Queue() + self._current_audio_rate = piper_process_metadata( + self.on_synthesis_done, model_path, config_path, self._embedded_settings, self._current_voice) def duration_of_raw_audio_data(data: bytes, sample_rate: int = HIGH_QUALITY_SAMPLE_RATE, bytes_per_sample: int = 2, num_channels: int = 1) -> float: @@ -765,25 +552,14 @@ def duration_of_raw_audio_data(data: bytes, sample_rate: int = HIGH_QUALITY_SAMP # develop {{{ def develop_embedded(): - import subprocess - - from calibre.utils.speedups import ReadOnlyFileBuffer - from calibre_extensions.ffmpeg import transcode_single_audio_stream, wav_header_for_pcm_data p = PiperEmbedded() - all_data = [b''] - sz = 0 + all_data = [] for data, duration in p.text_to_raw_audio_data(( 'Hello, good day to you.', 'This is the second sentence.', 'This is the final sentence.' )): print(f'{duration=} {len(data)=}') all_data.append(data) - sz += len(data) - all_data[0] = wav_header_for_pcm_data(sz, HIGH_QUALITY_SAMPLE_RATE) - wav = ReadOnlyFileBuffer(b''.join(all_data), name='tts.wav') - m4a = io.BytesIO() - m4a.name = 'tts.m4a' - transcode_single_audio_stream(wav, m4a) - subprocess.run(['mpv', '-'], input=m4a.getvalue()) + play_pcm_data(b''.join(all_data), HIGH_QUALITY_SAMPLE_RATE) def develop(): diff --git a/src/calibre/gui2/tts/types.py b/src/calibre/gui2/tts/types.py index fd1f8548b2..40a2aad27e 100644 --- a/src/calibre/gui2/tts/types.py +++ b/src/calibre/gui2/tts/types.py @@ -9,7 +9,7 @@ from typing import Literal, NamedTuple from qt.core import QApplication, QLocale, QObject, QTextToSpeech, QVoice, QWidget, pyqtSignal -from calibre.constants import islinux, ismacos, iswindows, piper_cmdline +from calibre.constants import islinux, ismacos, iswindows from calibre.utils.config import JSONConfig from calibre.utils.config_base import tweaks from calibre.utils.localization import canonicalize_lang @@ -234,12 +234,11 @@ def available_engines() -> dict[str, EngineMetadata]: ), True) elif x == 'speechd': continue - if piper_cmdline(): - ans['piper'] = EngineMetadata('piper', _('The Piper Neural Engine'), _( - 'The "piper" engine can track the currently spoken sentence on screen. It uses a neural network ' - 'for natural sounding voices. The neural network is run locally on your computer, it is fairly resource intensive to run.' - ), TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True, has_managed_voices=True, - has_sentence_delay=True) + ans['piper'] = EngineMetadata('piper', _('The Piper Neural Engine'), _( + 'The "piper" engine can track the currently spoken sentence on screen. It uses a neural network ' + 'for natural sounding voices. The neural network is run locally on your computer, it is fairly resource intensive to run.' + ), TrackingCapability.Sentence, can_change_pitch=False, voices_have_quality_metadata=True, has_managed_voices=True, + has_sentence_delay=True) if islinux: try: from speechd.paths import SPD_SPAWN_CMD diff --git a/src/calibre/test_build.py b/src/calibre/test_build.py index 8a23df108e..c825cb30ad 100644 --- a/src/calibre/test_build.py +++ b/src/calibre/test_build.py @@ -123,14 +123,9 @@ class BuildTest(unittest.TestCase): from speechd.client import SSIPClient del SSIPClient - @unittest.skipIf('SKIP_SPEECH_TESTS' in os.environ, 'Speech support is opted out') def test_piper(self): - import subprocess - - from calibre.constants import piper_cmdline - self.assertTrue(piper_cmdline()) - raw = subprocess.check_output(piper_cmdline() + ('-h',), stderr=subprocess.STDOUT).decode() - self.assertIn('--sentence_silence', raw) + from calibre.utils.tts.piper import simple_test + simple_test() def test_zeroconf(self): import ifaddr diff --git a/src/calibre/utils/tts/piper.py b/src/calibre/utils/tts/piper.py index aff3cd9c7e..95e7af944a 100644 --- a/src/calibre/utils/tts/piper.py +++ b/src/calibre/utils/tts/piper.py @@ -83,6 +83,13 @@ class SynthesisResult(NamedTuple): is_last: bool +def simple_test(): + piper.initialize(espeak_data_dir()) + piper.set_espeak_voice_by_name('en-us') + if not piper.phonemize('simple test'): + raise ValueError('No phonemes returned by phonemize()') + + class Piper(Thread): def __init__(self): @@ -172,6 +179,10 @@ def global_piper_instance() -> Piper: return _global_piper_instance +def global_piper_instance_if_exists() -> Piper | None: + return _global_piper_instance + + def play_wav_data(wav_data: bytes): from qt.core import QAudioOutput, QBuffer, QByteArray, QCoreApplication, QIODevice, QMediaPlayer, QUrl app = QCoreApplication([]) @@ -190,6 +201,11 @@ def play_wav_data(wav_data: bytes): app.exec() +def play_pcm_data(pcm_data, sample_rate): + from calibre_extensions.ffmpeg import wav_header_for_pcm_data + play_wav_data(wav_header_for_pcm_data(len(pcm_data), sample_rate) + pcm_data) + + def develop(): from calibre.gui2.tts.piper import piper_cache_dir p = global_piper_instance() @@ -210,10 +226,7 @@ def develop(): print(f'Got {len(sr.audio_data)} bytes of audio data', flush=True) if sr.is_last: break - from calibre_extensions.ffmpeg import wav_header_for_pcm_data - pcm_data = b''.join(all_data) - wav_data = wav_header_for_pcm_data(len(pcm_data), sample_rate) + pcm_data - play_wav_data(wav_data) + play_pcm_data(b''.join(all_data), sample_rate) if __name__ == '__main__':