From a495b9db6a26eee74f88cff3a55b8519d4664969 Mon Sep 17 00:00:00 2001
From: Ben Xu
Date: Tue, 18 Jun 2024 10:11:52 -0700
Subject: [PATCH] add latency logs

---
 software/source/clients/base_device.py      |  2 +-
 software/source/server/async_interpreter.py | 49 +++++++++++++--------
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/software/source/clients/base_device.py b/software/source/clients/base_device.py
index c5ac73f..885753e 100644
--- a/software/source/clients/base_device.py
+++ b/software/source/clients/base_device.py
@@ -167,7 +167,7 @@ class Device:
                 audio = await self.audiosegments.get()
                 if self.playback_latency and isinstance(audio, bytes):
                     elapsed_time = time.time() - self.playback_latency
-                    # print(f"Time from request to playback: {elapsed_time} seconds")
+                    print(f"Time from request to playback: {elapsed_time} seconds")
                     self.playback_latency = None
 
                 if self.tts_service == "elevenlabs":
diff --git a/software/source/server/async_interpreter.py b/software/source/server/async_interpreter.py
index 4fee556..39303d2 100644
--- a/software/source/server/async_interpreter.py
+++ b/software/source/server/async_interpreter.py
@@ -22,9 +22,10 @@ import os
 
 class AsyncInterpreter:
     def __init__(self, interpreter):
-        # self.stt_latency = None
-        # self.tts_latency = None
-        # self.interpreter_latency = None
+        self.stt_latency = None
+        self.tts_latency = None
+        self.interpreter_latency = None
+        self.time_from_first_yield_to_first_put = None
         self.interpreter = interpreter
 
         # STT
@@ -125,7 +126,10 @@ class AsyncInterpreter:
                     # Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer
                     # content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ")
 
-                    # print("yielding ", content)
+                    print("yielding ", content)
+                    if self.time_from_first_yield_to_first_put is None:
+                        self.time_from_first_yield_to_first_put = time.time()
+
                     yield content
 
             # Handle code blocks
@@ -156,9 +160,9 @@ class AsyncInterpreter:
                 )
 
         # Send a completion signal
-        # end_interpreter = time.time()
-        # self.interpreter_latency = end_interpreter - start_interpreter
-        # print("INTERPRETER LATENCY", self.interpreter_latency)
+        end_interpreter = time.time()
+        self.interpreter_latency = end_interpreter - start_interpreter
+        print("INTERPRETER LATENCY", self.interpreter_latency)
         # self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"})
 
     async def run(self):
@@ -173,13 +177,13 @@ class AsyncInterpreter:
         while not self._input_queue.empty():
             input_queue.append(self._input_queue.get())
 
-        # start_stt = time.time()
+        start_stt = time.time()
         message = self.stt.text()
-        # end_stt = time.time()
-        # self.stt_latency = end_stt - start_stt
-        # print("STT LATENCY", self.stt_latency)
+        end_stt = time.time()
+        self.stt_latency = end_stt - start_stt
+        print("STT LATENCY", self.stt_latency)
 
-        # print(message)
+        print(message)
 
         # Feed generate to RealtimeTTS
         self.add_to_output_queue_sync(
@@ -190,7 +194,7 @@ class AsyncInterpreter:
 
         self.tts.feed(text_iterator)
 
-        self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)
+        self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=False)
 
         while True:
             await asyncio.sleep(0.1)
@@ -204,14 +208,23 @@ class AsyncInterpreter:
                         "end": True,
                     }
                 )
-                # end_tts = time.time()
-                # self.tts_latency = end_tts - self.tts.stream_start_time
-                # print("TTS LATENCY", self.tts_latency)
+                end_tts = time.time()
+                self.tts_latency = end_tts - self.tts.stream_start_time
+                print("TTS LATENCY", self.tts_latency)
                 self.tts.stop()
                 break
 
     async def _on_tts_chunk_async(self, chunk):
-        # print("adding chunk to queue")
+        print("adding chunk to queue")
+        if (
+            self.time_from_first_yield_to_first_put is not None
+            and self.time_from_first_yield_to_first_put != 0
+        ):
+            print(
+                "time from first yield to first put is ",
+                time.time() - self.time_from_first_yield_to_first_put,
+            )
+            self.time_from_first_yield_to_first_put = 0
         await self._add_to_queue(self._output_queue, chunk)
 
     def on_tts_chunk(self, chunk):
@@ -219,5 +232,5 @@ class AsyncInterpreter:
         asyncio.run(self._on_tts_chunk_async(chunk))
 
     async def output(self):
-        # print("outputting chunks")
+        print("outputting chunks")
        return await self._output_queue.get()
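Note (not part of the patch): every hunk above applies the same instrumentation pattern: capture time.time() before a stage, capture it again after, and print the difference. A minimal standalone sketch of that pattern follows; the timed_stage helper and the simulated stage standing in for calls like self.stt.text() are illustrative only and do not exist in the repository.

    import time


    def timed_stage(label, fn, *args, **kwargs):
        # Record a wall-clock timestamp, run the stage, then report the elapsed
        # seconds, mirroring the start_stt/end_stt and start_interpreter/
        # end_interpreter pairs introduced in the patch.
        start = time.time()
        result = fn(*args, **kwargs)
        print(label, "LATENCY", time.time() - start)
        return result


    if __name__ == "__main__":
        # Simulated stage standing in for self.stt.text(): sleep briefly, return text.
        transcript = timed_stage("STT", lambda: (time.sleep(0.05), "hello world")[1])
        print(transcript)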