diff --git a/software/source/clients/base_device.py b/software/source/clients/base_device.py index 18eef14..8153718 100644 --- a/software/source/clients/base_device.py +++ b/software/source/clients/base_device.py @@ -93,7 +93,7 @@ class Device: self.server_url = "" self.ctrl_pressed = False self.tts_service = "" - # self.playback_latency = None + self.playback_latency = None def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX): """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list.""" @@ -168,10 +168,10 @@ class Device: while True: try: audio = await self.audiosegments.get() - # if self.playback_latency and isinstance(audio, bytes): - # elapsed_time = time.time() - self.playback_latency - # print(f"Time from request to playback: {elapsed_time} seconds") - # self.playback_latency = None + if self.playback_latency and isinstance(audio, bytes): + elapsed_time = time.time() - self.playback_latency + print(f"Time from request to playback: {elapsed_time} seconds") + self.playback_latency = None if self.tts_service == "elevenlabs": mpv_process.stdin.write(audio) # type: ignore diff --git a/software/source/server/async_interpreter.py b/software/source/server/async_interpreter.py index 1c58b8c..a57a35e 100644 --- a/software/source/server/async_interpreter.py +++ b/software/source/server/async_interpreter.py @@ -22,9 +22,10 @@ import os class AsyncInterpreter: def __init__(self, interpreter): - # self.stt_latency = None - # self.tts_latency = None - # self.interpreter_latency = None + self.stt_latency = None + self.tts_latency = None + self.interpreter_latency = None + self.time_from_first_yield_to_first_put = None self.interpreter = interpreter # STT @@ -125,7 +126,10 @@ class AsyncInterpreter: # Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer # content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ") - # print("yielding ", content) + print("yielding ", content) + if self.time_from_first_yield_to_first_put is None: + self.time_from_first_yield_to_first_put = time.time() + yield content # Handle code blocks @@ -156,9 +160,9 @@ class AsyncInterpreter: ) # Send a completion signal - # end_interpreter = time.time() - # self.interpreter_latency = end_interpreter - start_interpreter - # print("INTERPRETER LATENCY", self.interpreter_latency) + end_interpreter = time.time() + self.interpreter_latency = end_interpreter - start_interpreter + print("INTERPRETER LATENCY", self.interpreter_latency) # self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"}) async def run(self): @@ -173,11 +177,11 @@ class AsyncInterpreter: while not self._input_queue.empty(): input_queue.append(self._input_queue.get()) - # start_stt = time.time() + start_stt = time.time() message = self.stt.text() - # end_stt = time.time() - # self.stt_latency = end_stt - start_stt - # print("STT LATENCY", self.stt_latency) + end_stt = time.time() + self.stt_latency = end_stt - start_stt + print("STT LATENCY", self.stt_latency) print(message) @@ -190,7 +194,7 @@ class AsyncInterpreter: self.tts.feed(text_iterator) - self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True) + self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=False) while True: await asyncio.sleep(0.1) @@ -204,14 +208,23 @@ class AsyncInterpreter: "end": True, } ) - # end_tts = time.time() - # self.tts_latency = end_tts - self.tts.stream_start_time - # print("TTS LATENCY", self.tts_latency) + end_tts = time.time() + self.tts_latency = end_tts - self.tts.stream_start_time + print("TTS LATENCY", self.tts_latency) self.tts.stop() break async def _on_tts_chunk_async(self, chunk): - # print("adding chunk to queue") + print("adding chunk to queue") + if ( + self.time_from_first_yield_to_first_put is not None + and self.time_from_first_yield_to_first_put != 0 + ): + print( + "time from first yield to first put is ", + time.time() - self.time_from_first_yield_to_first_put, + ) + self.time_from_first_yield_to_first_put = 0 await self._add_to_queue(self._output_queue, chunk) def on_tts_chunk(self, chunk): @@ -219,5 +232,5 @@ class AsyncInterpreter: asyncio.run(self._on_tts_chunk_async(chunk)) async def output(self): - # print("outputting chunks") + print("outputting chunks") return await self._output_queue.get()