merge temp branch

pull/279/head
Ben Xu 7 months ago
commit 280983597c

@ -93,7 +93,7 @@ class Device:
self.server_url = "" self.server_url = ""
self.ctrl_pressed = False self.ctrl_pressed = False
self.tts_service = "" self.tts_service = ""
# self.playback_latency = None self.playback_latency = None
def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX): def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
"""Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list.""" """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
@ -168,10 +168,10 @@ class Device:
while True: while True:
try: try:
audio = await self.audiosegments.get() audio = await self.audiosegments.get()
# if self.playback_latency and isinstance(audio, bytes): if self.playback_latency and isinstance(audio, bytes):
# elapsed_time = time.time() - self.playback_latency elapsed_time = time.time() - self.playback_latency
# print(f"Time from request to playback: {elapsed_time} seconds") print(f"Time from request to playback: {elapsed_time} seconds")
# self.playback_latency = None self.playback_latency = None
if self.tts_service == "elevenlabs": if self.tts_service == "elevenlabs":
mpv_process.stdin.write(audio) # type: ignore mpv_process.stdin.write(audio) # type: ignore

@ -22,9 +22,10 @@ import os
class AsyncInterpreter: class AsyncInterpreter:
def __init__(self, interpreter): def __init__(self, interpreter):
# self.stt_latency = None self.stt_latency = None
# self.tts_latency = None self.tts_latency = None
# self.interpreter_latency = None self.interpreter_latency = None
self.time_from_first_yield_to_first_put = None
self.interpreter = interpreter self.interpreter = interpreter
# STT # STT
@ -125,7 +126,10 @@ class AsyncInterpreter:
# Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer # Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer
# content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ") # content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ")
# print("yielding ", content) print("yielding ", content)
if self.time_from_first_yield_to_first_put is None:
self.time_from_first_yield_to_first_put = time.time()
yield content yield content
# Handle code blocks # Handle code blocks
@ -156,9 +160,9 @@ class AsyncInterpreter:
) )
# Send a completion signal # Send a completion signal
# end_interpreter = time.time() end_interpreter = time.time()
# self.interpreter_latency = end_interpreter - start_interpreter self.interpreter_latency = end_interpreter - start_interpreter
# print("INTERPRETER LATENCY", self.interpreter_latency) print("INTERPRETER LATENCY", self.interpreter_latency)
# self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"}) # self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"})
async def run(self): async def run(self):
@ -173,11 +177,11 @@ class AsyncInterpreter:
while not self._input_queue.empty(): while not self._input_queue.empty():
input_queue.append(self._input_queue.get()) input_queue.append(self._input_queue.get())
# start_stt = time.time() start_stt = time.time()
message = self.stt.text() message = self.stt.text()
# end_stt = time.time() end_stt = time.time()
# self.stt_latency = end_stt - start_stt self.stt_latency = end_stt - start_stt
# print("STT LATENCY", self.stt_latency) print("STT LATENCY", self.stt_latency)
print(message) print(message)
@ -190,7 +194,7 @@ class AsyncInterpreter:
self.tts.feed(text_iterator) self.tts.feed(text_iterator)
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True) self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=False)
while True: while True:
await asyncio.sleep(0.1) await asyncio.sleep(0.1)
@ -204,14 +208,23 @@ class AsyncInterpreter:
"end": True, "end": True,
} }
) )
# end_tts = time.time() end_tts = time.time()
# self.tts_latency = end_tts - self.tts.stream_start_time self.tts_latency = end_tts - self.tts.stream_start_time
# print("TTS LATENCY", self.tts_latency) print("TTS LATENCY", self.tts_latency)
self.tts.stop() self.tts.stop()
break break
async def _on_tts_chunk_async(self, chunk): async def _on_tts_chunk_async(self, chunk):
# print("adding chunk to queue") print("adding chunk to queue")
if (
self.time_from_first_yield_to_first_put is not None
and self.time_from_first_yield_to_first_put != 0
):
print(
"time from first yield to first put is ",
time.time() - self.time_from_first_yield_to_first_put,
)
self.time_from_first_yield_to_first_put = 0
await self._add_to_queue(self._output_queue, chunk) await self._add_to_queue(self._output_queue, chunk)
def on_tts_chunk(self, chunk): def on_tts_chunk(self, chunk):
@ -219,5 +232,5 @@ class AsyncInterpreter:
asyncio.run(self._on_tts_chunk_async(chunk)) asyncio.run(self._on_tts_chunk_async(chunk))
async def output(self): async def output(self):
# print("outputting chunks") print("outputting chunks")
return await self._output_queue.get() return await self._output_queue.get()

Loading…
Cancel
Save