merge temp branch

pull/279/head
Ben Xu 7 months ago
commit 280983597c

@ -93,7 +93,7 @@ class Device:
self.server_url = ""
self.ctrl_pressed = False
self.tts_service = ""
# self.playback_latency = None
self.playback_latency = None
def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
"""Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
@ -168,10 +168,10 @@ class Device:
while True:
try:
audio = await self.audiosegments.get()
# if self.playback_latency and isinstance(audio, bytes):
# elapsed_time = time.time() - self.playback_latency
# print(f"Time from request to playback: {elapsed_time} seconds")
# self.playback_latency = None
if self.playback_latency and isinstance(audio, bytes):
elapsed_time = time.time() - self.playback_latency
print(f"Time from request to playback: {elapsed_time} seconds")
self.playback_latency = None
if self.tts_service == "elevenlabs":
mpv_process.stdin.write(audio) # type: ignore

@ -22,9 +22,10 @@ import os
class AsyncInterpreter:
def __init__(self, interpreter):
# self.stt_latency = None
# self.tts_latency = None
# self.interpreter_latency = None
self.stt_latency = None
self.tts_latency = None
self.interpreter_latency = None
self.time_from_first_yield_to_first_put = None
self.interpreter = interpreter
# STT
@ -125,7 +126,10 @@ class AsyncInterpreter:
# Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer
# content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ")
# print("yielding ", content)
print("yielding ", content)
if self.time_from_first_yield_to_first_put is None:
self.time_from_first_yield_to_first_put = time.time()
yield content
# Handle code blocks
@ -156,9 +160,9 @@ class AsyncInterpreter:
)
# Send a completion signal
# end_interpreter = time.time()
# self.interpreter_latency = end_interpreter - start_interpreter
# print("INTERPRETER LATENCY", self.interpreter_latency)
end_interpreter = time.time()
self.interpreter_latency = end_interpreter - start_interpreter
print("INTERPRETER LATENCY", self.interpreter_latency)
# self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"})
async def run(self):
@ -173,11 +177,11 @@ class AsyncInterpreter:
while not self._input_queue.empty():
input_queue.append(self._input_queue.get())
# start_stt = time.time()
start_stt = time.time()
message = self.stt.text()
# end_stt = time.time()
# self.stt_latency = end_stt - start_stt
# print("STT LATENCY", self.stt_latency)
end_stt = time.time()
self.stt_latency = end_stt - start_stt
print("STT LATENCY", self.stt_latency)
print(message)
@ -190,7 +194,7 @@ class AsyncInterpreter:
self.tts.feed(text_iterator)
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True)
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=False)
while True:
await asyncio.sleep(0.1)
@ -204,14 +208,23 @@ class AsyncInterpreter:
"end": True,
}
)
# end_tts = time.time()
# self.tts_latency = end_tts - self.tts.stream_start_time
# print("TTS LATENCY", self.tts_latency)
end_tts = time.time()
self.tts_latency = end_tts - self.tts.stream_start_time
print("TTS LATENCY", self.tts_latency)
self.tts.stop()
break
async def _on_tts_chunk_async(self, chunk):
# Coroutine that prints a debug line, optionally prints a latency measurement,
# and then awaits self._add_to_queue(self._output_queue, chunk).
#
# chunk: the value handed on unchanged to self._add_to_queue.
#
# NOTE(review): indentation is flattened in this view, so the nesting of the
# statements below cannot be confirmed from here; in particular, verify in the
# real file whether the reset to 0 sits inside the `if`.
# print("adding chunk to queue")
print("adding chunk to queue")
# Report only when the attribute holds a value that is neither None nor 0.
if (
self.time_from_first_yield_to_first_put is not None
and self.time_from_first_yield_to_first_put != 0
):
# Elapsed seconds between the stored timestamp and now.
print(
"time from first yield to first put is ",
time.time() - self.time_from_first_yield_to_first_put,
)
# Presumably 0 acts as an "already reported" marker, since the condition above
# skips a value of 0 — TODO confirm intent and nesting.
self.time_from_first_yield_to_first_put = 0
await self._add_to_queue(self._output_queue, chunk)
def on_tts_chunk(self, chunk):
@ -219,5 +232,5 @@ class AsyncInterpreter:
asyncio.run(self._on_tts_chunk_async(chunk))
async def output(self):
    """Wait for the next item on ``self._output_queue`` and return it.

    A debug line is printed before the wait begins.
    """
    print("outputting chunks")
    next_item = await self._output_queue.get()
    return next_item

Loading…
Cancel
Save