Removed print statements, better audio playback

pull/266/merge
killian 6 months ago
parent 4640b4f1a0
commit d2496fa8a2

10
software/poetry.lock generated

@ -2243,13 +2243,13 @@ socks = ["socksio (==1.*)"]
[[package]] [[package]]
name = "huggingface-hub" name = "huggingface-hub"
version = "0.23.4" version = "0.23.5"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false optional = false
python-versions = ">=3.8.0" python-versions = ">=3.8.0"
files = [ files = [
{file = "huggingface_hub-0.23.4-py3-none-any.whl", hash = "sha256:3a0b957aa87150addf0cc7bd71b4d954b78e749850e1e7fb29ebbd2db64ca037"}, {file = "huggingface_hub-0.23.5-py3-none-any.whl", hash = "sha256:d7a7d337615e11a45cc14a0ce5a605db6b038dc24af42866f731684825226e90"},
{file = "huggingface_hub-0.23.4.tar.gz", hash = "sha256:35d99016433900e44ae7efe1c209164a5a81dbbcd53a52f99c281dcd7ce22431"}, {file = "huggingface_hub-0.23.5.tar.gz", hash = "sha256:67a9caba79b71235be3752852ca27da86bd54311d2424ca8afdb8dda056edf98"},
] ]
[package.dependencies] [package.dependencies]
@ -3988,7 +3988,7 @@ server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>=
type = "git" type = "git"
url = "https://github.com/OpenInterpreter/open-interpreter.git" url = "https://github.com/OpenInterpreter/open-interpreter.git"
reference = "development" reference = "development"
resolved_reference = "3db7e4b2dd93f48e1761ccbd24cd2b5a7985b06f" resolved_reference = "59409c2ddccb1a8d457099de7e24021afcba9ad8"
[[package]] [[package]]
name = "openai" name = "openai"
@ -4032,10 +4032,10 @@ files = [
[package.dependencies] [package.dependencies]
numpy = [ numpy = [
{version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""}, {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
{version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
] ]
[[package]] [[package]]

@ -3,6 +3,7 @@ import websockets
import pyaudio import pyaudio
from pynput import keyboard from pynput import keyboard
import json import json
from yaspin import yaspin
CHUNK = 1024 CHUNK = 1024
FORMAT = pyaudio.paInt16 FORMAT = pyaudio.paInt16
@ -18,6 +19,8 @@ class Device:
self.recording = False self.recording = False
self.input_stream = None self.input_stream = None
self.output_stream = None self.output_stream = None
self.spinner = yaspin()
self.play_audio = True
async def connect_with_retry(self, max_retries=50, retry_delay=2): async def connect_with_retry(self, max_retries=50, retry_delay=2):
for attempt in range(max_retries): for attempt in range(max_retries):
@ -26,7 +29,8 @@ class Device:
print("Connected to server.") print("Connected to server.")
return return
except ConnectionRefusedError: except ConnectionRefusedError:
print(f"Waiting for the server to be ready. Retrying in {retry_delay} seconds...") if attempt % 4 == 0:
print(f"Waiting for the server to be ready...")
await asyncio.sleep(retry_delay) await asyncio.sleep(retry_delay)
raise Exception("Failed to connect to the server after multiple attempts") raise Exception("Failed to connect to the server after multiple attempts")
@ -37,7 +41,7 @@ class Device:
try: try:
# Send start flag # Send start flag
await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "start": True})) await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "start": True}))
print("Sending audio start message") #print("Sending audio start message")
while self.recording: while self.recording:
data = self.input_stream.read(CHUNK, exception_on_overflow=False) data = self.input_stream.read(CHUNK, exception_on_overflow=False)
@ -45,7 +49,7 @@ class Device:
# Send stop flag # Send stop flag
await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "end": True})) await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "end": True}))
print("Sending audio end message") #print("Sending audio end message")
except Exception as e: except Exception as e:
print(f"Error in send_audio: {e}") print(f"Error in send_audio: {e}")
await asyncio.sleep(0.01) await asyncio.sleep(0.01)
@ -56,26 +60,30 @@ class Device:
try: try:
data = await self.websocket.recv() data = await self.websocket.recv()
if isinstance(data, bytes) and not self.recording: if isinstance(data, bytes) and not self.recording:
if self.play_audio:
self.output_stream.write(data) self.output_stream.write(data)
except Exception as e: except Exception as e:
print(f"Error in receive_audio: {e}") print(f"Error in receive_audio: {e}")
def on_press(self, key): def on_press(self, key):
if key == keyboard.Key.space and not self.recording: if key == keyboard.Key.space and not self.recording:
print("Space pressed, starting recording") #print("Space pressed, starting recording")
print("\n")
self.spinner.start()
self.recording = True self.recording = True
def on_release(self, key): def on_release(self, key):
if key == keyboard.Key.space: if key == keyboard.Key.space:
print("Space released, stopping recording") self.spinner.stop()
#print("Space released, stopping recording")
self.recording = False self.recording = False
elif key == keyboard.Key.esc: # elif key == keyboard.Key.esc:
print("Esc pressed, stopping the program") # print("Esc pressed, stopping the program")
return False # return False
async def main(self): async def main(self):
await self.connect_with_retry() await self.connect_with_retry()
print("Hold spacebar to record. Press 'Esc' to quit.") print("Hold spacebar to record. Press 'CTRL-C' to quit.")
listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release) listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
listener.start() listener.start()
await asyncio.gather(self.send_audio(), self.receive_audio()) await asyncio.gather(self.send_audio(), self.receive_audio())

@ -3,9 +3,10 @@ from ..base_device import Device
device = Device() device = Device()
def main(server_url, debug): def main(server_url, debug, play_audio):
device.server_url = server_url device.server_url = server_url
device.debug = debug device.debug = debug
device.play_audio = play_audio
device.start() device.start()

@ -10,7 +10,7 @@ import wave
import asyncio import asyncio
from fastapi.responses import PlainTextResponse from fastapi.responses import PlainTextResponse
def start_server(server_host, server_port, profile, debug): def start_server(server_host, server_port, profile, debug, play_audio):
# Load the profile module from the provided path # Load the profile module from the provided path
spec = importlib.util.spec_from_file_location("profile", profile) spec = importlib.util.spec_from_file_location("profile", profile)
@ -47,6 +47,8 @@ def start_server(server_host, server_port, profile, debug):
interpreter.server.host = server_host interpreter.server.host = server_host
interpreter.server.port = server_port interpreter.server.port = server_port
interpreter.play_audio = play_audio
interpreter.audio_chunks = [] interpreter.audio_chunks = []
@ -100,12 +102,12 @@ def start_server(server_host, server_port, profile, debug):
if output["type"] == "message" and len(output.get("content", "")) > 0: if output["type"] == "message" and len(output.get("content", "")) > 0:
self.tts.feed(output.get("content")) self.tts.feed(output.get("content"))
if not self.tts.is_playing() and any([c in delimiters for c in output.get("content")]): # Start playing once the first delimiter is encountered. if not self.tts.is_playing() and any([c in delimiters for c in output.get("content")]): # Start playing once the first delimiter is encountered.
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters) self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters)
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True} return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
if output == {"role": "assistant", "type": "message", "end": True}: if output == {"role": "assistant", "type": "message", "end": True}:
if not self.tts.is_playing(): # We put this here in case it never outputs a delimiter and never triggers play_async^ if not self.tts.is_playing(): # We put this here in case it never outputs a delimiter and never triggers play_async^
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters) self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters)
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True} return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "end": True} return {"role": "assistant", "type": "audio", "format": "bytes.wav", "end": True}

@ -134,6 +134,11 @@ def _run(
signal.signal(signal.SIGINT, handle_exit) signal.signal(signal.SIGINT, handle_exit)
if server: if server:
# Have the server play audio if we're running this on the same device. Needless pops and clicks otherwise!
if client:
play_audio = True
else:
play_audio = False
server_thread = threading.Thread( server_thread = threading.Thread(
target=start_server, target=start_server,
args=( args=(
@ -141,6 +146,7 @@ def _run(
server_port, server_port,
profile, profile,
debug, debug,
play_audio,
), ),
) )
server_thread.start() server_thread.start()
@ -172,7 +178,13 @@ def _run(
f".clients.{client_type}.device", package="source" f".clients.{client_type}.device", package="source"
) )
client_thread = threading.Thread(target=module.main, args=[server_url, debug]) # Have the server play audio if we're running this on the same device. Needless pops and clicks otherwise!
if server:
play_audio = False
else:
play_audio = True
client_thread = threading.Thread(target=module.main, args=[server_url, debug, play_audio])
client_thread.start() client_thread.start()
try: try:

Loading…
Cancel
Save