Removed print statements, better audio playback

pull/266/merge
killian 6 months ago
parent 4640b4f1a0
commit d2496fa8a2

10
software/poetry.lock generated

@ -2243,13 +2243,13 @@ socks = ["socksio (==1.*)"]
[[package]]
name = "huggingface-hub"
version = "0.23.4"
version = "0.23.5"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.8.0"
files = [
{file = "huggingface_hub-0.23.4-py3-none-any.whl", hash = "sha256:3a0b957aa87150addf0cc7bd71b4d954b78e749850e1e7fb29ebbd2db64ca037"},
{file = "huggingface_hub-0.23.4.tar.gz", hash = "sha256:35d99016433900e44ae7efe1c209164a5a81dbbcd53a52f99c281dcd7ce22431"},
{file = "huggingface_hub-0.23.5-py3-none-any.whl", hash = "sha256:d7a7d337615e11a45cc14a0ce5a605db6b038dc24af42866f731684825226e90"},
{file = "huggingface_hub-0.23.5.tar.gz", hash = "sha256:67a9caba79b71235be3752852ca27da86bd54311d2424ca8afdb8dda056edf98"},
]
[package.dependencies]
@ -3988,7 +3988,7 @@ server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>=
type = "git"
url = "https://github.com/OpenInterpreter/open-interpreter.git"
reference = "development"
resolved_reference = "3db7e4b2dd93f48e1761ccbd24cd2b5a7985b06f"
resolved_reference = "59409c2ddccb1a8d457099de7e24021afcba9ad8"
[[package]]
name = "openai"
@ -4032,10 +4032,10 @@ files = [
[package.dependencies]
numpy = [
{version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
{version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
]
[[package]]

@ -3,6 +3,7 @@ import websockets
import pyaudio
from pynput import keyboard
import json
from yaspin import yaspin
CHUNK = 1024
FORMAT = pyaudio.paInt16
@ -18,6 +19,8 @@ class Device:
self.recording = False
self.input_stream = None
self.output_stream = None
self.spinner = yaspin()
self.play_audio = True
async def connect_with_retry(self, max_retries=50, retry_delay=2):
for attempt in range(max_retries):
@ -26,7 +29,8 @@ class Device:
print("Connected to server.")
return
except ConnectionRefusedError:
print(f"Waiting for the server to be ready. Retrying in {retry_delay} seconds...")
if attempt % 4 == 0:
print(f"Waiting for the server to be ready...")
await asyncio.sleep(retry_delay)
raise Exception("Failed to connect to the server after multiple attempts")
@ -37,7 +41,7 @@ class Device:
try:
# Send start flag
await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "start": True}))
print("Sending audio start message")
#print("Sending audio start message")
while self.recording:
data = self.input_stream.read(CHUNK, exception_on_overflow=False)
@ -45,7 +49,7 @@ class Device:
# Send stop flag
await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "end": True}))
print("Sending audio end message")
#print("Sending audio end message")
except Exception as e:
print(f"Error in send_audio: {e}")
await asyncio.sleep(0.01)
@ -56,26 +60,30 @@ class Device:
try:
data = await self.websocket.recv()
if isinstance(data, bytes) and not self.recording:
self.output_stream.write(data)
if self.play_audio:
self.output_stream.write(data)
except Exception as e:
print(f"Error in receive_audio: {e}")
def on_press(self, key):
if key == keyboard.Key.space and not self.recording:
print("Space pressed, starting recording")
#print("Space pressed, starting recording")
print("\n")
self.spinner.start()
self.recording = True
def on_release(self, key):
if key == keyboard.Key.space:
print("Space released, stopping recording")
self.spinner.stop()
#print("Space released, stopping recording")
self.recording = False
elif key == keyboard.Key.esc:
print("Esc pressed, stopping the program")
return False
# elif key == keyboard.Key.esc:
# print("Esc pressed, stopping the program")
# return False
async def main(self):
await self.connect_with_retry()
print("Hold spacebar to record. Press 'Esc' to quit.")
print("Hold spacebar to record. Press 'CTRL-C' to quit.")
listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
listener.start()
await asyncio.gather(self.send_audio(), self.receive_audio())

@ -3,9 +3,10 @@ from ..base_device import Device
device = Device()
def main(server_url, debug):
def main(server_url, debug, play_audio):
device.server_url = server_url
device.debug = debug
device.play_audio = play_audio
device.start()

@ -10,7 +10,7 @@ import wave
import asyncio
from fastapi.responses import PlainTextResponse
def start_server(server_host, server_port, profile, debug):
def start_server(server_host, server_port, profile, debug, play_audio):
# Load the profile module from the provided path
spec = importlib.util.spec_from_file_location("profile", profile)
@ -47,6 +47,8 @@ def start_server(server_host, server_port, profile, debug):
interpreter.server.host = server_host
interpreter.server.port = server_port
interpreter.play_audio = play_audio
interpreter.audio_chunks = []
@ -100,12 +102,12 @@ def start_server(server_host, server_port, profile, debug):
if output["type"] == "message" and len(output.get("content", "")) > 0:
self.tts.feed(output.get("content"))
if not self.tts.is_playing() and any([c in delimiters for c in output.get("content")]): # Start playing once the first delimiter is encountered.
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters)
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters)
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
if output == {"role": "assistant", "type": "message", "end": True}:
if not self.tts.is_playing(): # We put this here in case it never outputs a delimiter and never triggers play_async^
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters)
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters)
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "end": True}

@ -134,6 +134,11 @@ def _run(
signal.signal(signal.SIGINT, handle_exit)
if server:
# Have the server play audio if we're running this on the same device. Needless pops and clicks otherwise!
if client:
play_audio = True
else:
play_audio = False
server_thread = threading.Thread(
target=start_server,
args=(
@ -141,6 +146,7 @@ def _run(
server_port,
profile,
debug,
play_audio,
),
)
server_thread.start()
@ -172,7 +178,13 @@ def _run(
f".clients.{client_type}.device", package="source"
)
client_thread = threading.Thread(target=module.main, args=[server_url, debug])
# Have the server play audio if we're running this on the same device. Needless pops and clicks otherwise!
if server:
play_audio = False
else:
play_audio = True
client_thread = threading.Thread(target=module.main, args=[server_url, debug, play_audio])
client_thread.start()
try:

Loading…
Cancel
Save