diff --git a/software/poetry.lock b/software/poetry.lock index e9bdbe8..e7df50e 100644 --- a/software/poetry.lock +++ b/software/poetry.lock @@ -2243,13 +2243,13 @@ socks = ["socksio (==1.*)"] [[package]] name = "huggingface-hub" -version = "0.23.4" +version = "0.23.5" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.23.4-py3-none-any.whl", hash = "sha256:3a0b957aa87150addf0cc7bd71b4d954b78e749850e1e7fb29ebbd2db64ca037"}, - {file = "huggingface_hub-0.23.4.tar.gz", hash = "sha256:35d99016433900e44ae7efe1c209164a5a81dbbcd53a52f99c281dcd7ce22431"}, + {file = "huggingface_hub-0.23.5-py3-none-any.whl", hash = "sha256:d7a7d337615e11a45cc14a0ce5a605db6b038dc24af42866f731684825226e90"}, + {file = "huggingface_hub-0.23.5.tar.gz", hash = "sha256:67a9caba79b71235be3752852ca27da86bd54311d2424ca8afdb8dda056edf98"}, ] [package.dependencies] @@ -3988,7 +3988,7 @@ server = ["fastapi (>=0.111.0,<0.112.0)", "janus (>=1.0.0,<2.0.0)", "uvicorn (>= type = "git" url = "https://github.com/OpenInterpreter/open-interpreter.git" reference = "development" -resolved_reference = "3db7e4b2dd93f48e1761ccbd24cd2b5a7985b06f" +resolved_reference = "59409c2ddccb1a8d457099de7e24021afcba9ad8" [[package]] name = "openai" @@ -4032,10 +4032,10 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, ] [[package]] diff --git a/software/source/clients/base_device.py b/software/source/clients/base_device.py index a0597e6..68ea1cc 100644 --- a/software/source/clients/base_device.py +++ b/software/source/clients/base_device.py @@ -3,6 +3,7 @@ import websockets import pyaudio from pynput import keyboard import json +from yaspin import yaspin CHUNK = 1024 FORMAT = pyaudio.paInt16 @@ -18,6 +19,8 @@ class Device: self.recording = False self.input_stream = None self.output_stream = None + self.spinner = yaspin() + self.play_audio = True async def connect_with_retry(self, max_retries=50, retry_delay=2): for attempt in range(max_retries): @@ -26,7 +29,8 @@ class Device: print("Connected to server.") return except ConnectionRefusedError: - print(f"Waiting for the server to be ready. Retrying in {retry_delay} seconds...") + if attempt % 4 == 0: + print(f"Waiting for the server to be ready...") await asyncio.sleep(retry_delay) raise Exception("Failed to connect to the server after multiple attempts") @@ -37,7 +41,7 @@ class Device: try: # Send start flag await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "start": True})) - print("Sending audio start message") + #print("Sending audio start message") while self.recording: data = self.input_stream.read(CHUNK, exception_on_overflow=False) @@ -45,7 +49,7 @@ class Device: # Send stop flag await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "end": True})) - print("Sending audio end message") + #print("Sending audio end message") except Exception as e: print(f"Error in send_audio: {e}") await asyncio.sleep(0.01) @@ -56,26 +60,30 @@ class Device: try: data = await self.websocket.recv() if isinstance(data, bytes) and not self.recording: - self.output_stream.write(data) + if self.play_audio: + self.output_stream.write(data) except Exception as e: print(f"Error in receive_audio: {e}") def on_press(self, key): if key == keyboard.Key.space and not self.recording: - print("Space pressed, starting recording") + #print("Space pressed, starting recording") + print("\n") + self.spinner.start() self.recording = True def on_release(self, key): if key == keyboard.Key.space: - print("Space released, stopping recording") + self.spinner.stop() + #print("Space released, stopping recording") self.recording = False - elif key == keyboard.Key.esc: - print("Esc pressed, stopping the program") - return False + # elif key == keyboard.Key.esc: + # print("Esc pressed, stopping the program") + # return False async def main(self): await self.connect_with_retry() - print("Hold spacebar to record. Press 'Esc' to quit.") + print("Hold spacebar to record. Press 'CTRL-C' to quit.") listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release) listener.start() await asyncio.gather(self.send_audio(), self.receive_audio()) diff --git a/software/source/clients/mac/device.py b/software/source/clients/mac/device.py index 36182fb..006d181 100644 --- a/software/source/clients/mac/device.py +++ b/software/source/clients/mac/device.py @@ -3,9 +3,10 @@ from ..base_device import Device device = Device() -def main(server_url, debug): +def main(server_url, debug, play_audio): device.server_url = server_url device.debug = debug + device.play_audio = play_audio device.start() diff --git a/software/source/server/async_server.py b/software/source/server/async_server.py index dedd1cb..79581d6 100644 --- a/software/source/server/async_server.py +++ b/software/source/server/async_server.py @@ -10,7 +10,7 @@ import wave import asyncio from fastapi.responses import PlainTextResponse -def start_server(server_host, server_port, profile, debug): +def start_server(server_host, server_port, profile, debug, play_audio): # Load the profile module from the provided path spec = importlib.util.spec_from_file_location("profile", profile) @@ -47,6 +47,8 @@ def start_server(server_host, server_port, profile, debug): interpreter.server.host = server_host interpreter.server.port = server_port + interpreter.play_audio = play_audio + interpreter.audio_chunks = [] @@ -100,12 +102,12 @@ def start_server(server_host, server_port, profile, debug): if output["type"] == "message" and len(output.get("content", "")) > 0: self.tts.feed(output.get("content")) if not self.tts.is_playing() and any([c in delimiters for c in output.get("content")]): # Start playing once the first delimiter is encountered. - self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters) + self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters) return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True} if output == {"role": "assistant", "type": "message", "end": True}: if not self.tts.is_playing(): # We put this here in case it never outputs a delimiter and never triggers play_async^ - self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters) + self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters) return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True} return {"role": "assistant", "type": "audio", "format": "bytes.wav", "end": True} diff --git a/software/start.py b/software/start.py index 3a55d63..fab153f 100644 --- a/software/start.py +++ b/software/start.py @@ -134,6 +134,11 @@ def _run( signal.signal(signal.SIGINT, handle_exit) if server: + # Have the server play audio if we're running this on the same device. Needless pops and clicks otherwise! + if client: + play_audio = True + else: + play_audio = False server_thread = threading.Thread( target=start_server, args=( @@ -141,6 +146,7 @@ def _run( server_port, profile, debug, + play_audio, ), ) server_thread.start() @@ -172,7 +178,13 @@ def _run( f".clients.{client_type}.device", package="source" ) - client_thread = threading.Thread(target=module.main, args=[server_url, debug]) + # Have the server play audio if we're running this on the same device. Needless pops and clicks otherwise! + if server: + play_audio = False + else: + play_audio = True + + client_thread = threading.Thread(target=module.main, args=[server_url, debug, play_audio]) client_thread.start() try: