From b76b66c733414378d95240dab31b5d113d8ab92d Mon Sep 17 00:00:00 2001 From: Tom Chapin Date: Sat, 3 Feb 2024 15:05:34 -0800 Subject: [PATCH 1/6] Fixing requirements --- OS/01/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/OS/01/requirements.txt b/OS/01/requirements.txt index 4a7cc7b..adc928a 100644 --- a/OS/01/requirements.txt +++ b/OS/01/requirements.txt @@ -1,4 +1,4 @@ -pip install git+https://github.com/KillianLucas/open-interpreter.git -pip install redis -pip install fastapi -pip install uvicorn +open-interpreter==0.2.0 +redis==5.0.1 +fastapi==0.109.1 +uvicorn==0.27.0.post1 From 161abb14d12673fbe64a2275a9a1a5d308dcd369 Mon Sep 17 00:00:00 2001 From: Tom Chapin Date: Sat, 3 Feb 2024 15:05:52 -0800 Subject: [PATCH 2/6] creating separate start scripts for linux vs osx --- OS/01/{start.sh => start_linux.sh} | 2 +- OS/01/start_osx.sh | 47 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) rename OS/01/{start.sh => start_linux.sh} (91%) mode change 100644 => 100755 create mode 100755 OS/01/start_osx.sh diff --git a/OS/01/start.sh b/OS/01/start_linux.sh old mode 100644 new mode 100755 similarity index 91% rename from OS/01/start.sh rename to OS/01/start_linux.sh index 9689c4b..f73c055 --- a/OS/01/start.sh +++ b/OS/01/start_linux.sh @@ -16,7 +16,7 @@ redis-cli -h localhost -p 6379 rpush to_core "" # START KERNEL WATCHER -python core/kernel_watcher.py & +python core/kernel_watch.py & # START SST AND TTS SERVICES diff --git a/OS/01/start_osx.sh b/OS/01/start_osx.sh new file mode 100755 index 0000000..43dba03 --- /dev/null +++ b/OS/01/start_osx.sh @@ -0,0 +1,47 @@ +### SETUP + +# INSTALL REQUIREMENTS + +brew update +brew install redis +pip install -r requirements.txt + +# START REDIS + +redis-server & + +redis-cli -h localhost -p 6379 rpush to_interface "" +redis-cli -h localhost -p 6379 rpush to_core "" + + +### CORE + +# START KERNEL WATCHER + +python core/kernel_watcher.py & + +# START SST AND TTS SERVICES + +# (todo) +# (i think we should start with hosted services) + +# START LLM + +# (disabled, we'll start with hosted services) +# python core/llm/start.py & + +# START CORE + +python core/start_core.py & + + +### INTERFACE + +# START INTERFACE + +python interface/interface.py & + +# START DISPLAY + +# (this should be changed to run it in fullscreen / kiosk mode) +open interface/display.html \ No newline at end of file From da87851e80e0dbb2a506eb1337356c2888265c3a Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Sat, 3 Feb 2024 20:05:01 -0800 Subject: [PATCH 3/6] listen > assistant --- OS/01/{assistant => _archive}/listen.py | 0 OS/01/assistant/assistant.py | 10 +++++----- OS/01/user/record.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) rename OS/01/{assistant => _archive}/listen.py (100%) diff --git a/OS/01/assistant/listen.py b/OS/01/_archive/listen.py similarity index 100% rename from OS/01/assistant/listen.py rename to OS/01/_archive/listen.py diff --git a/OS/01/assistant/assistant.py b/OS/01/assistant/assistant.py index ca1167f..8ec1ee3 100644 --- a/OS/01/assistant/assistant.py +++ b/OS/01/assistant/assistant.py @@ -53,7 +53,7 @@ async def websocket_endpoint(websocket: WebSocket): message = to_user.get() await websocket.send_json(message) -audio_chunks = [] +audio_file = bytearray() def queue_listener(): while True: @@ -65,11 +65,11 @@ def queue_listener(): # Hold the audio in a buffer. If it's ready (we got end flag, stt it) if message["type"] == "audio": if "content" in message: - audio_chunks.append(message) + audio_file.extend(message["content"]) if "end" in message: - text = stt(audio_chunks) - audio_chunks = [] - message = {"role": "user", "type": "message", "content": text} + content = stt(audio_file, message["format"]) + audio_file = bytearray() + message = {"role": "user", "type": "message", "content": content} else: continue diff --git a/OS/01/user/record.py b/OS/01/user/record.py index f376e49..5d18c50 100644 --- a/OS/01/user/record.py +++ b/OS/01/user/record.py @@ -52,7 +52,7 @@ async def start_recording(): print("Recording started...") async with websockets.connect("ws://localhost:8000/user") as websocket: # Send the start command with mime type - await websocket.send(json.dumps({"action": "command", "state": "start", "mimeType": "audio/wav"})) + await websocket.send(json.dumps({"role": "user", "type": "audio", "format": "audio/wav", "start": True})) while recording: data = stream.read(chunk) frames.append(data) @@ -65,13 +65,13 @@ async def start_recording(): with open(file_path, 'rb') as audio_file: byte_chunk = audio_file.read(ws_chunk_size) while byte_chunk: - await websocket.send(byte_chunk) + await websocket.send({"role": "user", "type": "audio", "format": "audio/wav", "content": byte_chunk}) byte_chunk = audio_file.read(ws_chunk_size) finally: os.remove(file_path) # Send the end command - await websocket.send(json.dumps({"action": "command", "state": "end"})) + await websocket.send(json.dumps({"role": "user", "type": "audio", "format": "audio/wav", "end": True})) # Receive a json message and then close the connection message = await websocket.recv() From 92e39b243240d18f466455f2be42bdf136672ff0 Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Sat, 3 Feb 2024 20:06:27 -0800 Subject: [PATCH 4/6] start scripts --- OS/01/start_linux.sh | 2 +- OS/01/start_osx.sh | 24 +++++++++++------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/OS/01/start_linux.sh b/OS/01/start_linux.sh index 41c5ce3..0b60c83 100755 --- a/OS/01/start_linux.sh +++ b/OS/01/start_linux.sh @@ -14,7 +14,7 @@ python computer/kernel_watcher.py & # START RUN ENDPOINT -python computer/run.py +python computer/run.py & # START SST AND TTS SERVICES diff --git a/OS/01/start_osx.sh b/OS/01/start_osx.sh index 40132fa..52fb432 100755 --- a/OS/01/start_osx.sh +++ b/OS/01/start_osx.sh @@ -6,11 +6,15 @@ brew update brew install portaudio ffmpeg pip install -r requirements.txt -### CORE +### COMPUTER # START KERNEL WATCHER -python core/kernel_watcher.py & +python computer/kernel_watcher.py & + +# START RUN ENDPOINT + +python computer/run.py & # START SST AND TTS SERVICES @@ -22,18 +26,12 @@ python core/kernel_watcher.py & # (disabled, we'll start with hosted services) # python core/llm/start.py & -# START CORE - -python core/start_core.py & - - -### INTERFACE +# START ASSISTANT -# START INTERFACE +python assistant/assistant.py & -python interface/interface.py & +### USER -# START DISPLAY +# START USER -# (this should be changed to run it in fullscreen / kiosk mode) -open interface/display.html \ No newline at end of file +python user/user.py & \ No newline at end of file From a9196c6f2a891eacb032e2cf86be779d2375b7db Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Sat, 3 Feb 2024 20:09:01 -0800 Subject: [PATCH 5/6] start script --- OS/01/requirements.txt | 1 - OS/01/{start_linux.sh => start.sh} | 6 +++-- OS/01/start_osx.sh | 37 ------------------------------ 3 files changed, 4 insertions(+), 40 deletions(-) rename OS/01/{start_linux.sh => start.sh} (79%) delete mode 100755 OS/01/start_osx.sh diff --git a/OS/01/requirements.txt b/OS/01/requirements.txt index bfb61e6..a341655 100644 --- a/OS/01/requirements.txt +++ b/OS/01/requirements.txt @@ -2,7 +2,6 @@ git+https://github.com/KillianLucas/open-interpreter.git asyncio pyaudio pynput -redis fastapi uvicorn websockets diff --git a/OS/01/start_linux.sh b/OS/01/start.sh similarity index 79% rename from OS/01/start_linux.sh rename to OS/01/start.sh index 0b60c83..f0d693d 100755 --- a/OS/01/start_linux.sh +++ b/OS/01/start.sh @@ -2,8 +2,10 @@ # INSTALL REQUIREMENTS -sudo apt-get update -sudo apt-get install redis-server +if [[ "$OSTYPE" == "darwin"* ]]; then + brew update + brew install portaudio ffmpeg +fi pip install -r requirements.txt ### COMPUTER diff --git a/OS/01/start_osx.sh b/OS/01/start_osx.sh deleted file mode 100755 index 52fb432..0000000 --- a/OS/01/start_osx.sh +++ /dev/null @@ -1,37 +0,0 @@ -### SETUP - -# INSTALL REQUIREMENTS - -brew update -brew install portaudio ffmpeg -pip install -r requirements.txt - -### COMPUTER - -# START KERNEL WATCHER - -python computer/kernel_watcher.py & - -# START RUN ENDPOINT - -python computer/run.py & - -# START SST AND TTS SERVICES - -# (todo) -# (i think we should start with hosted services) - -# START LLM - -# (disabled, we'll start with hosted services) -# python core/llm/start.py & - -# START ASSISTANT - -python assistant/assistant.py & - -### USER - -# START USER - -python user/user.py & \ No newline at end of file From 062d288d33a37f4c5ae43e5e8b36b22df66e83c6 Mon Sep 17 00:00:00 2001 From: birbbit Date: Sat, 3 Feb 2024 20:35:28 -0800 Subject: [PATCH 6/6] runs locally --- OS/01/assistant/assistant.py | 11 ++++++----- OS/01/assistant/create_interpreter.py | 10 +++++----- OS/01/assistant/tts.py | 2 +- OS/01/user/record.py | 2 +- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/OS/01/assistant/assistant.py b/OS/01/assistant/assistant.py index 8ec1ee3..681776b 100644 --- a/OS/01/assistant/assistant.py +++ b/OS/01/assistant/assistant.py @@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also In a while loop we watch the queue and handle it. """ +import ast import json import time import queue @@ -31,9 +32,9 @@ to_user = queue.Queue() to_assistant = queue.Queue() # This is so we only say() full sentences -accumulated_text = "" def is_full_sentence(text): return text.endswith(('.', '!', '?')) + def split_into_sentences(text): return re.split(r'(?<=[.!?])\s+', text) @@ -49,13 +50,13 @@ async def websocket_endpoint(websocket: WebSocket): while True: data = await websocket.receive_json() to_assistant.put(data) - if not to_user.empty(): + while not to_user.empty(): message = to_user.get() await websocket.send_json(message) -audio_file = bytearray() def queue_listener(): + audio_file = bytearray() while True: # Check 10x a second for new messages while to_assistant.empty(): @@ -65,7 +66,7 @@ def queue_listener(): # Hold the audio in a buffer. If it's ready (we got end flag, stt it) if message["type"] == "audio": if "content" in message: - audio_file.extend(message["content"]) + audio_file.extend(bytes(ast.literal_eval(message["content"]))) if "end" in message: content = stt(audio_file, message["format"]) audio_file = bytearray() @@ -122,4 +123,4 @@ queue_thread.start() # Run the FastAPI app if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/OS/01/assistant/create_interpreter.py b/OS/01/assistant/create_interpreter.py index df4197a..413d125 100644 --- a/OS/01/assistant/create_interpreter.py +++ b/OS/01/assistant/create_interpreter.py @@ -55,7 +55,7 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run interpreter.llm.api_key = os.getenv('OPENAI_API_KEY') interpreter.llm.model = "gpt-4" interpreter.auto_run = True - interpreter.force_task_completion = True + interpreter.force_task_completion = False ### MISC SETTINGS @@ -94,12 +94,12 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run data = {"language": "python", "code": code} # Send the data to the /run endpoint - response = requests.post("http://localhost:8000/run", json=data, stream=True) + response = requests.post("http://localhost:9000/run", json=data, stream=True) # Stream the response - for line in response.iter_lines(): - if line: # filter out keep-alive new lines - yield json.loads(line) + for chunk in response.iter_content(chunk_size=100000000): + if chunk: # filter out keep-alive new lines + yield json.loads(chunk.decode()) def stop(self): """Stops the code.""" diff --git a/OS/01/assistant/tts.py b/OS/01/assistant/tts.py index 1a1f55c..a71d530 100644 --- a/OS/01/assistant/tts.py +++ b/OS/01/assistant/tts.py @@ -3,4 +3,4 @@ Defines a function which takes text and returns a path to an audio file. """ def tts(text): - return path_to_audio \ No newline at end of file + return [] diff --git a/OS/01/user/record.py b/OS/01/user/record.py index 5d18c50..d9b4120 100644 --- a/OS/01/user/record.py +++ b/OS/01/user/record.py @@ -65,7 +65,7 @@ async def start_recording(): with open(file_path, 'rb') as audio_file: byte_chunk = audio_file.read(ws_chunk_size) while byte_chunk: - await websocket.send({"role": "user", "type": "audio", "format": "audio/wav", "content": byte_chunk}) + await websocket.send(json.dumps({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_chunk)})) byte_chunk = audio_file.read(ws_chunk_size) finally: os.remove(file_path)