diff --git a/OS/01/assistant/listen.py b/OS/01/_archive/listen.py similarity index 100% rename from OS/01/assistant/listen.py rename to OS/01/_archive/listen.py diff --git a/OS/01/assistant/assistant.py b/OS/01/assistant/assistant.py index ca1167f..681776b 100644 --- a/OS/01/assistant/assistant.py +++ b/OS/01/assistant/assistant.py @@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also In a while loop we watch the queue and handle it. """ +import ast import json import time import queue @@ -31,9 +32,9 @@ to_user = queue.Queue() to_assistant = queue.Queue() # This is so we only say() full sentences -accumulated_text = "" def is_full_sentence(text): return text.endswith(('.', '!', '?')) + def split_into_sentences(text): return re.split(r'(?<=[.!?])\s+', text) @@ -49,13 +50,13 @@ async def websocket_endpoint(websocket: WebSocket): while True: data = await websocket.receive_json() to_assistant.put(data) - if not to_user.empty(): + while not to_user.empty(): message = to_user.get() await websocket.send_json(message) -audio_chunks = [] def queue_listener(): + audio_file = bytearray() while True: # Check 10x a second for new messages while to_assistant.empty(): @@ -65,11 +66,11 @@ def queue_listener(): # Hold the audio in a buffer. If it's ready (we got end flag, stt it) if message["type"] == "audio": if "content" in message: - audio_chunks.append(message) + audio_file.extend(bytes(ast.literal_eval(message["content"]))) if "end" in message: - text = stt(audio_chunks) - audio_chunks = [] - message = {"role": "user", "type": "message", "content": text} + content = stt(audio_file, message["format"]) + audio_file = bytearray() + message = {"role": "user", "type": "message", "content": content} else: continue @@ -122,4 +123,4 @@ queue_thread.start() # Run the FastAPI app if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/OS/01/assistant/create_interpreter.py b/OS/01/assistant/create_interpreter.py index df4197a..413d125 100644 --- a/OS/01/assistant/create_interpreter.py +++ b/OS/01/assistant/create_interpreter.py @@ -55,7 +55,7 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run interpreter.llm.api_key = os.getenv('OPENAI_API_KEY') interpreter.llm.model = "gpt-4" interpreter.auto_run = True - interpreter.force_task_completion = True + interpreter.force_task_completion = False ### MISC SETTINGS @@ -94,12 +94,12 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run data = {"language": "python", "code": code} # Send the data to the /run endpoint - response = requests.post("http://localhost:8000/run", json=data, stream=True) + response = requests.post("http://localhost:9000/run", json=data, stream=True) # Stream the response - for line in response.iter_lines(): - if line: # filter out keep-alive new lines - yield json.loads(line) + for chunk in response.iter_content(chunk_size=100000000): + if chunk: # filter out keep-alive new lines + yield json.loads(chunk.decode()) def stop(self): """Stops the code.""" diff --git a/OS/01/assistant/tts.py b/OS/01/assistant/tts.py index 4117d06..8d58619 100644 --- a/OS/01/assistant/tts.py +++ b/OS/01/assistant/tts.py @@ -15,4 +15,4 @@ def tts(text, file_path): ) response.stream_to_file(file_path) - \ No newline at end of file + diff --git a/OS/01/requirements.txt b/OS/01/requirements.txt index 4500632..a341655 100644 --- a/OS/01/requirements.txt +++ b/OS/01/requirements.txt @@ -2,9 +2,8 @@ git+https://github.com/KillianLucas/open-interpreter.git asyncio pyaudio pynput -redis fastapi uvicorn websockets python-dotenv -ffmpeg-python \ No newline at end of file +ffmpeg-python diff --git a/OS/01/start.sh b/OS/01/start.sh index 41c5ce3..f0d693d 100755 --- a/OS/01/start.sh +++ b/OS/01/start.sh @@ -2,8 +2,10 @@ # INSTALL REQUIREMENTS -sudo apt-get update -sudo apt-get install redis-server +if [[ "$OSTYPE" == "darwin"* ]]; then + brew update + brew install portaudio ffmpeg +fi pip install -r requirements.txt ### COMPUTER @@ -14,7 +16,7 @@ python computer/kernel_watcher.py & # START RUN ENDPOINT -python computer/run.py +python computer/run.py & # START SST AND TTS SERVICES diff --git a/OS/01/user/record.py b/OS/01/user/record.py index f376e49..d9b4120 100644 --- a/OS/01/user/record.py +++ b/OS/01/user/record.py @@ -52,7 +52,7 @@ async def start_recording(): print("Recording started...") async with websockets.connect("ws://localhost:8000/user") as websocket: # Send the start command with mime type - await websocket.send(json.dumps({"action": "command", "state": "start", "mimeType": "audio/wav"})) + await websocket.send(json.dumps({"role": "user", "type": "audio", "format": "audio/wav", "start": True})) while recording: data = stream.read(chunk) frames.append(data) @@ -65,13 +65,13 @@ async def start_recording(): with open(file_path, 'rb') as audio_file: byte_chunk = audio_file.read(ws_chunk_size) while byte_chunk: - await websocket.send(byte_chunk) + await websocket.send(json.dumps({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_chunk)})) byte_chunk = audio_file.read(ws_chunk_size) finally: os.remove(file_path) # Send the end command - await websocket.send(json.dumps({"action": "command", "state": "end"})) + await websocket.send(json.dumps({"role": "user", "type": "audio", "format": "audio/wav", "end": True})) # Receive a json message and then close the connection message = await websocket.recv()