diff --git a/OS/01/assistant/assistant.py b/OS/01/assistant/assistant.py index 8ec1ee3..681776b 100644 --- a/OS/01/assistant/assistant.py +++ b/OS/01/assistant/assistant.py @@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also In a while loop we watch the queue and handle it. """ +import ast import json import time import queue @@ -31,9 +32,9 @@ to_user = queue.Queue() to_assistant = queue.Queue() # This is so we only say() full sentences -accumulated_text = "" def is_full_sentence(text): return text.endswith(('.', '!', '?')) + def split_into_sentences(text): return re.split(r'(?<=[.!?])\s+', text) @@ -49,13 +50,13 @@ async def websocket_endpoint(websocket: WebSocket): while True: data = await websocket.receive_json() to_assistant.put(data) - if not to_user.empty(): + while not to_user.empty(): message = to_user.get() await websocket.send_json(message) -audio_file = bytearray() def queue_listener(): + audio_file = bytearray() while True: # Check 10x a second for new messages while to_assistant.empty(): @@ -65,7 +66,7 @@ def queue_listener(): # Hold the audio in a buffer. If it's ready (we got end flag, stt it) if message["type"] == "audio": if "content" in message: - audio_file.extend(message["content"]) + audio_file.extend(bytes(ast.literal_eval(message["content"]))) if "end" in message: content = stt(audio_file, message["format"]) audio_file = bytearray() @@ -122,4 +123,4 @@ queue_thread.start() # Run the FastAPI app if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/OS/01/assistant/create_interpreter.py b/OS/01/assistant/create_interpreter.py index df4197a..413d125 100644 --- a/OS/01/assistant/create_interpreter.py +++ b/OS/01/assistant/create_interpreter.py @@ -55,7 +55,7 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run interpreter.llm.api_key = os.getenv('OPENAI_API_KEY') interpreter.llm.model = "gpt-4" interpreter.auto_run = True - interpreter.force_task_completion = True + interpreter.force_task_completion = False ### MISC SETTINGS @@ -94,12 +94,12 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run data = {"language": "python", "code": code} # Send the data to the /run endpoint - response = requests.post("http://localhost:8000/run", json=data, stream=True) + response = requests.post("http://localhost:9000/run", json=data, stream=True) # Stream the response - for line in response.iter_lines(): - if line: # filter out keep-alive new lines - yield json.loads(line) + for chunk in response.iter_content(chunk_size=100000000): + if chunk: # filter out keep-alive new lines + yield json.loads(chunk.decode()) def stop(self): """Stops the code.""" diff --git a/OS/01/assistant/tts.py b/OS/01/assistant/tts.py index 1a1f55c..a71d530 100644 --- a/OS/01/assistant/tts.py +++ b/OS/01/assistant/tts.py @@ -3,4 +3,4 @@ Defines a function which takes text and returns a path to an audio file. """ def tts(text): - return path_to_audio \ No newline at end of file + return [] diff --git a/OS/01/user/record.py b/OS/01/user/record.py index 5d18c50..d9b4120 100644 --- a/OS/01/user/record.py +++ b/OS/01/user/record.py @@ -65,7 +65,7 @@ async def start_recording(): with open(file_path, 'rb') as audio_file: byte_chunk = audio_file.read(ws_chunk_size) while byte_chunk: - await websocket.send({"role": "user", "type": "audio", "format": "audio/wav", "content": byte_chunk}) + await websocket.send(json.dumps({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_chunk)})) byte_chunk = audio_file.read(ws_chunk_size) finally: os.remove(file_path)