From 63ab61608266b12d1a3a8512b5e66abbf6700cd0 Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Sat, 3 Feb 2024 23:32:55 -0800 Subject: [PATCH] Two way websocket in user + settings --- OS/01/assistant/assistant.py | 4 +- OS/01/assistant/conversations/user.json | 2 +- OS/01/assistant/create_interpreter.py | 4 +- OS/01/assistant/stt.py | 4 +- OS/01/computer/kernel_watcher.py | 5 +- OS/01/computer/run.py | 6 +- OS/01/requirements.txt | 1 + OS/01/start.sh | 22 +++- OS/01/user/record.py | 17 ++-- OS/01/user/user.py | 128 ++++++++++++++++++++++-- 10 files changed, 162 insertions(+), 31 deletions(-) diff --git a/OS/01/assistant/assistant.py b/OS/01/assistant/assistant.py index 681776b..1d623c7 100644 --- a/OS/01/assistant/assistant.py +++ b/OS/01/assistant/assistant.py @@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also In a while loop we watch the queue and handle it. """ +import os import ast import json import time @@ -54,7 +55,6 @@ async def websocket_endpoint(websocket: WebSocket): message = to_user.get() await websocket.send_json(message) - def queue_listener(): audio_file = bytearray() while True: @@ -123,4 +123,4 @@ queue_thread.start() # Run the FastAPI app if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000) + uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('ASSISTANT_PORT', 8000))) diff --git a/OS/01/assistant/conversations/user.json b/OS/01/assistant/conversations/user.json index b52c39e..494fe84 100644 --- a/OS/01/assistant/conversations/user.json +++ b/OS/01/assistant/conversations/user.json @@ -1 +1 @@ -[{"role": "user", "type": "message", "content": "Help me get my laundry done.\n\n"}, {"role": "assistant", "type": "message", "content": "To help you get your laundry done efficiently, we'll break this task down into smaller, manageable steps and schedule reminders as necessary. Here's a possible breakdown:\n\n1. Gather all dirty laundry items.\n2. Sort laundry into appropriate piles (e.g., whites, colors, delicates).\n3. Check laundry labels for any special washing instructions.\n4. Start the first load of laundry in the washing machine.\n5. Transfer the first load to the dryer when the wash cycle is complete.\n6. Fold or hang clothes immediately after they dry.\n7. Put all laundered items away.\n\nI'll add these steps to your task list and focus on guiding you through each step one at a time. Let's start with the first task: gathering all dirty laundry items. I'll check in with you in 15 minutes to see if you've completed this task.\n\n"}, {"role": "assistant", "type": "code", "format": "python", "content": "\nfrom datetime import datetime, timedelta\n\ntasks = [\n \"Gather all dirty laundry items.\",\n \"Sort laundry into appropriate piles.\",\n \"Check laundry labels for any special instructions.\",\n \"Start the first load of laundry in the washing machine.\",\n \"Transfer the first load to the dryer when wash is complete.\",\n \"Fold or hang clothes after they dry.\",\n \"Put all laundered items away.\"\n]\n\nschedule(datetime.now() + timedelta(minutes=15), \"Have you gathered all the dirty laundry items?\")\ntasks[0]\n"}, {"role": "computer", "type": "console", "format": "output", "content": "'Gather all dirty laundry items.'"}, {"role": "assistant", "type": "message", "content": "The output means that your current task is to gather all dirty laundry items. This is the first step in getting your laundry done. Focus on collecting all the laundry that needs washing from around your home. I've scheduled a reminder to check in with you in 15 minutes to see if you've completed this task. Once you're done with gathering all the laundry items, we'll move on to the next step. Keep up the good work!"}, {"role": "user", "type": "message", "content": "Check in with me in 30 seconds for that."}, {"role": "assistant", "type": "code", "format": "python", "content": "\n# Rescheduling the check-in reminder for gathering laundry to 30 seconds from now\nschedule(datetime.now() + timedelta(seconds=30), \"Have you gathered all the dirty laundry items?\")\n"}, {"role": "computer", "type": "console", "format": "output", "content": ""}, {"role": "assistant", "type": "message", "content": "The absence of text output signifies that the reminder to check in on your task (\"Have you gathered all the dirty laundry items?\") has been successfully scheduled for 30 seconds from now. There's nothing further you need to do until that reminder comes through. Once you've received the reminder, please update me on your progress, and we'll move forward with the next step in completing your laundry. Great job on getting started!"}, {"role": "computer", "type": "console", "format": "output", "content": "Have you gathered all the dirty laundry items?"}, {"role": "assistant", "type": "message", "content": "The output indicates that it's time for the check-in reminder to see if you've completed gathering all the dirty laundry items, as scheduled. If you have completed this task, we will move on to the next step in your laundry process, which is sorting the laundry into appropriate piles. If you haven't finished gathering the laundry yet, please take a moment to do so.\n\nCould you let me know if you've finished gathering all the laundry?"}, {"role": "user", "type": "message", "content": "Did you need to run code? It's okay if not, but please do if you did."}, {"role": "assistant", "type": "message", "content": "Since we are at a check-in point where I need your input to proceed, technically, I don't need to run code right this moment. The next steps revolve around your response. I'll provide guidance based on whether you've completed gathering all the dirty laundry items. If you have, I can then update the task list and advise on sorting the laundry, which involves running code to manage the task progression.\n\nSo, have you finished gathering all the dirty laundry items?"}, {"role": "user", "type": "message", "content": "hi"}, {"role": "assistant", "type": "message", "content": "Hello! It seems we were checking in to see if you've finished gathering all the dirty laundry items for your laundry task. How did it go? Are you ready to move on to the next step, or is there anything else you need assistance with right now?"}, {"role": "user", "type": "message", "content": "Did you need to run code? It's okay if not, but please do if you did."}, {"role": "assistant", "type": "message", "content": "Given your update, let's progress with the task list. The next step after gathering all dirty laundry items is to sort them into appropriate piles. Since you're ready to move on, I will update the task list to reflect the completion of the first task and provide the details of the next task.\n\nLet's run the code to update your task progression.\n\n"}, {"role": "assistant", "type": "code", "format": "python", "content": "\n# Update tasks list by removing the completed task\ntasks = tasks[1:]\n\n# Check which task is next\nnext_task = tasks[0] if tasks else \"No more laundry tasks.\"\n\nnext_task\n"}, {"role": "computer", "type": "console", "format": "output", "content": "'Sort laundry into appropriate piles.'"}, {"role": "assistant", "type": "message", "content": "The output means that your next task is to sort the laundry into appropriate piles. This step is crucial for an efficient laundry process as it helps to ensure that clothes are washed under the right settings, preserving their quality and color. Here's a quick guide on sorting:\n\n1. **Whites:** Keep white shirts, sheets, and other similar items in one pile.\n2. **Colors:** Separate colored clothes, ensuring to differentiate between darks and lights if necessary.\n3. **Delicates:** These are items that need gentle handling. Look for fabrics like silk or items with embellishments.\n4. **Heavy Items:** Jeans and towels often need their own cycle due to their weight and drying times.\n\nOnce you've sorted the laundry into these piles, you'll be ready to start the first load in the washing machine. How much time do you think you'll need for sorting? I can set a reminder for you to check back in once you're done with this task."}, {"role": "user", "type": "message", "content": "20 seconds"}, {"role": "assistant", "type": "code", "format": "python", "content": "\n# Scheduling a reminder to check on the sorting task in 20 seconds\nschedule(datetime.now() + timedelta(seconds=20), \"Have you finished sorting the laundry into appropriate piles?\")\n"}, {"role": "computer", "type": "console", "format": "output", "content": ""}, {"role": "assistant", "type": "message", "content": "Since the reminder has been successfully scheduled for 20 seconds to check if you've finished sorting the laundry into appropriate piles, there's nothing more you need to do at the moment except complete the sorting task. Once the reminder comes through, please let me know if you've finished sorting the laundry, and we can move forward with the next step in your laundry process. Keep up the great work!"}, {"role": "computer", "type": "console", "format": "output", "content": "Have you finished sorting the laundry into appropriate piles?"}, {"role": "assistant", "type": "message", "content": "The output indicates that it's time for our check-in to see if you've completed the task of sorting the laundry into appropriate piles. If you have finished this task, we can move on to the next step, which involves starting a laundry cycle with one of the sorted piles. \n\nPlease let me know if the sorting is complete, and we can proceed with the next step in your laundry process."}, {"role": "user", "type": "message", "content": "Did you need to run code? It's okay if not, but please do if you did."}, {"role": "assistant", "type": "message", "content": "Since we are at a decision point depending on whether you've completed sorting the laundry, the next step doesn't involve running code until I know the status of the task. \n\nIf you've finished sorting the laundry, I'll update our task list and guide you through starting the washing machine with one of the sorted piles. Let's confirm your progress first.\n\nHave you finished sorting the laundry into appropriate piles?"}] \ No newline at end of file +[{"role": "user", "type": "message", "content": ".\n"}, {"role": "user", "type": "message", "content": "Oh wait, does this work?\n"}] \ No newline at end of file diff --git a/OS/01/assistant/create_interpreter.py b/OS/01/assistant/create_interpreter.py index 413d125..a78dbc0 100644 --- a/OS/01/assistant/create_interpreter.py +++ b/OS/01/assistant/create_interpreter.py @@ -94,8 +94,8 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run data = {"language": "python", "code": code} # Send the data to the /run endpoint - response = requests.post("http://localhost:9000/run", json=data, stream=True) - + computer_port = os.getenv('COMPUTER_PORT', '9000') + response = requests.post(f"http://localhost:{computer_port}/run", json=data, stream=True) # Stream the response for chunk in response.iter_content(chunk_size=100000000): if chunk: # filter out keep-alive new lines diff --git a/OS/01/assistant/stt.py b/OS/01/assistant/stt.py index adbad7f..2e12d6c 100644 --- a/OS/01/assistant/stt.py +++ b/OS/01/assistant/stt.py @@ -30,6 +30,9 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: with open(input_path, 'wb') as f: f.write(audio) + # Check if the input file exists + assert os.path.exists(input_path), f"Input file does not exist: {input_path}" + # Export to wav output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() @@ -42,7 +45,6 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str: os.remove(input_path) os.remove(output_path) - def stt(audio_bytes: bytearray, mime_type): with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path: audio_file = open(wav_file_path, "rb") diff --git a/OS/01/computer/kernel_watcher.py b/OS/01/computer/kernel_watcher.py index 9264e49..2a3b99f 100644 --- a/OS/01/computer/kernel_watcher.py +++ b/OS/01/computer/kernel_watcher.py @@ -7,6 +7,7 @@ import subprocess import time import requests import platform +import os class Device: def __init__(self, device_type, device_info): @@ -118,8 +119,8 @@ def run_kernel_watch_linux(): if custom_filter(message): messages_for_core.append(message) if messages_for_core: - requests.post('http://localhost:8000/computer', json = {'messages': messages_for_core}) - + port = os.getenv('ASSISTANT_PORT', 8000) + requests.post(f'http://localhost:{port}/computer', json = {'messages': messages_for_core}) time.sleep(2) diff --git a/OS/01/computer/run.py b/OS/01/computer/run.py index 2c7adaa..44a365b 100644 --- a/OS/01/computer/run.py +++ b/OS/01/computer/run.py @@ -2,7 +2,7 @@ Exposes a SSE streaming server endpoint at /run, which recieves language and code, and streams the output. """ - +import os import json from interpreter import interpreter import uvicorn @@ -20,9 +20,9 @@ app = FastAPI() @app.post("/run") async def run_code(code: Code): def generator(): - for chunk in interpreter.computer.run(code.language, code.code, stream=True): + for chunk in interpreter.computer.run(code.language, code.code): yield json.dumps(chunk) return StreamingResponse(generator()) if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=9000) + uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('COMPUTER_PORT', 9000))) diff --git a/OS/01/requirements.txt b/OS/01/requirements.txt index a341655..7022549 100644 --- a/OS/01/requirements.txt +++ b/OS/01/requirements.txt @@ -7,3 +7,4 @@ uvicorn websockets python-dotenv ffmpeg-python +textual \ No newline at end of file diff --git a/OS/01/start.sh b/OS/01/start.sh index f0d693d..01a261a 100755 --- a/OS/01/start.sh +++ b/OS/01/start.sh @@ -1,12 +1,22 @@ +### SETTINGS + +export MODE_01=LIGHT +export ASSISTANT_PORT=8000 +export COMPUTER_PORT=8001 + +# Kill whatever's on the ASSISTANT_PORT and COMPUTER_PORT +lsof -ti tcp:$ASSISTANT_PORT | xargs kill +lsof -ti tcp:$COMPUTER_PORT | xargs kill + ### SETUP # INSTALL REQUIREMENTS -if [[ "$OSTYPE" == "darwin"* ]]; then - brew update - brew install portaudio ffmpeg -fi -pip install -r requirements.txt +# if [[ "$OSTYPE" == "darwin"* ]]; then +# brew update +# brew install portaudio ffmpeg +# fi +# pip install -r requirements.txt ### COMPUTER @@ -28,6 +38,8 @@ python computer/run.py & # (disabled, we'll start with hosted services) # python core/llm/start.py & +sleep 6 + # START ASSISTANT python assistant/assistant.py & diff --git a/OS/01/user/record.py b/OS/01/user/record.py index d9b4120..3ee59c7 100644 --- a/OS/01/user/record.py +++ b/OS/01/user/record.py @@ -5,18 +5,14 @@ Connects to a websocket at /user. Sends shit to it, and displays/plays the shit For now, just handles a spacebar being pressed— for the duration it's pressed, it should record audio. - -SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript, -sends it to /user in LMC format (role: user, etc) - -MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py. """ import os import pyaudio import threading import asyncio -import websockets +import websocket +import time import json from pynput import keyboard import wave @@ -35,6 +31,15 @@ recording = False # Flag to control recording state ws_chunk_size = 4096 # Websocket stream chunk size +port = os.getenv('ASSISTANT_PORT', 8000) +ws_url = f"ws://localhost:{port}/user" +while True: + try: + ws = websocket.create_connection(ws_url) + break + except ConnectionRefusedError: + time.sleep(1) + async def start_recording(): global recording diff --git a/OS/01/user/user.py b/OS/01/user/user.py index ee3529f..42bbda1 100644 --- a/OS/01/user/user.py +++ b/OS/01/user/user.py @@ -1,13 +1,123 @@ -""" -Handles everything the user interacts through. +import asyncio +import threading +import websockets +import os +import pyaudio +from queue import Queue +from pynput import keyboard +import json -Connects to a websocket at /user. Sends shit to it, and displays/plays the shit it sends back. +# Configuration for Audio Recording +CHUNK = 1024 # Record in chunks of 1024 samples +FORMAT = pyaudio.paInt16 # 16 bits per sample +CHANNELS = 1 # Mono +RATE = 44100 # Sample rate +RECORDING = False # Flag to control recording state +SPACEBAR_PRESSED = False # Flag to track spacebar press state -For now, just handles a spacebar being pressed— for the duration it's pressed, -it should record audio. +# Configuration for WebSocket +PORT = os.getenv('ASSISTANT_PORT', '8000') +WS_URL = f"ws://localhost:{PORT}/user" -SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript, -sends it to /user in LMC format (role: user, etc) +# Initialize PyAudio +p = pyaudio.PyAudio() -MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py. -""" \ No newline at end of file +# Queue for sending data +data_queue = Queue() + +import wave +import tempfile +from datetime import datetime + + +def record_audio(): + """Record audio from the microphone and add it to the queue.""" + stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) + print("Recording started...") + global RECORDING + + # Create a temporary WAV file to store the audio data + temp_dir = tempfile.gettempdir() + wav_path = os.path.join(temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") + wav_file = wave.open(wav_path, 'wb') + wav_file.setnchannels(CHANNELS) + wav_file.setsampwidth(p.get_sample_size(FORMAT)) + wav_file.setframerate(RATE) + + while RECORDING: + data = stream.read(CHUNK, exception_on_overflow=False) + wav_file.writeframes(data) + + wav_file.close() + stream.stop_stream() + stream.close() + print("Recording stopped.") + + # After recording is done, read and stream the audio file in chunks + with open(wav_path, 'rb') as audio_file: + byte_data = audio_file.read(CHUNK) + while byte_data: + data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)}) + byte_data = audio_file.read(CHUNK) + + data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True}) + + +def toggle_recording(state): + """Toggle the recording state.""" + global RECORDING, SPACEBAR_PRESSED + if state and not SPACEBAR_PRESSED: + SPACEBAR_PRESSED = True + if not RECORDING: + RECORDING = True + threading.Thread(target=record_audio).start() + elif not state and SPACEBAR_PRESSED: + SPACEBAR_PRESSED = False + RECORDING = False + +async def websocket_communication(): + """Handle WebSocket communication and listen for incoming messages.""" + async with websockets.connect(WS_URL) as websocket: + while True: + # Send data from the queue to the server + while not data_queue.empty(): + data = data_queue.get_nowait() + await websocket.send(json.dumps(data)) + + # Listen for incoming messages from the server + try: + incoming_message = await asyncio.wait_for(websocket.recv(), timeout=1.0) + print(f"Received from server: {incoming_message}") + except asyncio.TimeoutError: + # No message received within timeout period + pass + + await asyncio.sleep(0.1) + + +def on_press(key): + """Detect spacebar press.""" + if key == keyboard.Key.space: + toggle_recording(True) + +def on_release(key): + """Detect spacebar release.""" + if key == keyboard.Key.space: + toggle_recording(False) + +def main(): + import time + time.sleep(10) + # Start the WebSocket communication in a separate asyncio event loop + ws_thread = threading.Thread(target=lambda: asyncio.run(websocket_communication()), daemon=True) + ws_thread.start() + + # Keyboard listener for spacebar press/release + with keyboard.Listener(on_press=on_press, on_release=on_release) as listener: + print("Press the spacebar to start/stop recording. Press ESC to exit.") + listener.join() + + p.terminate() + +if __name__ == "__main__": + main()