Two way websocket in user + settings

1 year ago · 63ab616082
parent 5f7d53f0b9
commit 63ab616082
10 changed files with 162 additions and 31 deletions
--- a/OS/01/assistant/assistant.py
+++ b/OS/01/assistant/assistant.py
@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also
 In a while loop we watch the queue and handle it.
 """

+import os
 import ast
 import json
 import time
@ -54,7 +55,6 @@ async def websocket_endpoint(websocket: WebSocket):
            message = to_user.get()
            await websocket.send_json(message)

-
 def queue_listener():
    audio_file = bytearray()
    while True:
@ -123,4 +123,4 @@ queue_thread.start()

 # Run the FastAPI app
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('ASSISTANT_PORT', 8000)))
--- a/OS/01/assistant/conversations/user.json
+++ b/OS/01/assistant/conversations/user.json
--- a/OS/01/assistant/create_interpreter.py
+++ b/OS/01/assistant/create_interpreter.py
@ -94,8 +94,8 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run
            data = {"language": "python", "code": code}

            # Send the data to the /run endpoint
-            response = requests.post("http://localhost:9000/run", json=data, stream=True)
-
+            computer_port = os.getenv('COMPUTER_PORT', '9000')
+            response = requests.post(f"http://localhost:{computer_port}/run", json=data, stream=True)
            # Stream the response
            for chunk in response.iter_content(chunk_size=100000000):
                if chunk:  # filter out keep-alive new lines
--- a/OS/01/assistant/stt.py
+++ b/OS/01/assistant/stt.py
@ -30,6 +30,9 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    with open(input_path, 'wb') as f:
        f.write(audio)

+    # Check if the input file exists
+    assert os.path.exists(input_path), f"Input file does not exist: {input_path}"
+
    # Export to wav
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
    ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
@ -42,7 +45,6 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
        os.remove(input_path)
        os.remove(output_path)

-
 def stt(audio_bytes: bytearray, mime_type):
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        audio_file = open(wav_file_path, "rb")
--- a/OS/01/computer/kernel_watcher.py
+++ b/OS/01/computer/kernel_watcher.py
@ -7,6 +7,7 @@ import subprocess
 import time
 import requests
 import platform
+import os

 class Device:
    def __init__(self, device_type, device_info):
@ -118,8 +119,8 @@ def run_kernel_watch_linux():
            if custom_filter(message):
                messages_for_core.append(message)
        if messages_for_core:
-            requests.post('http://localhost:8000/computer', json = {'messages': messages_for_core})
-
+            port = os.getenv('ASSISTANT_PORT', 8000)
+            requests.post(f'http://localhost:{port}/computer', json = {'messages': messages_for_core})
        time.sleep(2)


--- a/OS/01/computer/run.py
+++ b/OS/01/computer/run.py
@ -2,7 +2,7 @@
 Exposes an SSE streaming server endpoint at /run, which receives language and code,
 and streams the output.
 """
-
+import os
 import json
 from interpreter import interpreter
 import uvicorn
@ -20,9 +20,9 @@ app = FastAPI()
@app.post("/run")
 async def run_code(code: Code):
    def generator():
-        for chunk in interpreter.computer.run(code.language, code.code, stream=True):
+        for chunk in interpreter.computer.run(code.language, code.code):
            yield json.dumps(chunk)
    return StreamingResponse(generator())

 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=9000)
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('COMPUTER_PORT', 9000)))
--- a/OS/01/requirements.txt
+++ b/OS/01/requirements.txt
@ -7,3 +7,4 @@ uvicorn
 websockets
 python-dotenv
 ffmpeg-python
+textual
--- a/OS/01/start.sh
+++ b/OS/01/start.sh
@ -1,12 +1,22 @@
+### SETTINGS
+
+export MODE_01=LIGHT
+export ASSISTANT_PORT=8000
+export COMPUTER_PORT=8001
+
+# Kill whatever's on the ASSISTANT_PORT and COMPUTER_PORT
+lsof -ti tcp:$ASSISTANT_PORT | xargs kill
+lsof -ti tcp:$COMPUTER_PORT | xargs kill
+
 ### SETUP

 # INSTALL REQUIREMENTS

-if [[ "$OSTYPE" == "darwin"* ]]; then
-    brew update
-    brew install portaudio ffmpeg
-fi
-pip install -r requirements.txt
+# if [[ "$OSTYPE" == "darwin"* ]]; then
+#     brew update
+#     brew install portaudio ffmpeg
+# fi
+# pip install -r requirements.txt

 ### COMPUTER

@ -28,6 +38,8 @@ python computer/run.py &
 # (disabled, we'll start with hosted services)
 # python core/llm/start.py &

+sleep 6
+
 # START ASSISTANT

 python assistant/assistant.py &
--- a/OS/01/user/record.py
+++ b/OS/01/user/record.py
@ -5,18 +5,14 @@ Connects to a websocket at /user. Sends shit to it, and displays/plays the shit

 For now, just handles a spacebar being pressed— for the duration it's pressed,
 it should record audio.
-
-SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
-sends it to /user in LMC format (role: user, etc)
-
-MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
 """

 import os
 import pyaudio
 import threading
 import asyncio
-import websockets
+import websocket
+import time
 import json
 from pynput import keyboard
 import wave
@ -35,6 +31,15 @@ recording = False  # Flag to control recording state

 ws_chunk_size = 4096 # Websocket stream chunk size

+# Connect to the /user websocket on localhost at module level. The port comes
+# from the ASSISTANT_PORT environment variable, falling back to 8000.
+port = os.getenv('ASSISTANT_PORT', 8000)
+ws_url = f"ws://localhost:{port}/user"
+# Retry once per second for as long as the connection is refused; there is no
+# retry limit, and any other exception propagates.
+# NOTE(review): presumably this waits for the assistant server to come up — confirm.
+while True:
+    try:
+        ws = websocket.create_connection(ws_url)
+        break
+    except ConnectionRefusedError:
+        time.sleep(1)
+
 async def start_recording():
    global recording

--- a/OS/01/user/user.py
+++ b/OS/01/user/user.py
@ -1,13 +1,123 @@
-"""
-Handles everything the user interacts through.
+import asyncio
+import threading
+import websockets
+import os
+import pyaudio
+from queue import Queue
+from pynput import keyboard
+import json

-Connects to a websocket at /user. Sends shit to it, and displays/plays the shit it sends back.
+# Configuration for Audio Recording
+# CHUNK is used both as the number of frames per microphone read and as the
+# number of bytes per read when the recorded WAV file is streamed out.
+CHUNK = 1024  # Record in chunks of 1024 samples
+FORMAT = pyaudio.paInt16  # 16 bits per sample
+CHANNELS = 1  # Mono
+RATE = 44100  # Sample rate
+# RECORDING is set by toggle_recording() and polled by the capture loop in
+# record_audio(), which stops reading once it becomes False.
+RECORDING = False  # Flag to control recording state
+# SPACEBAR_PRESSED lets toggle_recording() ignore a press event that arrives
+# while the key is already marked as pressed.
+SPACEBAR_PRESSED = False  # Flag to track spacebar press state

-For now, just handles a spacebar being pressed— for the duration it's pressed,
-it should record audio.
+# Configuration for WebSocket
+# The port is read from the ASSISTANT_PORT environment variable (default
+# '8000'); WS_URL is the /user endpoint on localhost at that port.
+PORT = os.getenv('ASSISTANT_PORT', '8000')
+WS_URL = f"ws://localhost:{PORT}/user"

-SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
-sends it to /user in LMC format (role: user, etc)
+# Initialize PyAudio
+# Single module-level instance: record_audio() opens its input stream from it
+# and main() calls terminate() on it before returning.
+p = pyaudio.PyAudio()

-MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
-"""
+# Queue for sending data
+# record_audio() puts message dicts on it; websocket_communication() drains it
+# and sends each entry to the server as JSON.
+data_queue = Queue()
+
+import wave
+import tempfile
+from datetime import datetime
+
+
+def record_audio():
+    """Record microphone audio while RECORDING is set, then queue it for sending.
+
+    Audio is captured into a temporary WAV file for as long as the module-level
+    RECORDING flag stays true. Once recording stops, the file is read back in
+    CHUNK-byte pieces; each piece is put on data_queue as an audio message, and
+    a final message carrying "end": True marks the end of the recording.
+
+    The input stream is always closed and the temporary file is always
+    removed, even when recording or reading fails part-way through. In that
+    case the exception propagates and no end marker is queued.
+    """
+    global RECORDING
+
+    # Timestamped path in the system temp directory for this recording.
+    temp_dir = tempfile.gettempdir()
+    wav_path = os.path.join(temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
+
+    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
+    print("Recording started...")
+    try:
+        try:
+            # The context manager closes the WAV writer (finalising its header)
+            # before the stream is released below.
+            with wave.open(wav_path, 'wb') as wav_file:
+                wav_file.setnchannels(CHANNELS)
+                wav_file.setsampwidth(p.get_sample_size(FORMAT))
+                wav_file.setframerate(RATE)
+
+                # RECORDING is cleared from another thread by toggle_recording().
+                while RECORDING:
+                    data = stream.read(CHUNK, exception_on_overflow=False)
+                    wav_file.writeframes(data)
+        finally:
+            stream.stop_stream()
+            stream.close()
+        print("Recording stopped.")
+
+        # After recording is done, read and stream the audio file in chunks
+        with open(wav_path, 'rb') as audio_file:
+            byte_data = audio_file.read(CHUNK)
+            while byte_data:
+                # str() turns the bytes into their Python repr ("b'...'") so the
+                # chunk can be JSON-encoded.
+                # NOTE(review): presumably the server parses this repr back
+                # into bytes — confirm against the receiver.
+                data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
+                byte_data = audio_file.read(CHUNK)
+    finally:
+        # The file is only a staging area; do not leave it behind in the temp
+        # directory. It may not exist if opening it failed.
+        try:
+            os.remove(wav_path)
+        except FileNotFoundError:
+            pass
+
+    data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
+
+
+def toggle_recording(state):
+    """Apply a spacebar event to the recording state.
+
+    A truthy ``state`` means the key went down, a falsy one that it came up.
+    An event that matches the state already held is ignored. A new press
+    starts record_audio() on its own thread unless a recording is already
+    marked as running; a release clears both flags so the capture loop ends.
+    """
+    global RECORDING, SPACEBAR_PRESSED
+
+    # Repeated press while pressed, or release while released: nothing to do.
+    if bool(state) == bool(SPACEBAR_PRESSED):
+        return
+
+    if not state:
+        # Key came up: stop tracking the press and end the recording.
+        SPACEBAR_PRESSED = False
+        RECORDING = False
+        return
+
+    # Key went down for the first time.
+    SPACEBAR_PRESSED = True
+    if RECORDING:
+        return
+    RECORDING = True
+    recorder = threading.Thread(target=record_audio)
+    recorder.start()
+
+async def websocket_communication():
+    """Exchange messages with the server at WS_URL until the connection ends.
+
+    Each pass sends every message currently in data_queue as JSON, then waits
+    up to one second for a server message and prints it if one arrives, then
+    pauses briefly before the next pass.
+    """
+    async with websockets.connect(WS_URL) as connection:
+        while True:
+            # Flush everything that is queued right now.
+            while not data_queue.empty():
+                outgoing = json.dumps(data_queue.get_nowait())
+                await connection.send(outgoing)
+
+            # Poll for a server message; a timeout only means nothing arrived.
+            try:
+                reply = await asyncio.wait_for(connection.recv(), timeout=1.0)
+            except asyncio.TimeoutError:
+                pass
+            else:
+                print(f"Received from server: {reply}")
+
+            await asyncio.sleep(0.1)
+
+
+def on_press(key):
+    """Key-press callback: a spacebar press requests that recording start."""
+    is_space = key == keyboard.Key.space
+    if is_space:
+        toggle_recording(True)
+
+def on_release(key):
+    """Key-release callback for the keyboard listener.
+
+    Releasing the spacebar ends the current recording. Releasing ESC ends any
+    recording still in progress and returns False, which tells the pynput
+    listener to stop; this is what makes the "Press ESC to exit" prompt work.
+
+    Returns:
+        False when ESC was released, otherwise None (listener keeps running).
+    """
+    if key == keyboard.Key.space:
+        toggle_recording(False)
+    elif key == keyboard.Key.esc:
+        # Make sure the capture thread is told to stop before the program exits.
+        toggle_recording(False)
+        return False
+
+def main(startup_delay=10):
+    """Run the client: start websocket traffic, then listen for key events.
+
+    Args:
+        startup_delay: Seconds to sleep before connecting. Defaults to 10, the
+            value that used to be hard-coded.
+            NOTE(review): presumably this gives the server time to start —
+            confirm.
+
+    The websocket loop runs on a daemon thread so it does not keep the process
+    alive. The call blocks until the keyboard listener stops, and the shared
+    PyAudio instance is terminated on the way out even if the listener raises
+    or is interrupted.
+    """
+    import time
+    time.sleep(startup_delay)
+    # Start the WebSocket communication in a separate asyncio event loop
+    ws_thread = threading.Thread(target=lambda: asyncio.run(websocket_communication()), daemon=True)
+    ws_thread.start()
+
+    try:
+        # Keyboard listener for spacebar press/release
+        with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
+            print("Press the spacebar to start/stop recording. Press ESC to exit.")
+            listener.join()
+    finally:
+        p.terminate()
+
+if __name__ == "__main__":
+    main()