Two way websocket in user + settings

1 year ago · 63ab616082
parent 5f7d53f0b9
commit 63ab616082
10 changed files with 162 additions and 31 deletions
--- a/OS/01/assistant/assistant.py
+++ b/OS/01/assistant/assistant.py
@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also
 In a while loop we watch the queue and handle it.
 """
 import os
 import ast
 import json
 import time
@ -54,7 +55,6 @@ async def websocket_endpoint(websocket: WebSocket):
            message = to_user.get()
            await websocket.send_json(message)
 def queue_listener():
    audio_file = bytearray()
    while True:
@ -123,4 +123,4 @@ queue_thread.start()
 # Run the FastAPI app
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('ASSISTANT_PORT', 8000)))
--- a/OS/01/assistant/conversations/user.json
+++ b/OS/01/assistant/conversations/user.json
--- a/OS/01/assistant/create_interpreter.py
+++ b/OS/01/assistant/create_interpreter.py
@ -94,8 +94,8 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run
            data = {"language": "python", "code": code}
            # Send the data to the /run endpoint
-            response = requests.post("http://localhost:9000/run", json=data, stream=True)
+            computer_port = os.getenv('COMPUTER_PORT', '9000')
-
+            response = requests.post(f"http://localhost:{computer_port}/run", json=data, stream=True)
            # Stream the response
            for chunk in response.iter_content(chunk_size=100000000):
                if chunk:  # filter out keep-alive new lines
--- a/OS/01/assistant/stt.py
+++ b/OS/01/assistant/stt.py
@ -30,6 +30,9 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    with open(input_path, 'wb') as f:
        f.write(audio)
    # Check if the input file exists
    assert os.path.exists(input_path), f"Input file does not exist: {input_path}"
    # Export to wav
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
    ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
@ -42,7 +45,6 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
        os.remove(input_path)
        os.remove(output_path)
 def stt(audio_bytes: bytearray, mime_type):
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        audio_file = open(wav_file_path, "rb")
--- a/OS/01/computer/kernel_watcher.py
+++ b/OS/01/computer/kernel_watcher.py
@ -7,6 +7,7 @@ import subprocess
 import time
 import requests
 import platform
 import os
 class Device:
    def __init__(self, device_type, device_info):
@ -118,8 +119,8 @@ def run_kernel_watch_linux():
            if custom_filter(message):
                messages_for_core.append(message)
        if messages_for_core:
-            requests.post('http://localhost:8000/computer', json = {'messages': messages_for_core})
+            port = os.getenv('ASSISTANT_PORT', 8000)
-
+            requests.post(f'http://localhost:{port}/computer', json = {'messages': messages_for_core})
        time.sleep(2)
--- a/OS/01/computer/run.py
+++ b/OS/01/computer/run.py
@ -2,7 +2,7 @@
 Exposes a SSE streaming server endpoint at /run, which recieves language and code,
 and streams the output.
 """
-
+import os
 import json
 from interpreter import interpreter
 import uvicorn
@ -20,9 +20,9 @@ app = FastAPI()
@app.post("/run")
 async def run_code(code: Code):
    def generator():
-        for chunk in interpreter.computer.run(code.language, code.code, stream=True):
+        for chunk in interpreter.computer.run(code.language, code.code):
            yield json.dumps(chunk)
    return StreamingResponse(generator())
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=9000)
+    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('COMPUTER_PORT', 9000)))
--- a/OS/01/requirements.txt
+++ b/OS/01/requirements.txt
@ -7,3 +7,4 @@ uvicorn
 websockets
 python-dotenv
 ffmpeg-python
 textual
--- a/OS/01/start.sh
+++ b/OS/01/start.sh
@ -1,12 +1,22 @@
 ### SETTINGS
 export MODE_01=LIGHT
 export ASSISTANT_PORT=8000
 export COMPUTER_PORT=8001
 # Kill whatever's on the ASSISTANT_PORT and COMPUTER_PORT
 lsof -ti tcp:$ASSISTANT_PORT | xargs kill
 lsof -ti tcp:$COMPUTER_PORT | xargs kill
 ### SETUP
 # INSTALL REQUIREMENTS
-if [[ "$OSTYPE" == "darwin"* ]]; then
+# if [[ "$OSTYPE" == "darwin"* ]]; then
-    brew update
+#     brew update
-    brew install portaudio ffmpeg
+#     brew install portaudio ffmpeg
-fi
+# fi
-pip install -r requirements.txt
+# pip install -r requirements.txt
 ### COMPUTER
@ -28,6 +38,8 @@ python computer/run.py &
 # (disabled, we'll start with hosted services)
 # python core/llm/start.py &
 sleep 6
 # START ASSISTANT
 python assistant/assistant.py &
--- a/OS/01/user/record.py
+++ b/OS/01/user/record.py
@ -5,18 +5,14 @@ Connects to a websocket at /user. Sends shit to it, and displays/plays the shit
 For now, just handles a spacebar being pressed— for the duration it's pressed,
 it should record audio.
 SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
 sends it to /user in LMC format (role: user, etc)
 MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
 """
 import os
 import pyaudio
 import threading
 import asyncio
-import websockets
+import websocket
 import time
 import json
 from pynput import keyboard
 import wave
@ -35,6 +31,15 @@ recording = False  # Flag to control recording state
 ws_chunk_size = 4096 # Websocket stream chunk size
 port = os.getenv('ASSISTANT_PORT', 8000)
 ws_url = f"ws://localhost:{port}/user"
 while True:
    try:
        ws = websocket.create_connection(ws_url)
        break
    except ConnectionRefusedError:
        time.sleep(1)
 async def start_recording():
    global recording
--- a/OS/01/user/user.py
+++ b/OS/01/user/user.py
@ -1,13 +1,123 @@
-"""
+import asyncio
-Handles everything the user interacts through.
+import threading
 import websockets
 import os
 import pyaudio
 from queue import Queue
 from pynput import keyboard
 import json
-Connects to a websocket at /user. Sends shit to it, and displays/plays the shit it sends back.
+# Configuration for Audio Recording
 CHUNK = 1024  # Record in chunks of 1024 samples
 FORMAT = pyaudio.paInt16  # 16 bits per sample
 CHANNELS = 1  # Mono
 RATE = 44100  # Sample rate
 RECORDING = False  # Flag to control recording state
 SPACEBAR_PRESSED = False  # Flag to track spacebar press state
-For now, just handles a spacebar being pressed— for the duration it's pressed,
+# Configuration for WebSocket
-it should record audio.
+PORT = os.getenv('ASSISTANT_PORT', '8000')
 WS_URL = f"ws://localhost:{PORT}/user"
-SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
+# Initialize PyAudio
-sends it to /user in LMC format (role: user, etc)
+p = pyaudio.PyAudio()
-MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
+# Queue for sending data
-"""
+data_queue = Queue()
 import wave
 import tempfile
 from datetime import datetime
 def record_audio():
    """Record audio from the microphone and add it to the queue."""
    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
    print("Recording started...")
    global RECORDING
    # Create a temporary WAV file to store the audio data
    temp_dir = tempfile.gettempdir()
    wav_path = os.path.join(temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
    wav_file = wave.open(wav_path, 'wb')
    wav_file.setnchannels(CHANNELS)
    wav_file.setsampwidth(p.get_sample_size(FORMAT))
    wav_file.setframerate(RATE)
    while RECORDING:
        data = stream.read(CHUNK, exception_on_overflow=False)
        wav_file.writeframes(data)
    wav_file.close()
    stream.stop_stream()
    stream.close()
    print("Recording stopped.")
    # After recording is done, read and stream the audio file in chunks
    with open(wav_path, 'rb') as audio_file:
        byte_data = audio_file.read(CHUNK)
        while byte_data:
            data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
            byte_data = audio_file.read(CHUNK)
    data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
 def toggle_recording(state):
    """Toggle the recording state."""
    global RECORDING, SPACEBAR_PRESSED
    if state and not SPACEBAR_PRESSED:
        SPACEBAR_PRESSED = True
        if not RECORDING:
            RECORDING = True
            threading.Thread(target=record_audio).start()
    elif not state and SPACEBAR_PRESSED:
        SPACEBAR_PRESSED = False
        RECORDING = False
 async def websocket_communication():
    """Handle WebSocket communication and listen for incoming messages."""
    async with websockets.connect(WS_URL) as websocket:
        while True:
            # Send data from the queue to the server
            while not data_queue.empty():
                data = data_queue.get_nowait()
                await websocket.send(json.dumps(data))
            # Listen for incoming messages from the server
            try:
                incoming_message = await asyncio.wait_for(websocket.recv(), timeout=1.0)
                print(f"Received from server: {incoming_message}")
            except asyncio.TimeoutError:
                # No message received within timeout period
                pass
            await asyncio.sleep(0.1)
 def on_press(key):
    """Detect spacebar press."""
    if key == keyboard.Key.space:
        toggle_recording(True)
 def on_release(key):
    """Detect spacebar release."""
    if key == keyboard.Key.space:
        toggle_recording(False)
 def main():
    import time
    time.sleep(10)
    # Start the WebSocket communication in a separate asyncio event loop
    ws_thread = threading.Thread(target=lambda: asyncio.run(websocket_communication()), daemon=True)
    ws_thread.start()
    # Keyboard listener for spacebar press/release
    with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
        print("Press the spacebar to start/stop recording. Press ESC to exit.")
        listener.join()
    p.terminate()
 if __name__ == "__main__":
    main()