Two way websocket in user + settings

pull/11/head
killian 11 months ago
parent 5f7d53f0b9
commit 63ab616082

@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also
In a while loop we watch the queue and handle it.
"""
import os
import ast
import json
import time
@ -54,7 +55,6 @@ async def websocket_endpoint(websocket: WebSocket):
message = to_user.get()
await websocket.send_json(message)
def queue_listener():
audio_file = bytearray()
while True:
@ -123,4 +123,4 @@ queue_thread.start()
# Run the FastAPI app
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('ASSISTANT_PORT', 8000)))

File diff suppressed because one or more lines are too long

@ -94,8 +94,8 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run
data = {"language": "python", "code": code}
# Send the data to the /run endpoint
response = requests.post("http://localhost:9000/run", json=data, stream=True)
computer_port = os.getenv('COMPUTER_PORT', '9000')
response = requests.post(f"http://localhost:{computer_port}/run", json=data, stream=True)
# Stream the response
for chunk in response.iter_content(chunk_size=100000000):
if chunk: # filter out keep-alive new lines

@ -30,6 +30,9 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
with open(input_path, 'wb') as f:
f.write(audio)
# Check if the input file exists
assert os.path.exists(input_path), f"Input file does not exist: {input_path}"
# Export to wav
output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
@ -42,7 +45,6 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
os.remove(input_path)
os.remove(output_path)
def stt(audio_bytes: bytearray, mime_type):
with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
audio_file = open(wav_file_path, "rb")

@ -7,6 +7,7 @@ import subprocess
import time
import requests
import platform
import os
class Device:
def __init__(self, device_type, device_info):
@ -118,8 +119,8 @@ def run_kernel_watch_linux():
if custom_filter(message):
messages_for_core.append(message)
if messages_for_core:
requests.post('http://localhost:8000/computer', json = {'messages': messages_for_core})
port = os.getenv('ASSISTANT_PORT', 8000)
requests.post(f'http://localhost:{port}/computer', json = {'messages': messages_for_core})
time.sleep(2)

@ -2,7 +2,7 @@
Exposes a SSE streaming server endpoint at /run, which recieves language and code,
and streams the output.
"""
import os
import json
from interpreter import interpreter
import uvicorn
@ -20,9 +20,9 @@ app = FastAPI()
@app.post("/run")
async def run_code(code: Code):
def generator():
for chunk in interpreter.computer.run(code.language, code.code, stream=True):
for chunk in interpreter.computer.run(code.language, code.code):
yield json.dumps(chunk)
return StreamingResponse(generator())
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=9000)
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('COMPUTER_PORT', 9000)))

@ -7,3 +7,4 @@ uvicorn
websockets
python-dotenv
ffmpeg-python
textual

@ -1,12 +1,22 @@
### SETTINGS
export MODE_01=LIGHT
export ASSISTANT_PORT=8000
export COMPUTER_PORT=8001
# Kill whatever's on the ASSISTANT_PORT and COMPUTER_PORT
lsof -ti tcp:$ASSISTANT_PORT | xargs kill
lsof -ti tcp:$COMPUTER_PORT | xargs kill
### SETUP
# INSTALL REQUIREMENTS
if [[ "$OSTYPE" == "darwin"* ]]; then
brew update
brew install portaudio ffmpeg
fi
pip install -r requirements.txt
# if [[ "$OSTYPE" == "darwin"* ]]; then
# brew update
# brew install portaudio ffmpeg
# fi
# pip install -r requirements.txt
### COMPUTER
@ -28,6 +38,8 @@ python computer/run.py &
# (disabled, we'll start with hosted services)
# python core/llm/start.py &
sleep 6
# START ASSISTANT
python assistant/assistant.py &

@ -5,18 +5,14 @@ Connects to a websocket at /user. Sends shit to it, and displays/plays the shit
For now, just handles a spacebar being pressed for the duration it's pressed,
it should record audio.
SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
sends it to /user in LMC format (role: user, etc)
MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
"""
import os
import pyaudio
import threading
import asyncio
import websockets
import websocket
import time
import json
from pynput import keyboard
import wave
@ -35,6 +31,15 @@ recording = False # Flag to control recording state
ws_chunk_size = 4096 # Websocket stream chunk size
port = os.getenv('ASSISTANT_PORT', 8000)
ws_url = f"ws://localhost:{port}/user"
while True:
try:
ws = websocket.create_connection(ws_url)
break
except ConnectionRefusedError:
time.sleep(1)
async def start_recording():
global recording

@ -1,13 +1,123 @@
"""
Handles everything the user interacts through.
import asyncio
import threading
import websockets
import os
import pyaudio
from queue import Queue
from pynput import keyboard
import json
Connects to a websocket at /user. Sends shit to it, and displays/plays the shit it sends back.
# Configuration for Audio Recording
CHUNK = 1024 # Record in chunks of 1024 samples
FORMAT = pyaudio.paInt16 # 16 bits per sample
CHANNELS = 1 # Mono
RATE = 44100 # Sample rate
RECORDING = False # Flag to control recording state
SPACEBAR_PRESSED = False # Flag to track spacebar press state
For now, just handles a spacebar being pressed for the duration it's pressed,
it should record audio.
# Configuration for WebSocket
PORT = os.getenv('ASSISTANT_PORT', '8000')
WS_URL = f"ws://localhost:{PORT}/user"
SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
sends it to /user in LMC format (role: user, etc)
# Initialize PyAudio
p = pyaudio.PyAudio()
MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
"""
# Queue for sending data
data_queue = Queue()
import wave
import tempfile
from datetime import datetime
def record_audio():
"""Record audio from the microphone and add it to the queue."""
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
print("Recording started...")
global RECORDING
# Create a temporary WAV file to store the audio data
temp_dir = tempfile.gettempdir()
wav_path = os.path.join(temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
wav_file = wave.open(wav_path, 'wb')
wav_file.setnchannels(CHANNELS)
wav_file.setsampwidth(p.get_sample_size(FORMAT))
wav_file.setframerate(RATE)
while RECORDING:
data = stream.read(CHUNK, exception_on_overflow=False)
wav_file.writeframes(data)
wav_file.close()
stream.stop_stream()
stream.close()
print("Recording stopped.")
# After recording is done, read and stream the audio file in chunks
with open(wav_path, 'rb') as audio_file:
byte_data = audio_file.read(CHUNK)
while byte_data:
data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
byte_data = audio_file.read(CHUNK)
data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
def toggle_recording(state):
"""Toggle the recording state."""
global RECORDING, SPACEBAR_PRESSED
if state and not SPACEBAR_PRESSED:
SPACEBAR_PRESSED = True
if not RECORDING:
RECORDING = True
threading.Thread(target=record_audio).start()
elif not state and SPACEBAR_PRESSED:
SPACEBAR_PRESSED = False
RECORDING = False
async def websocket_communication():
"""Handle WebSocket communication and listen for incoming messages."""
async with websockets.connect(WS_URL) as websocket:
while True:
# Send data from the queue to the server
while not data_queue.empty():
data = data_queue.get_nowait()
await websocket.send(json.dumps(data))
# Listen for incoming messages from the server
try:
incoming_message = await asyncio.wait_for(websocket.recv(), timeout=1.0)
print(f"Received from server: {incoming_message}")
except asyncio.TimeoutError:
# No message received within timeout period
pass
await asyncio.sleep(0.1)
def on_press(key):
"""Detect spacebar press."""
if key == keyboard.Key.space:
toggle_recording(True)
def on_release(key):
"""Detect spacebar release."""
if key == keyboard.Key.space:
toggle_recording(False)
def main():
import time
time.sleep(10)
# Start the WebSocket communication in a separate asyncio event loop
ws_thread = threading.Thread(target=lambda: asyncio.run(websocket_communication()), daemon=True)
ws_thread.start()
# Keyboard listener for spacebar press/release
with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
print("Press the spacebar to start/stop recording. Press ESC to exit.")
listener.join()
p.terminate()
if __name__ == "__main__":
main()

Loading…
Cancel
Save