Two way websocket in user + settings

pull/11/head
killian 12 months ago
parent 5f7d53f0b9
commit 63ab616082

@ -6,6 +6,7 @@ Exposes a ws endpoint called /user. Things from there go into the queue. We also
In a while loop we watch the queue and handle it. In a while loop we watch the queue and handle it.
""" """
import os
import ast import ast
import json import json
import time import time
@ -54,7 +55,6 @@ async def websocket_endpoint(websocket: WebSocket):
message = to_user.get() message = to_user.get()
await websocket.send_json(message) await websocket.send_json(message)
def queue_listener(): def queue_listener():
audio_file = bytearray() audio_file = bytearray()
while True: while True:
@ -123,4 +123,4 @@ queue_thread.start()
# Run the FastAPI app # Run the FastAPI app
if __name__ == "__main__": if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000) uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('ASSISTANT_PORT', 8000)))

File diff suppressed because one or more lines are too long

@ -94,8 +94,8 @@ Remember: You can run Python code. Be very concise. Ensure that you actually run
data = {"language": "python", "code": code} data = {"language": "python", "code": code}
# Send the data to the /run endpoint # Send the data to the /run endpoint
response = requests.post("http://localhost:9000/run", json=data, stream=True) computer_port = os.getenv('COMPUTER_PORT', '9000')
response = requests.post(f"http://localhost:{computer_port}/run", json=data, stream=True)
# Stream the response # Stream the response
for chunk in response.iter_content(chunk_size=100000000): for chunk in response.iter_content(chunk_size=100000000):
if chunk: # filter out keep-alive new lines if chunk: # filter out keep-alive new lines

@ -30,6 +30,9 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
with open(input_path, 'wb') as f: with open(input_path, 'wb') as f:
f.write(audio) f.write(audio)
# Check if the input file exists
assert os.path.exists(input_path), f"Input file does not exist: {input_path}"
# Export to wav # Export to wav
output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav") output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run() ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
@ -42,7 +45,6 @@ def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
os.remove(input_path) os.remove(input_path)
os.remove(output_path) os.remove(output_path)
def stt(audio_bytes: bytearray, mime_type): def stt(audio_bytes: bytearray, mime_type):
with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path: with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
audio_file = open(wav_file_path, "rb") audio_file = open(wav_file_path, "rb")

@ -7,6 +7,7 @@ import subprocess
import time import time
import requests import requests
import platform import platform
import os
class Device: class Device:
def __init__(self, device_type, device_info): def __init__(self, device_type, device_info):
@ -118,8 +119,8 @@ def run_kernel_watch_linux():
if custom_filter(message): if custom_filter(message):
messages_for_core.append(message) messages_for_core.append(message)
if messages_for_core: if messages_for_core:
requests.post('http://localhost:8000/computer', json = {'messages': messages_for_core}) port = os.getenv('ASSISTANT_PORT', 8000)
requests.post(f'http://localhost:{port}/computer', json = {'messages': messages_for_core})
time.sleep(2) time.sleep(2)

@ -2,7 +2,7 @@
Exposes a SSE streaming server endpoint at /run, which recieves language and code, Exposes a SSE streaming server endpoint at /run, which recieves language and code,
and streams the output. and streams the output.
""" """
import os
import json import json
from interpreter import interpreter from interpreter import interpreter
import uvicorn import uvicorn
@ -20,9 +20,9 @@ app = FastAPI()
@app.post("/run") @app.post("/run")
async def run_code(code: Code): async def run_code(code: Code):
def generator(): def generator():
for chunk in interpreter.computer.run(code.language, code.code, stream=True): for chunk in interpreter.computer.run(code.language, code.code):
yield json.dumps(chunk) yield json.dumps(chunk)
return StreamingResponse(generator()) return StreamingResponse(generator())
if __name__ == "__main__": if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=9000) uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('COMPUTER_PORT', 9000)))

@ -7,3 +7,4 @@ uvicorn
websockets websockets
python-dotenv python-dotenv
ffmpeg-python ffmpeg-python
textual

@ -1,12 +1,22 @@
### SETTINGS
export MODE_01=LIGHT
export ASSISTANT_PORT=8000
export COMPUTER_PORT=8001
# Kill whatever's on the ASSISTANT_PORT and COMPUTER_PORT
lsof -ti tcp:$ASSISTANT_PORT | xargs kill
lsof -ti tcp:$COMPUTER_PORT | xargs kill
### SETUP ### SETUP
# INSTALL REQUIREMENTS # INSTALL REQUIREMENTS
if [[ "$OSTYPE" == "darwin"* ]]; then # if [[ "$OSTYPE" == "darwin"* ]]; then
brew update # brew update
brew install portaudio ffmpeg # brew install portaudio ffmpeg
fi # fi
pip install -r requirements.txt # pip install -r requirements.txt
### COMPUTER ### COMPUTER
@ -28,6 +38,8 @@ python computer/run.py &
# (disabled, we'll start with hosted services) # (disabled, we'll start with hosted services)
# python core/llm/start.py & # python core/llm/start.py &
sleep 6
# START ASSISTANT # START ASSISTANT
python assistant/assistant.py & python assistant/assistant.py &

@ -5,18 +5,14 @@ Connects to a websocket at /user. Sends shit to it, and displays/plays the shit
For now, just handles a spacebar being pressed for the duration it's pressed, For now, just handles a spacebar being pressed for the duration it's pressed,
it should record audio. it should record audio.
SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript,
sends it to /user in LMC format (role: user, etc)
MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py.
""" """
import os import os
import pyaudio import pyaudio
import threading import threading
import asyncio import asyncio
import websockets import websocket
import time
import json import json
from pynput import keyboard from pynput import keyboard
import wave import wave
@ -35,6 +31,15 @@ recording = False # Flag to control recording state
ws_chunk_size = 4096 # Websocket stream chunk size ws_chunk_size = 4096 # Websocket stream chunk size
port = os.getenv('ASSISTANT_PORT', 8000)
ws_url = f"ws://localhost:{port}/user"
while True:
try:
ws = websocket.create_connection(ws_url)
break
except ConnectionRefusedError:
time.sleep(1)
async def start_recording(): async def start_recording():
global recording global recording

@ -1,13 +1,123 @@
""" import asyncio
Handles everything the user interacts through. import threading
import websockets
import os
import pyaudio
from queue import Queue
from pynput import keyboard
import json
Connects to a websocket at /user. Sends shit to it, and displays/plays the shit it sends back. # Configuration for Audio Recording
CHUNK = 1024 # Record in chunks of 1024 samples
FORMAT = pyaudio.paInt16 # 16 bits per sample
CHANNELS = 1 # Mono
RATE = 44100 # Sample rate
RECORDING = False # Flag to control recording state
SPACEBAR_PRESSED = False # Flag to track spacebar press state
For now, just handles a spacebar being pressed for the duration it's pressed, # Configuration for WebSocket
it should record audio. PORT = os.getenv('ASSISTANT_PORT', '8000')
WS_URL = f"ws://localhost:{PORT}/user"
SIMPLEST POSSIBLE: Sends that audio to OpenAI whisper, gets the transcript, # Initialize PyAudio
sends it to /user in LMC format (role: user, etc) p = pyaudio.PyAudio()
MOST FUTUREPROOF: Streams chunks of audio to /user, which will then handle stt in stt.py. # Queue for sending data
""" data_queue = Queue()
import wave
import tempfile
from datetime import datetime
def record_audio():
"""Record audio from the microphone and add it to the queue."""
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
print("Recording started...")
global RECORDING
# Create a temporary WAV file to store the audio data
temp_dir = tempfile.gettempdir()
wav_path = os.path.join(temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
wav_file = wave.open(wav_path, 'wb')
wav_file.setnchannels(CHANNELS)
wav_file.setsampwidth(p.get_sample_size(FORMAT))
wav_file.setframerate(RATE)
while RECORDING:
data = stream.read(CHUNK, exception_on_overflow=False)
wav_file.writeframes(data)
wav_file.close()
stream.stop_stream()
stream.close()
print("Recording stopped.")
# After recording is done, read and stream the audio file in chunks
with open(wav_path, 'rb') as audio_file:
byte_data = audio_file.read(CHUNK)
while byte_data:
data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
byte_data = audio_file.read(CHUNK)
data_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
def toggle_recording(state):
"""Toggle the recording state."""
global RECORDING, SPACEBAR_PRESSED
if state and not SPACEBAR_PRESSED:
SPACEBAR_PRESSED = True
if not RECORDING:
RECORDING = True
threading.Thread(target=record_audio).start()
elif not state and SPACEBAR_PRESSED:
SPACEBAR_PRESSED = False
RECORDING = False
async def websocket_communication():
"""Handle WebSocket communication and listen for incoming messages."""
async with websockets.connect(WS_URL) as websocket:
while True:
# Send data from the queue to the server
while not data_queue.empty():
data = data_queue.get_nowait()
await websocket.send(json.dumps(data))
# Listen for incoming messages from the server
try:
incoming_message = await asyncio.wait_for(websocket.recv(), timeout=1.0)
print(f"Received from server: {incoming_message}")
except asyncio.TimeoutError:
# No message received within timeout period
pass
await asyncio.sleep(0.1)
def on_press(key):
"""Detect spacebar press."""
if key == keyboard.Key.space:
toggle_recording(True)
def on_release(key):
"""Detect spacebar release."""
if key == keyboard.Key.space:
toggle_recording(False)
def main():
import time
time.sleep(10)
# Start the WebSocket communication in a separate asyncio event loop
ws_thread = threading.Thread(target=lambda: asyncio.run(websocket_communication()), daemon=True)
ws_thread.start()
# Keyboard listener for spacebar press/release
with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
print("Press the spacebar to start/stop recording. Press ESC to exit.")
listener.join()
p.terminate()
if __name__ == "__main__":
main()

Loading…
Cancel
Save