From 777ab42f75d0e4ad44e79b383bec92d1fde039e2 Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Thu, 8 Feb 2024 23:46:33 -0800 Subject: [PATCH] 8th Architecture --- OS/01/conversations/user.json | 2 +- OS/01/device.py | 43 +++++++++++-------- OS/01/i.py | 1 - OS/01/server.py | 2 +- OS/01/start.sh | 2 +- OS/01/stt.py | 2 +- .../{check_filtered_kernel.py => kernel.py} | 40 ++++++++++------- OS/01/utils/put_kernel_messages_into_queue.py | 17 -------- OS/01/utils/run_endpoint.py | 28 ------------ 9 files changed, 52 insertions(+), 85 deletions(-) rename OS/01/utils/{check_filtered_kernel.py => kernel.py} (53%) delete mode 100644 OS/01/utils/put_kernel_messages_into_queue.py delete mode 100644 OS/01/utils/run_endpoint.py diff --git a/OS/01/conversations/user.json b/OS/01/conversations/user.json index 79efc11..4ee5589 100644 --- a/OS/01/conversations/user.json +++ b/OS/01/conversations/user.json @@ -1 +1 @@ -[{"role": "user", "type": "message", "content": "Hi.\n"}] \ No newline at end of file +[{"role": "user", "type": "message", "content": "Hi, can you hear me?\n"}] \ No newline at end of file diff --git a/OS/01/device.py b/OS/01/device.py index 299dc04..176c501 100644 --- a/OS/01/device.py +++ b/OS/01/device.py @@ -19,7 +19,7 @@ import wave import tempfile from datetime import datetime from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run? -from utils.put_kernel_messages_into_queue import put_kernel_messages_into_queue +from utils.kernel import put_kernel_messages_into_queue from stt import stt_wav # Configuration for Audio Recording @@ -71,28 +71,33 @@ def record_audio(): stream.close() print("Recording stopped.") - # After recording is done, read and stream the audio file in chunks - with open(wav_path, 'rb') as audio_file: - byte_data = audio_file.read(CHUNK) - while byte_data: - send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)}) - byte_data = audio_file.read(CHUNK) - - if os.getenv('STT_RUNNER') == "device": - text = stt_wav(wav_path) - send_queue.put({"role": "user", "type": "message", "content": text}) - - if os.getenv('STT_RUNNER') == "server": - # STT will happen on the server. we sent audio. - send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True}) - elif os.getenv('STT_RUNNER') == "device": - # STT will happen here, on the device. we sent text. - send_queue.put({"role": "user", "type": "message", "end": True}) + duration = wav_file.getnframes() / RATE + if duration < 0.3: + # Just pressed it. Send stop message + if os.getenv('STT_RUNNER') == "device": + send_queue.put({"role": "user", "type": "message", "content": "stop"}) + send_queue.put({"role": "user", "type": "message", "end": True}) + else: + send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": ""}) + send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True}) + else: + if os.getenv('STT_RUNNER') == "device": + # Run stt then send text + text = stt_wav(wav_path) + send_queue.put({"role": "user", "type": "message", "content": text}) + send_queue.put({"role": "user", "type": "message", "end": True}) + else: + # Stream audio + with open(wav_path, 'rb') as audio_file: + byte_data = audio_file.read(CHUNK) + while byte_data: + send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)}) + byte_data = audio_file.read(CHUNK) + send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True}) if os.path.exists(wav_path): os.remove(wav_path) - def toggle_recording(state): """Toggle the recording state.""" global RECORDING, SPACEBAR_PRESSED diff --git a/OS/01/i.py b/OS/01/i.py index 9bfd478..339298f 100644 --- a/OS/01/i.py +++ b/OS/01/i.py @@ -1,7 +1,6 @@ import os import glob import json -import requests from pathlib import Path def configure_interpreter(interpreter): diff --git a/OS/01/server.py b/OS/01/server.py index 10142d2..48b3582 100644 --- a/OS/01/server.py +++ b/OS/01/server.py @@ -18,7 +18,7 @@ from tts import tts from pathlib import Path import asyncio import urllib.parse -from utils.put_kernel_messages_into_queue import put_kernel_messages_into_queue +from utils.kernel import put_kernel_messages_into_queue from i import configure_interpreter from interpreter import interpreter diff --git a/OS/01/start.sh b/OS/01/start.sh index 77a66a8..b81bf4a 100755 --- a/OS/01/start.sh +++ b/OS/01/start.sh @@ -49,7 +49,7 @@ fi # brew update # brew install portaudio ffmpeg # fi -python -m pip install -r requirements.txt +# python -m pip install -r requirements.txt ### START diff --git a/OS/01/stt.py b/OS/01/stt.py index d9ac3fa..d53134a 100644 --- a/OS/01/stt.py +++ b/OS/01/stt.py @@ -60,5 +60,5 @@ def stt_wav(wav_file_path: str): print("openai.BadRequestError:", e) return None - print("Exciting transcription result:", transcript) + print("Transcription result:", transcript) return transcript diff --git a/OS/01/utils/check_filtered_kernel.py b/OS/01/utils/kernel.py similarity index 53% rename from OS/01/utils/check_filtered_kernel.py rename to OS/01/utils/kernel.py index f0a441d..da7d55f 100644 --- a/OS/01/utils/check_filtered_kernel.py +++ b/OS/01/utils/kernel.py @@ -1,13 +1,6 @@ -""" -Watches the kernel. When it sees something that passes a filter, -it sends POST request with that to /computer. -""" - +import asyncio import subprocess -import time -import requests import platform -import os def get_kernel_messages(): """ @@ -43,13 +36,28 @@ def custom_filter(message): last_messages = "" def check_filtered_kernel(): + messages = get_kernel_messages() + messages.replace(last_messages, "") + messages = messages.split("\n") + + filtered_messages = [] + for message in messages: + if custom_filter(message): + filtered_messages.append(message) + + return "\n".join(filtered_messages) + +async def put_kernel_messages_into_queue(queue): while True: - messages = get_kernel_messages() - messages.replace(last_messages, "") - messages = messages.split("\n") + text = check_filtered_kernel() + if text: + if isinstance(queue, asyncio.Queue): + await queue.put({"role": "computer", "type": "console", "start": True}) + await queue.put({"role": "computer", "type": "console", "format": "output", "content": text}) + await queue.put({"role": "computer", "type": "console", "end": True}) + else: + queue.put({"role": "computer", "type": "console", "start": True}) + queue.put({"role": "computer", "type": "console", "format": "output", "content": text}) + queue.put({"role": "computer", "type": "console", "end": True}) - filtered_messages = [] - for message in messages: - if custom_filter(message): - filtered_messages.append(message) - return "\n".join(filtered_messages) + await asyncio.sleep(5) \ No newline at end of file diff --git a/OS/01/utils/put_kernel_messages_into_queue.py b/OS/01/utils/put_kernel_messages_into_queue.py deleted file mode 100644 index e3a2702..0000000 --- a/OS/01/utils/put_kernel_messages_into_queue.py +++ /dev/null @@ -1,17 +0,0 @@ -from .check_filtered_kernel import check_filtered_kernel -import asyncio - -async def put_kernel_messages_into_queue(queue): - while True: - text = check_filtered_kernel() - if text: - if isinstance(queue, asyncio.Queue): - await queue.put({"role": "computer", "type": "console", "start": True}) - await queue.put({"role": "computer", "type": "console", "format": "output", "content": text}) - await queue.put({"role": "computer", "type": "console", "end": True}) - else: - queue.put({"role": "computer", "type": "console", "start": True}) - queue.put({"role": "computer", "type": "console", "format": "output", "content": text}) - queue.put({"role": "computer", "type": "console", "end": True}) - - await asyncio.sleep(5) \ No newline at end of file diff --git a/OS/01/utils/run_endpoint.py b/OS/01/utils/run_endpoint.py deleted file mode 100644 index d3e9ead..0000000 --- a/OS/01/utils/run_endpoint.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -Exposes a SSE streaming server endpoint at /run, which recieves language and code, -and streams the output. -""" -import os -import json -from interpreter import interpreter -import uvicorn - -from fastapi import FastAPI -from fastapi.responses import StreamingResponse -from pydantic import BaseModel - -class Code(BaseModel): - language: str - code: str - -app = FastAPI() - -@app.post("/run") -async def run_code(code: Code): - def generator(): - for chunk in interpreter.computer.run(code.language, code.code): - yield json.dumps(chunk) - return StreamingResponse(generator()) - -if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('COMPUTER_PORT', 9000))) \ No newline at end of file