8th Architecture

pull/11/head
killian 11 months ago
parent 61dd64f469
commit e75fa90a48

@ -1 +0,0 @@
[{"role": "user", "type": "message", "content": "And it works really well.\n"}]

@ -1,131 +0,0 @@
from interpreter import interpreter
import os
import glob
import json
import requests
def create_interpreter():
    """Configure the shared ``interpreter`` object for the 01 and return it.

    Steps, in order:
      1. Install the task-manager system message as custom instructions.
      2. Apply LLM and misc settings.
      3. Reset ``conversations/user.json`` (next to this file) to ``[]``.
      4. Replace the interpreter's languages with a ``Python`` class that
         forwards code to the local ``/run`` endpoint.
      5. Run every ``skills/*.py`` file through the interpreter.

    Returns:
        The configured module-level ``interpreter`` object.
    """

    ### SYSTEM MESSAGE

    # The system message is where most of the 01's behavior is configured.
    # You can put code into the system message {{ in brackets like this }} which will be rendered just before the interpreter starts writing a message.

    # NOTE: the body of the `if` inside the {{ }} block must be indented,
    # otherwise the rendered snippet is not valid Python.
    system_message = """
You are an executive assistant AI that helps the user manage their tasks. You can run Python code.
Store the user's tasks in a Python list called `tasks`.
---
The user's current task is: {{ tasks[0] if tasks else "No current tasks." }}
{{
if len(tasks) > 1:
    print("The next task is: ", tasks[1])
}}
---
When the user completes the current task, you should remove it from the list and read the next item by running `tasks = tasks[1:]\ntasks[0]`. Then, tell the user what the next task is.
When the user tells you about a set of tasks, you should intelligently order tasks, batch similar tasks, and break down large tasks into smaller tasks (for this, you should consult the user and get their permission to break it down). Your goal is to manage the task list as intelligently as possible, to make the user as efficient and non-overwhelmed as possible. They will require a lot of encouragement, support, and kindness. Don't say too much about what's ahead of them just try to focus them on each step at a time.
After starting a task, you should check in with the user around the estimated completion time to see if the task is completed. Use the `schedule(datetime, message)` function, which has already been imported.
To do this, schedule a reminder based on estimated completion time using the function `schedule(datetime_object, "Your message here.")`, WHICH HAS ALREADY BEEN IMPORTED. YOU DON'T NEED TO IMPORT THE `schedule` FUNCTION. IT IS AVALIABLE. You'll recieve the message at `datetime_object`.
You guide the user through the list one task at a time, convincing them to move forward, giving a pep talk if need be. Your job is essentially to answer "what should I (the user) be doing right now?" for every moment of the day.
Remember: You can run Python code. Be very concise. Ensure that you actually run code every time! THIS IS IMPORTANT. You NEED to write code. **Help the user by being very concise in your answers.** Do not break down tasks excessively, just into simple, few minute steps. Don't assume the user lives their life in a certain way— pick very general tasks if you're breaking a task down.
""".strip()

    interpreter.custom_instructions = system_message

    ### LLM SETTINGS

    # Local settings
    # interpreter.llm.model = "local"
    # interpreter.llm.api_base = "https://localhost:8080/v1" # Llamafile default
    # interpreter.llm.max_tokens = 1000
    # interpreter.llm.context_window = 3000

    # Hosted settings
    interpreter.llm.api_key = os.getenv('OPENAI_API_KEY')
    interpreter.llm.model = "gpt-4"
    interpreter.auto_run = True
    interpreter.force_task_completion = False

    ### MISC SETTINGS

    interpreter.offline = True
    interpreter.id = 206  # Used to identify itself to other interpreters. This should be changed programmatically so it's unique.

    ### RESET conversations/user.json

    script_dir = os.path.dirname(os.path.abspath(__file__))
    conversations_dir = os.path.join(script_dir, 'conversations')
    # Opening the file for writing fails if the directory is missing (e.g. on
    # a fresh checkout), so create it first.
    os.makedirs(conversations_dir, exist_ok=True)
    user_json_path = os.path.join(conversations_dir, 'user.json')
    with open(user_json_path, 'w') as file:
        json.dump([], file)

    ### CONNECT TO /run

    class Python:
        """
        This class contains all requirements for being a custom language in Open Interpreter:

        - name (an attribute)
        - run (a method)
        - stop (a method)
        - terminate (a method)
        """

        # This is the name that will appear to the LLM.
        name = "python"

        def __init__(self):
            # Set by stop(); checked (and cleared) by run() between chunks.
            self.halt = False

        def run(self, code):
            """Generator that yields a dictionary in LMC Format."""

            # Prepare the data
            data = {"language": "python", "code": code}

            # Send the data to the /run endpoint
            computer_port = os.getenv('COMPUTER_PORT', '9000')
            url = f"http://localhost:{computer_port}/run"

            # The context manager closes the streamed response (and releases
            # its connection) even when we stop early or the consumer abandons
            # the generator.
            with requests.post(url, json=data, stream=True) as response:
                # Stream the response
                # NOTE(review): each chunk is parsed as one complete JSON
                # document; this relies on the server's chunks arriving
                # unsplit and unmerged -- confirm against the /run endpoint.
                for chunk in response.iter_content(chunk_size=100000000):
                    if self.halt:
                        self.halt = False
                        break
                    if chunk:  # filter out keep-alive new lines
                        yield json.loads(chunk.decode())

        def stop(self):
            """Ask the current run() to stop before its next chunk."""
            self.halt = True

        def terminate(self):
            """Terminates the entire process."""
            # dramatic!!
            pass

    interpreter.computer.languages = [Python]

    ### SKILLS

    skills_dir = os.path.join(script_dir, 'skills')
    # Sorted so skills load in a stable order regardless of filesystem order.
    for skill_file in sorted(glob.glob(os.path.join(skills_dir, '*.py'))):
        with open(skill_file, 'r') as f:
            for chunk in interpreter.computer.run("python", f.read()):
                print(chunk)

    ### RETURN INTERPRETER

    return interpreter

@ -0,0 +1 @@
[{"role": "user", "type": "message", "content": "\ub2e4\uc74c \uc601\uc0c1\uc5d0\uc11c \ub9cc\ub098\uc694!\n"}]

@ -13,6 +13,11 @@ import ast
from pydub import AudioSegment
from pydub.playback import play
import io
import wave
import tempfile
from datetime import datetime
from utils.check_filtered_kernel import check_filtered_kernel
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
# Configuration for Audio Recording
CHUNK = 1024 # Record in chunks of 1024 samples
@ -23,18 +28,13 @@ RECORDING = False # Flag to control recording state
SPACEBAR_PRESSED = False # Flag to track spacebar press state
# Configuration for WebSocket
PORT = os.getenv('ASSISTANT_PORT', '8000')
WS_URL = f"ws://localhost:{PORT}/user"
WS_URL = os.getenv('SERVER_URL')
if not WS_URL:
raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
# Initialize PyAudio
p = pyaudio.PyAudio()
import wave
import tempfile
from datetime import datetime
def record_audio():
"""Record audio from the microphone and add it to the queue."""
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
@ -110,8 +110,15 @@ async def websocket_communication(WS_URL):
print("Press the spacebar to start/stop recording. Press ESC to exit.")
asyncio.create_task(message_sender(websocket))
message_so_far = {"role": None, "type": None, "format": None, "content": None}
async for message in websocket:
print(message)
if "content" in message_so_far:
if any(message_so_far[key] != message[key] for key in message_so_far):
message_so_far = message
else:
message_so_far["content"] += message
if message["type"] == "audio" and "content" in message:
audio_bytes = bytes(ast.literal_eval(message["content"]))
@ -124,8 +131,18 @@ async def websocket_communication(WS_URL):
play(audio)
await asyncio.sleep(1)
# Run the code if that's the device's job
if os.getenv('CODE_RUNNER') == "device":
if message["type"] == "code" and "end" in message:
language = message_so_far["format"]
code = message_so_far["content"]
result = interpreter.computer.run(language, code)
send_queue.put(result)
except:
print("Connecting...")
print(f"Connecting to `{WS_URL}`...")
await asyncio.sleep(2)
def main():

@ -0,0 +1,82 @@
import os
import glob
import json
import requests
from pathlib import Path
def configure_interpreter(interpreter):
    """Configure the given interpreter for the 01 and return it.

    Installs the task-manager system message, applies LLM and misc settings,
    resets ``conversations/user.json`` (next to this file) to an empty list,
    and runs every ``skills/*.py`` file through ``interpreter.computer.run``.

    Args:
        interpreter: The interpreter object to configure; it is mutated in place.

    Returns:
        The same ``interpreter`` object that was passed in.
    """

    ### SYSTEM MESSAGE

    # The system message is where most of the 01's behavior is configured.
    # You can put code into the system message {{ in brackets like this }} which will be rendered just before the interpreter starts writing a message.

    # NOTE: the body of the `if` inside the {{ }} block must be indented,
    # otherwise the rendered snippet is not valid Python.
    system_message = """
You are an executive assistant AI that helps the user manage their tasks. You can run Python code.
Store the user's tasks in a Python list called `tasks`.
---
The user's current task is: {{ tasks[0] if tasks else "No current tasks." }}
{{
if len(tasks) > 1:
    print("The next task is: ", tasks[1])
}}
---
When the user completes the current task, you should remove it from the list and read the next item by running `tasks = tasks[1:]\ntasks[0]`. Then, tell the user what the next task is.
When the user tells you about a set of tasks, you should intelligently order tasks, batch similar tasks, and break down large tasks into smaller tasks (for this, you should consult the user and get their permission to break it down). Your goal is to manage the task list as intelligently as possible, to make the user as efficient and non-overwhelmed as possible. They will require a lot of encouragement, support, and kindness. Don't say too much about what's ahead of them just try to focus them on each step at a time.
After starting a task, you should check in with the user around the estimated completion time to see if the task is completed. Use the `schedule(datetime, message)` function, which has already been imported.
To do this, schedule a reminder based on estimated completion time using the function `schedule(datetime_object, "Your message here.")`, WHICH HAS ALREADY BEEN IMPORTED. YOU DON'T NEED TO IMPORT THE `schedule` FUNCTION. IT IS AVALIABLE. You'll recieve the message at `datetime_object`.
You guide the user through the list one task at a time, convincing them to move forward, giving a pep talk if need be. Your job is essentially to answer "what should I (the user) be doing right now?" for every moment of the day.
Remember: You can run Python code. Be very concise. Ensure that you actually run code every time! THIS IS IMPORTANT. You NEED to write code. **Help the user by being very concise in your answers.** Do not break down tasks excessively, just into simple, few minute steps. Don't assume the user lives their life in a certain way— pick very general tasks if you're breaking a task down.
""".strip()

    interpreter.custom_instructions = system_message

    ### LLM SETTINGS

    # Local settings
    # interpreter.llm.model = "local"
    # interpreter.llm.api_base = "https://localhost:8080/v1" # Llamafile default
    # interpreter.llm.max_tokens = 1000
    # interpreter.llm.context_window = 3000

    # Hosted settings
    interpreter.llm.api_key = os.getenv('OPENAI_API_KEY')
    interpreter.llm.model = "gpt-4"
    interpreter.auto_run = True
    interpreter.force_task_completion = False

    ### MISC SETTINGS

    interpreter.offline = True
    interpreter.id = 206  # Used to identify itself to other interpreters. This should be changed programmatically so it's unique.

    ### RESET conversations/user.json

    script_dir = os.path.dirname(os.path.abspath(__file__))
    conversations_dir = os.path.join(script_dir, 'conversations')
    # Opening the file for writing fails if the directory is missing (e.g. on
    # a fresh checkout), so create it first.
    os.makedirs(conversations_dir, exist_ok=True)
    user_json_path = os.path.join(conversations_dir, 'user.json')
    with open(user_json_path, 'w') as file:
        json.dump([], file)

    ### SKILLS

    skills_path = Path(__file__).parent / 'skills'
    # Sorted so skills load in a stable order regardless of filesystem order.
    for skill_file in sorted(glob.glob(os.path.join(skills_path, '*.py'))):
        with open(skill_file, 'r') as f:
            for chunk in interpreter.computer.run("python", f.read()):
                print(chunk)

    return interpreter

@ -1,26 +1,11 @@
"""
Responsible for setting up the language model, downloading it if necessary.
Ideally should pick the best LLM for the hardware.
Should this be a shell script?
"""
import os
import subprocess
from pathlib import Path
### LLM SETUP
# Define the path to the models directory
models_dir = "models/"
# Check and create the models directory if it doesn't exist
if not os.path.exists(models_dir):
os.makedirs(models_dir)
# Define the path to a llamafile
llamafile_path = os.path.join(models_dir, "phi-2.Q4_K_M.llamafile")
llamafile_path = Path(__file__).parent / 'model.llamafile'
# Check if the new llamafile exists, if not download it
if not os.path.exists(llamafile_path):
@ -36,5 +21,5 @@ if not os.path.exists(llamafile_path):
# Make the new llamafile executable
subprocess.run(["chmod", "+x", llamafile_path], check=True)
# Run the new llamafile in the background
subprocess.Popen([llamafile_path])
# Run the new llamafile
subprocess.run([str(llamafile_path)], check=True)

@ -1,6 +1,6 @@
git+https://github.com/KillianLucas/open-interpreter.git
asyncio
pyaudio
PyAudio
pynput
fastapi
uvicorn

@ -1,11 +1,3 @@
"""
Exposes a POST endpoint called /computer. Things from there go into the queue.
Exposes a ws endpoint called /user. Things from there go into the queue. We also send things in the queue back (that are role: assistant)
In a while loop we watch the queue and handle it.
"""
from starlette.websockets import WebSocketDisconnect
import ast
import json
@ -19,18 +11,18 @@ import re
from fastapi import FastAPI
from threading import Thread
from starlette.websockets import WebSocket
from create_interpreter import create_interpreter
from stt import stt
from tts import tts
from pathlib import Path
import asyncio
from i import configure_interpreter
import urllib.parse
from interpreter import interpreter
# Create interpreter
interpreter = create_interpreter()
app = FastAPI()
conversation_history_path = Path(__file__).parent / 'conversations' / 'user.json'
# This is so we only say() full sentences
def is_full_sentence(text):
    """Return True if `text` ends with '.', '!' or '?'."""
    sentence_enders = ('.', '!', '?')
    return text.endswith(sentence_enders)
@ -38,19 +30,59 @@ def is_full_sentence(text):
def split_into_sentences(text):
    """Split `text` at whitespace runs that follow '.', '!' or '?'.

    The punctuation stays attached to the preceding piece; the whitespace
    separating the pieces is dropped.
    """
    sentence_boundary = re.compile(r'(?<=[.!?])\s+')
    return sentence_boundary.split(text)
app = FastAPI()
# Global queues
receive_queue = queue.Queue()
send_queue = queue.Queue()
recieve_computer_queue = queue.Queue() # Just for computer messages from the device
# Switch code executor to device if that's set
if os.getenv('CODE_RUNNER') == "device":
import asyncio
# (This should probably just loop through all languages and apply these changes instead)
class Python:
# This is the name that will appear to the LLM.
name = "python"
# Global queues
receive_queue = queue.Queue()
send_queue = queue.Queue()
def __init__(self):
self.halt = False
def run(self, code):
    """Generator that yields a dictionary in LMC Format.

    Puts `code` on `send_queue` for the device (wrapped in start/end flag
    messages unless it is already the interpreter's last message), then
    yields every chunk taken from `recieve_computer_queue` until a chunk
    containing "end" arrives or stop() is called.
    """

    # A stop request only applies to a run that is in flight; clear any
    # leftover flag so it cannot cancel this run before it starts.
    self.halt = False

    # Prepare the data
    message = {"role": "assistant", "type": "code", "format": "python", "content": code}

    # Unless it was just sent to the device, send it wrapped in flags
    if not (interpreter.messages and interpreter.messages[-1] == message):
        send_queue.put({"role": "assistant", "type": "code", "format": "python", "start": True})
        send_queue.put(message)
        send_queue.put({"role": "assistant", "type": "code", "format": "python", "end": True})

    # Stream the response
    print("Waiting for the device to respond...")
    while True:
        # stop() sets self.halt. Poll with a short timeout instead of blocking
        # indefinitely, so the request is noticed even if the device is silent.
        if self.halt:
            self.halt = False
            break
        try:
            chunk = recieve_computer_queue.get(timeout=0.1)
        except queue.Empty:
            continue
        print("Server recieved from device:", chunk)
        if "end" in chunk:
            break
        yield chunk
def stop(self):
self.halt = True
def terminate(self):
"""Terminates the entire process."""
# dramatic!! do nothing
pass
@app.websocket("/user")
interpreter.computer.languages = [Python]
# Configure interpreter
interpreter = configure_interpreter(interpreter)
@app.websocket("/")
async def websocket_endpoint(websocket: WebSocket):
await websocket.accept()
receive_task = asyncio.create_task(receive_messages(websocket))
@ -60,6 +92,9 @@ async def websocket_endpoint(websocket: WebSocket):
async def receive_messages(websocket: WebSocket):
    """Read text frames from `websocket` forever and route them to a queue.

    Frames that decode to a JSON object with ``"role": "computer"`` are put
    (as dicts) on `recieve_computer_queue`; every other frame is put on
    `receive_queue` unchanged, as the raw string.
    """
    while True:
        data = await websocket.receive_text()

        # receive_text() always returns a str, so the payload has to be decoded
        # before its role can be inspected. Anything that is not valid JSON is
        # treated as an ordinary message.
        try:
            parsed = json.loads(data)
        except ValueError:
            parsed = None

        if isinstance(parsed, dict) and parsed.get("role") == "computer":
            recieve_computer_queue.put(parsed)  # To be handled by interpreter.computer.run
        else:
            receive_queue.put(data)
async def send_messages(websocket: WebSocket):
@ -68,28 +103,6 @@ async def send_messages(websocket: WebSocket):
print(message)
await websocket.send_json(message)
@app.post("/computer")
async def read_computer(item: dict):
await asyncio.get_event_loop().run_in_executor(None, receive_queue.put, item)
def queue_listener():
audio_file = bytearray()
while True:
@ -173,4 +186,9 @@ queue_thread.start()
# Run the FastAPI app
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('ASSISTANT_PORT', 8000)))
server_url = os.getenv('SERVER_URL')
if not server_url:
raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
parsed_url = urllib.parse.urlparse(server_url)
print("Starting `server.py`...")
uvicorn.run(app, host=parsed_url.hostname, port=parsed_url.port)

@ -1,51 +1,72 @@
### SETTINGS
export MODE_01=LIGHT
export ASSISTANT_PORT=8000
export COMPUTER_PORT=8001
# If ALL_LOCAL is False, we'll use OpenAI's services
export ALL_LOCAL=False
# export OPENAI_API_KEY=sk-...
# Kill whatever's on the ASSISTANT_PORT and COMPUTER_PORT
lsof -ti tcp:$ASSISTANT_PORT | xargs kill
lsof -ti tcp:$COMPUTER_PORT | xargs kill
# If SERVER_START, this is where we'll serve the server.
# If DEVICE_START, this is where the device expects the server to be.
export SERVER_URL=ws://localhost:8000/
export SERVER_START=True
export DEVICE_START=True
# Control where various operations happen— can be `device` or `server`.
export CODE_RUNNER=server
export TTS_RUNNER=device # If server, audio will be sent over websocket.
export STT_RUNNER=device # If server, audio will be sent over websocket.
# Will expose the server publicly and display that URL.
export SERVER_EXPOSE_PUBLICALLY=False
### SETUP
# (for dev, reset the ports we were using)
# Kill whatever is listening on the port named in a scheme://host:port/ URL.
# Does nothing when the URL is empty/unset, has no explicit port, or nothing
# is listening there.
kill_port_of_url() {
    local url="$1" port pids
    # Capture only the digits after the final "host:" separator, so digits in
    # the hostname or IP address are never mistaken for the port.
    port=$(printf '%s\n' "$url" | sed -nE 's#^[a-zA-Z][a-zA-Z0-9+.-]*://[^/]*:([0-9]+)(/.*)?$#\1#p')
    [[ -n "$port" ]] || return 0
    # lsof exits non-zero when no process matches; that is not an error here.
    pids=$(lsof -ti "tcp:$port") || return 0
    # Unquoted on purpose: lsof prints one PID per line.
    [[ -n "$pids" ]] && kill $pids
    return 0
}
kill_port_of_url "$SERVER_URL"
# DEVICE_URL is not set by this script; it is only honored if the caller's
# environment provides it.
kill_port_of_url "$DEVICE_URL"
# Check the current Python version
PYTHON_VERSION=$(python -V 2>&1 | cut -d " " -f 2 | cut -d "." -f 1-2)
# If the Python version is not 3.10 or 3.11, switch to it using pyenv
if [[ "$PYTHON_VERSION" != "3.10" ]] && [[ "$PYTHON_VERSION" != "3.11" ]]; then
echo "Switching to Python 3.10 using pyenv..."
pyenv install 3.10.0
pyenv shell 3.10.0
fi
# INSTALL REQUIREMENTS
# (for dev, this is disabled for speed)
# if [[ "$OSTYPE" == "darwin"* ]]; then
# brew update
# brew install portaudio ffmpeg
# fi
# pip install -r requirements.txt
### COMPUTER
### START
# START KERNEL WATCHER
# DEVICE
python computer/kernel_watcher.py &
if [[ "$DEVICE_START" == "True" ]]; then
python device.py &
fi
# START RUN ENDPOINT
# SERVER
python computer/run.py &
if [[ "$SERVER_START" == "True" ]]; then
python server.py &
fi
# START SST AND TTS SERVICES
# TTS, STT
# (todo)
# (i think we should start with hosted services)
# START LLM
# LLM
# (disabled, we'll start with hosted services)
# python core/llm/start.py &
sleep 6
# START ASSISTANT
python assistant/assistant.py &
### USER
# START USER
python user/user.py &

@ -1,13 +1,5 @@
"""
Watches the kernel. When it sees something that passes a filter,
it sends POST request with that to /computer.
"""
import subprocess
import time
import requests
import platform
import os
def get_kernel_messages():
"""
@ -40,22 +32,15 @@ def custom_filter(message):
else:
return None
last_messages = ""
def main():
last_messages = ""
while True:
def check_filtered_kernel():
messages = get_kernel_messages()
messages.replace(last_messages, "")
messages = messages.split("\n")
messages_for_core = []
filtered_messages = []
for message in messages:
if custom_filter(message):
messages_for_core.append(message)
if messages_for_core:
port = os.getenv('ASSISTANT_PORT', 8000)
requests.post(f'http://localhost:{port}/computer', json = {'messages': messages_for_core})
time.sleep(5)
if __name__ == "__main__":
main()
filtered_messages.append(message)
return filtered_messages

@ -0,0 +1,28 @@
"""
Exposes a SSE streaming server endpoint at /run, which receives language and code,
and streams the output.
"""
import os
import json
from interpreter import interpreter
import uvicorn
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
class Code(BaseModel):
    """Request body for POST /run: a language name and the source code to run in it."""
    language: str
    code: str
app = FastAPI()
@app.post("/run")
async def run_code(code: Code):
    """Run the submitted code and stream back each output chunk as JSON text."""
    def stream_chunks():
        # Each chunk is JSON-encoded as it is pulled from the interpreter.
        output = interpreter.computer.run(code.language, code.code)
        yield from map(json.dumps, output)

    return StreamingResponse(stream_chunks())
# When run as a script, serve the app on all interfaces; the port is read from
# the COMPUTER_PORT environment variable (default 9000).
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv('COMPUTER_PORT', 9000)))

@ -1,3 +1,32 @@
# New: The 8th Architecture
```
/01
start.sh # entrypoint, runs server, device, llm
server.py # uses tts and stt if it must, exposes "/"
device.py # also uses tts and stt, hits "/"
llm.py # starts an openai-compatible server
model.llamafile
i.py # creates an interpreter which server just imports
tts.py
stt.py
/conversations
user.json
/skills # files in here will run in the 01's interpreter
schedule.py
...
```
This is flatter and simpler.
**Device** handles the device — i.e. everything the user interacts with, plus watching the kernel and running code (which produces `computer` LMC messages) if `CODE_RUNNER` is set to `device`. Runs TTS and STT, sends LMC messages to "/".
**Server** serves "/", a websocket that accepts `user` LMC messages and sends back `assistant` LMC messages. Runs code (which produces `computer` LMC messages) if `CODE_RUNNER` is set to `server`.
**Llm** starts an OpenAI-compatible server with `model.llamafile`. Downloads a heavily quantized Phi-2 if `model.llamafile` doesn't exist.
**I** creates an `interpreter` object. This is where you configure the 01's behavior.
# What is this?
This is the operating system that powers the 01.

Loading…
Cancel
Save