parent 2e5c3f59f1
commit f67d64c091
@@ -0,0 +1,7 @@
{
    "python.testing.pytestArgs": [
        "_01OS"
    ],
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true
}
@@ -1,4 +0,0 @@
_archive
__pycache__
.idea
@@ -1,9 +0,0 @@
The open-source language model computer.

```bash
pip install 01OS
```

```bash
01 # Runs the 01 server and client
```
@@ -1,351 +0,0 @@
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

import os
import asyncio
import threading
import pyaudio
from starlette.websockets import WebSocket
from queue import Queue
from pynput import keyboard
import json
import traceback
import websockets
import queue
import pydub
import ast
from pydub import AudioSegment
from pydub.playback import play
import io
import time
import wave
import tempfile
from datetime import datetime
import cv2
import base64
from interpreter import interpreter  # Just for code execution. Maybe we should let people do from interpreter.computer import run?
# In the future, I guess kernel watching code should be elsewhere? Somewhere server / client agnostic?
from ..server.utils.kernel import put_kernel_messages_into_queue
from ..server.utils.get_system_info import get_system_info
from ..server.utils.process_utils import kill_process_tree

from ..server.utils.logs import setup_logging
from ..server.utils.logs import logger
setup_logging()

os.environ["STT_RUNNER"] = "server"
os.environ["TTS_RUNNER"] = "server"

from ..utils.accumulator import Accumulator

accumulator = Accumulator()

# Configuration for audio recording
CHUNK = 1024  # Record in chunks of 1024 samples
FORMAT = pyaudio.paInt16  # 16 bits per sample
CHANNELS = 1  # Mono
RATE = 44100  # Sample rate
RECORDING = False  # Flag to control recording state
SPACEBAR_PRESSED = False  # Flag to track spacebar press state

# Camera configuration
CAMERA_ENABLED = os.getenv('CAMERA_ENABLED', False)
if isinstance(CAMERA_ENABLED, str):
    CAMERA_ENABLED = (CAMERA_ENABLED.lower() == "true")
CAMERA_DEVICE_INDEX = int(os.getenv('CAMERA_DEVICE_INDEX', 0))
CAMERA_WARMUP_SECONDS = float(os.getenv('CAMERA_WARMUP_SECONDS', 0))

# Specify OS
current_platform = get_system_info()

# Initialize PyAudio
p = pyaudio.PyAudio()

send_queue = queue.Queue()
class Device:
    def __init__(self):
        self.pressed_keys = set()
        self.captured_images = []
        self.audiosegments = []
        self.server_url = ""

    def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
        """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
        image_path = None

        cap = cv2.VideoCapture(camera_index)
        ret, frame = cap.read()  # Capture a single frame to initialize the camera

        if CAMERA_WARMUP_SECONDS > 0:
            # Allow camera to warm up, then snap a picture again.
            # This is a workaround for some cameras that don't return a properly exposed
            # picture immediately when they are first turned on.
            time.sleep(CAMERA_WARMUP_SECONDS)
            ret, frame = cap.read()

        if ret:
            temp_dir = tempfile.gettempdir()
            image_path = os.path.join(temp_dir, f"01_photo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.png")
            self.captured_images.append(image_path)
            cv2.imwrite(image_path, frame)
            logger.info(f"Camera image captured to {image_path}")
            logger.info(f"You now have {len(self.captured_images)} images which will be sent along with your next audio message.")
        else:
            logger.error(f"Error: Couldn't capture an image from camera ({camera_index})")

        cap.release()

        return image_path

    def encode_image_to_base64(self, image_path):
        """Encodes an image file to a base64 string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    def add_image_to_send_queue(self, image_path):
        """Encodes an image and adds an LMC message to the send queue with the image data."""
        base64_image = self.encode_image_to_base64(image_path)
        image_message = {
            "role": "user",
            "type": "image",
            "format": "base64.png",
            "content": base64_image
        }
        send_queue.put(image_message)
        # Delete the image file from the file system after sending it
        os.remove(image_path)

    def queue_all_captured_images(self):
        """Queues all captured images to be sent."""
        for image_path in self.captured_images:
            self.add_image_to_send_queue(image_path)
        self.captured_images.clear()  # Clear the list after sending
    async def play_audiosegments(self):
        """Plays queued audio segments sequentially."""
        while True:
            try:
                # Pop segments from the front of the list instead of removing
                # while iterating, which would skip every other segment.
                while self.audiosegments:
                    audio = self.audiosegments.pop(0)
                    play(audio)
                await asyncio.sleep(0.1)
            except asyncio.exceptions.CancelledError:
                # This happens once at the start?
                pass
            except:
                logger.info(traceback.format_exc())
    def record_audio(self):

        if os.getenv('STT_RUNNER') == "server":
            # STT will happen on the server. We're sending audio.
            send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "start": True})
        elif os.getenv('STT_RUNNER') == "client":
            # STT will happen here, on the client. We're sending text.
            send_queue.put({"role": "user", "type": "message", "start": True})
        else:
            raise Exception("STT_RUNNER must be set to either 'client' or 'server'.")

        # Record audio from the microphone and add it to the queue.
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
        logger.info("Recording started...")
        global RECORDING

        # Create a temporary WAV file to store the audio data
        temp_dir = tempfile.gettempdir()
        wav_path = os.path.join(temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
        wav_file = wave.open(wav_path, 'wb')
        wav_file.setnchannels(CHANNELS)
        wav_file.setsampwidth(p.get_sample_size(FORMAT))
        wav_file.setframerate(RATE)

        while RECORDING:
            data = stream.read(CHUNK, exception_on_overflow=False)
            wav_file.writeframes(data)

        # Read the frame count before closing; a wave object can't be queried once closed.
        duration = wav_file.getnframes() / RATE
        wav_file.close()
        stream.stop_stream()
        stream.close()
        logger.info("Recording stopped.")

        if duration < 0.3:
            # Just pressed it. Send stop message
            if os.getenv('STT_RUNNER') == "client":
                send_queue.put({"role": "user", "type": "message", "content": "stop"})
                send_queue.put({"role": "user", "type": "message", "end": True})
            else:
                send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "content": ""})
                send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "end": True})
        else:
            self.queue_all_captured_images()

            if os.getenv('STT_RUNNER') == "client":

                # THIS DOES NOT WORK. We moved to this very cool stt_service, llm_service
                # way of doing things. stt_wav is not a thing anymore. Needs work to work

                # Run stt then send text
                text = stt_wav(wav_path)
                logger.debug(f"STT result: {text}")
                send_queue.put({"role": "user", "type": "message", "content": text})
                send_queue.put({"role": "user", "type": "message", "end": True})
            else:
                # Stream audio
                with open(wav_path, 'rb') as audio_file:
                    byte_data = audio_file.read(CHUNK)
                    while byte_data:
                        send_queue.put(byte_data)
                        byte_data = audio_file.read(CHUNK)
                send_queue.put({"role": "user", "type": "audio", "format": "bytes.wav", "end": True})

        if os.path.exists(wav_path):
            os.remove(wav_path)
    def toggle_recording(self, state):
        """Toggle the recording state."""
        global RECORDING, SPACEBAR_PRESSED
        if state and not SPACEBAR_PRESSED:
            SPACEBAR_PRESSED = True
            if not RECORDING:
                RECORDING = True
                threading.Thread(target=self.record_audio).start()
        elif not state and SPACEBAR_PRESSED:
            SPACEBAR_PRESSED = False
            RECORDING = False

    def on_press(self, key):
        """Detect spacebar press and Ctrl+C combination."""
        self.pressed_keys.add(key)  # Add the pressed key to the set

        if keyboard.Key.space in self.pressed_keys:
            self.toggle_recording(True)
        elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char('c')} <= self.pressed_keys:
            logger.info("Ctrl+C pressed. Exiting...")
            kill_process_tree()
            os._exit(0)

    def on_release(self, key):
        """Detect spacebar release and 'c' key press for camera, and handle key release."""
        self.pressed_keys.discard(key)  # Remove the released key from the key press tracking set

        if key == keyboard.Key.space:
            self.toggle_recording(False)
        elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char('c'):
            self.fetch_image_from_camera()
    async def message_sender(self, websocket):
        while True:
            message = await asyncio.get_event_loop().run_in_executor(None, send_queue.get)
            if isinstance(message, bytes):
                await websocket.send(message)
            else:
                await websocket.send(json.dumps(message))
            send_queue.task_done()
            await asyncio.sleep(0.01)

    async def websocket_communication(self, WS_URL):
        while True:
            try:
                async with websockets.connect(WS_URL) as websocket:
                    if CAMERA_ENABLED:
                        logger.info("Press the spacebar to start/stop recording. Press 'c' to capture an image from the camera. Press CTRL-C to exit.")
                    else:
                        logger.info("Press the spacebar to start/stop recording. Press CTRL-C to exit.")

                    asyncio.create_task(self.message_sender(websocket))

                    while True:
                        await asyncio.sleep(0.01)
                        chunk = await websocket.recv()

                        logger.debug(f"Got this message from the server: {type(chunk)} {chunk}")

                        if isinstance(chunk, str):
                            chunk = json.loads(chunk)

                        message = accumulator.accumulate(chunk)
                        if message is None:
                            # Will be None until we have a full message ready
                            continue

                        # At this point, we have our message

                        if message["type"] == "audio" and message["format"].startswith("bytes"):

                            # Convert bytes to an audio segment

                            audio_bytes = message["content"]

                            # Create an AudioSegment instance with the raw data
                            audio = AudioSegment(
                                # raw audio data (bytes)
                                data=audio_bytes,
                                # signed 16-bit little-endian format
                                sample_width=2,
                                # 16,000 Hz frame rate
                                frame_rate=16000,
                                # mono sound
                                channels=1
                            )

                            self.audiosegments.append(audio)

                        # Run the code if that's the client's job
                        if os.getenv('CODE_RUNNER') == "client":
                            if message["type"] == "code" and "end" in message:
                                language = message["format"]
                                code = message["content"]
                                result = interpreter.computer.run(language, code)
                                send_queue.put(result)
            except:
                logger.debug(traceback.format_exc())
                logger.info(f"Connecting to `{WS_URL}`...")
                await asyncio.sleep(2)
    async def start_async(self):
        # Configuration for WebSocket
        WS_URL = f"ws://{self.server_url}"
        # Start the WebSocket communication
        asyncio.create_task(self.websocket_communication(WS_URL))

        # Start watching the kernel if it's your job to do that
        if os.getenv('CODE_RUNNER') == "client":
            asyncio.create_task(put_kernel_messages_into_queue(send_queue))

        asyncio.create_task(self.play_audiosegments())

        # If Raspberry Pi, add the button listener, otherwise use the spacebar
        if current_platform.startswith("raspberry-pi"):
            logger.info("Raspberry Pi detected, using button on GPIO pin 15")
            # Use GPIO pin 15
            pindef = ["gpiochip4", "15"]  # gpiofind PIN15
            print("PINDEF", pindef)

            # HACK: needs passwordless sudo
            process = await asyncio.create_subprocess_exec("sudo", "gpiomon", "-brf", *pindef, stdout=asyncio.subprocess.PIPE)
            while True:
                line = await process.stdout.readline()
                if line:
                    line = line.decode().strip()
                    if "FALLING" in line:
                        self.toggle_recording(False)
                    elif "RISING" in line:
                        self.toggle_recording(True)
                else:
                    break
        else:
            # Keyboard listener for spacebar press/release
            listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
            listener.start()

    def start(self):
        if os.getenv('TEACH_MODE') != "True":
            asyncio.run(self.start_async())
            p.terminate()
@@ -1,11 +0,0 @@
# ESP32 Playback

To set up audio recording + playback on the ESP32 (M5 Atom), do the following:

1. Open Arduino IDE, and open the `client/client.ino` file
2. Go to Tools -> Board -> Boards Manager, search "esp32", then install the boards by Arduino and Espressif
3. Go to Tools -> Manage Libraries, then install the following:
   - M5Atom by M5Stack
   - WebSockets by Markus Sattler
4. The board needs to connect to WiFi. Once you flash it, connect to the ESP32's captive WiFi network, which will collect your WiFi details. Once it connects, it will ask you to enter the 01OS server address in the format "domain.com:port" or "ip:port". Once it's able to connect, you can use the device.
5. To flash the .ino to the board, connect the board to the USB port, select the port from the dropdown in the IDE, then select the M5Atom board (or M5Stack-ATOM if you have that). Click Upload to flash the board.
@@ -1,47 +0,0 @@
#!/usr/bin/env python

"""A basic echo server for testing the device."""

import asyncio
import uuid
import websockets
from websockets.server import serve
import traceback


def divide_chunks(l, n):
    # Yield successive n-sized chunks from l
    for i in range(0, len(l), n):
        yield l[i : i + n]


buffers: dict[uuid.UUID, bytearray] = {}


async def echo(websocket: websockets.WebSocketServerProtocol):
    async for message in websocket:
        try:
            if message == "s":
                print("starting stream for", websocket.id)
                buffers[websocket.id] = bytearray()
            elif message == "e":
                print("end, echoing stream for", websocket.id)
                await websocket.send("s")
                for chunk in divide_chunks(buffers[websocket.id], 1000):
                    await websocket.send(chunk)
                await websocket.send("e")
            elif isinstance(message, bytes):
                print("recvd", len(message), "bytes from", websocket.id)
                buffers[websocket.id].extend(message)
            else:
                print("ERR: recvd unknown message", message[:10], "from", websocket.id)
        except Exception as _e:
            traceback.print_exc()


async def main():
    async with serve(echo, "0.0.0.0", 9001):
        await asyncio.Future()  # run forever


asyncio.run(main())
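For a quick end-to-end check of this echo server, a minimal driver script can follow the same "s" / bytes / "e" framing from another terminal. This is a sketch, assuming the server above is running on localhost:9001; `sample.wav` is a hypothetical placeholder for any local audio file:

```python
# test_echo_client.py -- minimal sketch of a client for the echo server above.
# Assumes the server is listening on localhost:9001; "sample.wav" is a
# hypothetical placeholder for any local audio file.
import asyncio
import websockets


async def run_test():
    async with websockets.connect("ws://localhost:9001") as ws:
        await ws.send("s")  # start a stream
        with open("sample.wav", "rb") as f:
            while chunk := f.read(1000):
                await ws.send(chunk)  # stream raw audio bytes
        await ws.send("e")  # end the stream; the server echoes it back

        echoed = bytearray()
        assert await ws.recv() == "s"
        while (msg := await ws.recv()) != "e":
            echoed.extend(msg)
        print("echoed", len(echoed), "bytes back")


asyncio.run(run_test())
```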
@@ -1,10 +0,0 @@
from ..base_device import Device

device = Device()

def main(server_url):
    device.server_url = server_url
    device.start()

if __name__ == "__main__":
    import sys
    main(sys.argv[1])  # expects the server address ("ip:port") as the first CLI argument
@@ -1,10 +0,0 @@
from ..base_device import Device

device = Device()

def main(server_url):
    device.server_url = server_url
    device.start()

if __name__ == "__main__":
    import sys
    main(sys.argv[1])  # expects the server address ("ip:port") as the first CLI argument
@@ -1,9 +0,0 @@
from ..base_device import Device

device = Device()

def main():
    device.start()

if __name__ == "__main__":
    main()
@@ -1,10 +0,0 @@
import pytest
from fastapi.testclient import TestClient
from .server import app


@pytest.fixture
def client():
    return TestClient(app)
@@ -1,58 +0,0 @@
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

from platformdirs import user_data_dir
import os
import glob
import json
from pathlib import Path
from interpreter import OpenInterpreter
from .system_messages.BaseSystemMessage import system_message


def configure_interpreter(interpreter: OpenInterpreter):

    ### SYSTEM MESSAGE
    interpreter.system_message = system_message

    ### LLM SETTINGS

    # Local settings
    # interpreter.llm.model = "local"
    # interpreter.llm.api_base = "https://localhost:8080/v1"  # Llamafile default
    # interpreter.llm.max_tokens = 1000
    # interpreter.llm.context_window = 3000

    # Hosted settings
    interpreter.llm.api_key = os.getenv('OPENAI_API_KEY')
    interpreter.llm.model = "gpt-4"

    ### MISC SETTINGS

    interpreter.auto_run = True
    interpreter.computer.languages = [l for l in interpreter.computer.languages if l.name.lower() in ["applescript", "shell", "zsh", "bash", "python"]]
    interpreter.force_task_completion = False
    interpreter.offline = True
    interpreter.id = 206  # Used to identify itself to other interpreters. This should be changed programmatically so it's unique.

    ### RESET conversations/user.json

    app_dir = user_data_dir('01')
    conversations_dir = os.path.join(app_dir, 'conversations')
    os.makedirs(conversations_dir, exist_ok=True)
    user_json_path = os.path.join(conversations_dir, 'user.json')
    with open(user_json_path, 'w') as file:
        json.dump([], file)

    ### SKILLS
    skills_dir = user_data_dir('01', 'skills')
    interpreter.computer.skills.path = skills_dir
    interpreter.computer.skills.import_skills()

    interpreter.computer.run("python", "tasks=[]")

    interpreter.computer.api_base = "https://oi-video-frame.vercel.app/"
    interpreter.computer.run("python", "print('test')")

    return interpreter
@@ -1,28 +0,0 @@
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

import os
import subprocess
from pathlib import Path

### LLM SETUP

# Define the path to a llamafile
llamafile_path = Path(__file__).parent / 'model.llamafile'

# Check if the llamafile exists; if not, download it
if not os.path.exists(llamafile_path):
    subprocess.run(
        [
            "wget",
            "-O",
            llamafile_path,
            "https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q4_K_M.llamafile",
        ],
        check=True,
    )
    # Make the new llamafile executable
    subprocess.run(["chmod", "+x", llamafile_path], check=True)

# Run the llamafile
subprocess.run([str(llamafile_path)], check=True)
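Once the llamafile is running, it serves an OpenAI-compatible API locally; elsewhere in this commit, `https://localhost:8080/v1` is noted as the "Llamafile default". A sanity-check sketch under that assumption (the port and the http scheme are worth verifying against your llamafile version; the `"phi-2"` model name is a placeholder, since single-model llamafiles largely ignore it):

```python
# sanity_check_llamafile.py -- minimal sketch; assumes the llamafile's
# built-in server is listening on http://localhost:8080 (its usual default).
import requests

response = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "model": "phi-2",  # placeholder; single-model llamafiles largely ignore this
        "messages": [{"role": "user", "content": "Say hello in five words."}],
    },
    timeout=60,
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```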
@@ -1,453 +0,0 @@
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

from platformdirs import user_data_dir
import ast
import json
import queue
import os
import traceback
from .utils.bytes_to_wav import bytes_to_wav
import re
from fastapi import FastAPI, Request
from fastapi.responses import PlainTextResponse
from starlette.websockets import WebSocket, WebSocketDisconnect
from pathlib import Path
import asyncio
import urllib.parse
from .utils.kernel import put_kernel_messages_into_queue
from .i import configure_interpreter
from interpreter import interpreter
from ..utils.accumulator import Accumulator
from .utils.logs import setup_logging
from .utils.logs import logger

from ..utils.print_markdown import print_markdown

markdown = """
○

*Starting...*
"""
print("")
print_markdown(markdown)
print("")


setup_logging()

accumulator = Accumulator()

app = FastAPI()

app_dir = user_data_dir('01')
conversation_history_path = os.path.join(app_dir, 'conversations', 'user.json')

SERVER_LOCAL_PORT = int(os.getenv('SERVER_LOCAL_PORT', 8000))


# This is so we only say() full sentences
def is_full_sentence(text):
    return text.endswith(('.', '!', '?'))

def split_into_sentences(text):
    return re.split(r'(?<=[.!?])\s+', text)

# Queues
from_computer = queue.Queue()  # Just for computer messages from the device. Sync queue because interpreter.run is synchronous
from_user = asyncio.Queue()  # Just for user messages from the device.
to_device = asyncio.Queue()  # For messages we send.
# Switch code executor to device if that's set

if os.getenv('CODE_RUNNER') == "device":

    # (This should probably just loop through all languages and apply these changes instead)

    class Python:
        # This is the name that will appear to the LLM.
        name = "python"

        def __init__(self):
            self.halt = False

        def run(self, code):
            """Generator that yields a dictionary in LMC Format."""

            # Prepare the data
            message = {"role": "assistant", "type": "code", "format": "python", "content": code}

            # Unless it was just sent to the device, send it wrapped in flags.
            # asyncio.Queue.put is a coroutine, so use put_nowait from this
            # synchronous generator.
            if not (interpreter.messages and interpreter.messages[-1] == message):
                to_device.put_nowait({"role": "assistant", "type": "code", "format": "python", "start": True})
                to_device.put_nowait(message)
                to_device.put_nowait({"role": "assistant", "type": "code", "format": "python", "end": True})

            # Stream the response
            logger.info("Waiting for the device to respond...")
            while True:
                chunk = from_computer.get()
                logger.info(f"Server received from device: {chunk}")
                if "end" in chunk:
                    break
                yield chunk

        def stop(self):
            self.halt = True

        def terminate(self):
            """Terminates the entire process."""
            # dramatic!! do nothing
            pass

    interpreter.computer.languages = [Python]

# Configure interpreter
interpreter = configure_interpreter(interpreter)
@app.get("/ping")
|
||||
async def ping():
|
||||
return PlainTextResponse("pong")
|
||||
|
||||
@app.websocket("/")
|
||||
async def websocket_endpoint(websocket: WebSocket):
|
||||
await websocket.accept()
|
||||
receive_task = asyncio.create_task(receive_messages(websocket))
|
||||
send_task = asyncio.create_task(send_messages(websocket))
|
||||
try:
|
||||
await asyncio.gather(receive_task, send_task)
|
||||
except Exception as e:
|
||||
logger.debug(traceback.format_exc())
|
||||
logger.info(f"Connection lost. Error: {e}")
|
||||
|
||||
|
||||
@app.post("/")
|
||||
async def add_computer_message(request: Request):
|
||||
body = await request.json()
|
||||
text = body.get("text")
|
||||
if not text:
|
||||
return {"error": "Missing 'text' in request body"}, 422
|
||||
message = {"role": "computer", "type": "console", "format": "output", "content": text}
|
||||
from_computer.put({"role": "computer", "type": "console", "format": "output", "start": True})
|
||||
from_computer.put(message)
|
||||
from_computer.put({"role": "computer", "type": "console", "format": "output", "end": True})
|
||||
|
||||
|
||||
async def receive_messages(websocket: WebSocket):
    while True:
        try:
            try:
                data = await websocket.receive()
            except Exception as e:
                print(str(e))
                return
            if 'text' in data:
                try:
                    data = json.loads(data['text'])
                    if data["role"] == "computer":
                        from_computer.put(data)  # To be handled by interpreter.computer.run
                    elif data["role"] == "user":
                        await from_user.put(data)
                    else:
                        raise Exception(f"Unknown role: {data}")
                except json.JSONDecodeError:
                    pass  # data is not JSON, leave it as is
            elif 'bytes' in data:
                data = data['bytes']  # binary data
                await from_user.put(data)
        except WebSocketDisconnect as e:
            if e.code == 1000:
                logger.info("Websocket connection closed normally.")
                return
            else:
                raise
async def send_messages(websocket: WebSocket):
    while True:
        message = await to_device.get()
        logger.debug(f"Sending to the device: {type(message)} {message}")

        try:
            if isinstance(message, dict):
                await websocket.send_json(message)
            elif isinstance(message, bytes):
                await websocket.send_bytes(message)
            else:
                raise TypeError("Message must be a dict or bytes")
        except:
            # Make sure to put the message back in the queue if you failed to send it
            await to_device.put(message)
            raise
async def listener():

    while True:
        while True:
            if not from_user.empty():
                chunk = await from_user.get()
                break
            elif not from_computer.empty():
                chunk = from_computer.get()
                break
            await asyncio.sleep(1)


        message = accumulator.accumulate(chunk)
        if message is None:
            # Will be None until we have a full message ready
            continue

        # print(str(message)[:1000])

        # At this point, we have our message

        if message["type"] == "audio" and message["format"].startswith("bytes"):

            if not message["content"]:  # If it was nothing / silence
                continue

            # Convert bytes to audio file
            # Format will be bytes.wav or bytes.opus
            mime_type = "audio/" + message["format"].split(".")[1]
            audio_file_path = bytes_to_wav(message["content"], mime_type)

            # For microphone debugging:
            if False:
                os.system(f"open {audio_file_path}")
                import time
                time.sleep(15)

            text = stt(audio_file_path)
            print(text)
            message = {"role": "user", "type": "message", "content": text}

        # At this point, we have only text messages

        # Custom stop message will halt us
        if message["content"].lower().strip(".,! ") == "stop":
            continue

        # Load, append, and save conversation history
        with open(conversation_history_path, 'r') as file:
            messages = json.load(file)
        messages.append(message)
        with open(conversation_history_path, 'w') as file:
            json.dump(messages, file, indent=4)

        accumulated_text = ""

        force_task_completion_message = """AUTOMATED MESSAGE: Proceed. You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task I asked for is done, say exactly 'The task is done.' If you need some specific information (like username or password) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going."""
        interpreter.messages = [m for m in interpreter.messages if m["content"] != force_task_completion_message]
        insert_force_task_completion_message = True

        if any([m["type"] == "image" for m in messages]) and interpreter.llm.model.startswith("gpt-"):
            interpreter.llm.model = "gpt-4-vision-preview"
            interpreter.llm.supports_vision = True

        while insert_force_task_completion_message:

            for chunk in interpreter.chat(messages, stream=True, display=True):

                if chunk["type"] == "code":
                    insert_force_task_completion_message = False

                if any([m["type"] == "image" for m in interpreter.messages]):
                    interpreter.llm.model = "gpt-4-vision-preview"

                logger.debug(f"Got chunk: {chunk}")

                # Send it to the user
                await to_device.put(chunk)
                # Yield to the event loop, so you actually send it out
                await asyncio.sleep(0.01)

                if os.getenv('TTS_RUNNER') == "server":
                    # Speak full sentences out loud
                    if chunk["role"] == "assistant" and "content" in chunk and chunk["type"] == "message":
                        accumulated_text += chunk["content"]
                        sentences = split_into_sentences(accumulated_text)

                        # If we're going to speak, say we're going to stop sending text.
                        # This should be fixed probably, we should be able to do both in parallel, or only one.
                        if any(is_full_sentence(sentence) for sentence in sentences):
                            await to_device.put({"role": "assistant", "type": "message", "end": True})

                        if is_full_sentence(sentences[-1]):
                            for sentence in sentences:
                                await stream_tts_to_device(sentence)
                            accumulated_text = ""
                        else:
                            for sentence in sentences[:-1]:
                                await stream_tts_to_device(sentence)
                            accumulated_text = sentences[-1]

                        # After speaking, say we're going to start sending text again.
                        # This should be fixed probably, we should be able to do both in parallel, or only one.
                        if any(is_full_sentence(sentence) for sentence in sentences):
                            await to_device.put({"role": "assistant", "type": "message", "start": True})

                # If we have a new message, save our progress and go back to the top
                if not from_user.empty():

                    # Check if it's just an end flag. We ignore those.
                    temp_message = await from_user.get()

                    if isinstance(temp_message, dict) and temp_message.get("role") == "user" and temp_message.get("end"):
                        # Yup. False alarm.
                        continue
                    else:
                        # Whoops! Put that back
                        await from_user.put(temp_message)

                    with open(conversation_history_path, 'w') as file:
                        json.dump(interpreter.messages, file, indent=4)

                    # TODO: is triggering seemingly randomly
                    #logger.info("New user message received. Breaking.")
                    #break

                # Also check if there's any new computer messages
                if not from_computer.empty():

                    with open(conversation_history_path, 'w') as file:
                        json.dump(interpreter.messages, file, indent=4)

                    logger.info("New computer message received. Breaking.")
                    break
            else:
                with open(conversation_history_path, 'w') as file:
                    json.dump(interpreter.messages, file, indent=4)

                force_task_completion_responses = [
                    "the task is done.",
                    "the task is impossible.",
                    "let me know what you'd like to do next.",
                    "please provide more information.",
                ]

                # Did the LLM respond with one of the key messages?
                if (
                    interpreter.messages
                    and any(
                        task_status in interpreter.messages[-1].get("content", "").lower()
                        for task_status in force_task_completion_responses
                    )
                ):
                    insert_force_task_completion_message = False
                    break

                if insert_force_task_completion_message:
                    interpreter.messages += [
                        {
                            "role": "user",
                            "type": "message",
                            "content": force_task_completion_message,
                        }
                    ]
                else:
                    break
async def stream_tts_to_device(sentence):
    force_task_completion_responses = [
        "the task is done",
        "the task is impossible",
        "let me know what you'd like to do next",
    ]
    if sentence.lower().strip().strip(".!?").strip() in force_task_completion_responses:
        return

    for chunk in stream_tts(sentence):
        await to_device.put(chunk)


def stream_tts(sentence):

    audio_file = tts(sentence)

    with open(audio_file, "rb") as f:
        audio_bytes = f.read()
    os.remove(audio_file)

    file_type = "bytes.raw"
    chunk_size = 1024

    # Stream the audio
    yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
    for i in range(0, len(audio_bytes), chunk_size):
        chunk = audio_bytes[i:i+chunk_size]
        yield chunk
    yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
from uvicorn import Config, Server
import os
import platform
from importlib import import_module

# these will be overwritten
HOST = ''
PORT = 0

@app.on_event("startup")
async def startup_event():
    server_url = f"{HOST}:{PORT}"
    print("")
    print_markdown("\n*Ready.*\n")
    print("")

@app.on_event("shutdown")
async def shutdown_event():
    print_markdown("*Server is shutting down*")

async def main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service):

    global HOST
    global PORT
    PORT = server_port
    HOST = server_host

    # Setup services
    application_directory = user_data_dir('01')
    services_directory = os.path.join(application_directory, 'services')

    service_dict = {'llm': llm_service, 'tts': tts_service, 'stt': stt_service}

    for service in service_dict:

        service_directory = os.path.join(services_directory, service, service_dict[service])

        # This is the folder they can mess around in
        config = {"service_directory": service_directory}

        if service == "llm":
            config.update({
                "interpreter": interpreter,
                "model": model,
                "llm_supports_vision": llm_supports_vision,
                "llm_supports_functions": llm_supports_functions,
                "context_window": context_window,
                "max_tokens": max_tokens,
                "temperature": temperature
            })

        module = import_module(f'.server.services.{service}.{service_dict[service]}.{service}', package='01OS')
        ServiceClass = getattr(module, service.capitalize())
        service_instance = ServiceClass(config)
        globals()[service] = getattr(service_instance, service)

    interpreter.llm.completions = llm

    # Start listening
    asyncio.create_task(listener())

    # Start watching the kernel if it's your job to do that
    if True:  # in the future, code can run on device. for now, just server.
        asyncio.create_task(put_kernel_messages_into_queue(from_computer))

    config = Config(app, host=server_host, port=int(server_port), lifespan='on')
    server = Server(config)
    await server.serve()

# Run the FastAPI app
if __name__ == "__main__":
    asyncio.run(main())
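The `POST /` endpoint above gives external processes a way to inject console output into the conversation as a "computer" LMC message. A minimal sketch of exercising it, assuming the server is listening on localhost:8000 (the `SERVER_LOCAL_PORT` default read above):

```python
# post_computer_message.py -- minimal sketch; assumes the 01 server is
# listening on localhost:8000 (the SERVER_LOCAL_PORT default).
import requests

resp = requests.post(
    "http://localhost:8000/",
    # The body becomes a {"role": "computer", "type": "console"} LMC message
    json={"text": "Build finished with 0 errors."},
    timeout=10,
)
print(resp.status_code)
```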
@@ -1,15 +0,0 @@
class Llm:
    def __init__(self, config):

        # Litellm is used by OI by default, so we just modify OI

        interpreter = config["interpreter"]
        config.pop("interpreter", None)
        config.pop("service_directory", None)
        for key, value in config.items():
            setattr(interpreter, key.replace("-", "_"), value)

        self.llm = interpreter.llm.completions
@@ -1,49 +0,0 @@
import os
import subprocess
import requests
import json

class Llm:
    def __init__(self, config):
        self.install(config["service_directory"])

    def install(self, service_directory):
        LLM_FOLDER_PATH = service_directory
        self.llm_directory = os.path.join(LLM_FOLDER_PATH, 'llm')
        if not os.path.isdir(self.llm_directory):  # Check if the LLM directory exists
            os.makedirs(self.llm_directory, exist_ok=True)  # also creates the parent folder

            # Install WasmEdge (the pipe requires a shell, so run this as a single shell command)
            subprocess.run(
                'curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh | bash -s -- --plugin wasi_nn-ggml',
                shell=True,
            )

            # Download the Qwen1.5-0.5B-Chat model GGUF file
            MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
            subprocess.run(['curl', '-LO', MODEL_URL], cwd=self.llm_directory)

            # Download the llama-api-server.wasm app
            APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"
            subprocess.run(['curl', '-LO', APP_URL], cwd=self.llm_directory)

            # Run the API server in the background (a blocking run() here would never return,
            # so setup could never complete)
            subprocess.Popen(['wasmedge', '--dir', '.:.', '--nn-preload', 'default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf', 'llama-api-server.wasm', '-p', 'llama-2-chat'], cwd=self.llm_directory)

            print("LLM setup completed.")
        else:
            print("LLM already set up. Skipping download.")

    def llm(self, messages):
        url = "http://localhost:8080/v1/chat/completions"
        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json'
        }
        data = {
            "messages": messages,
            "model": "llama-2-chat"
        }
        with requests.post(url, headers=headers, data=json.dumps(data), stream=True) as response:
            for line in response.iter_lines():
                if line:
                    yield json.loads(line)
@@ -1,84 +0,0 @@
import os
import platform
import subprocess
import time
import wget
import stat

class Llm:
    def __init__(self, config):

        self.interpreter = config["interpreter"]
        config.pop("interpreter", None)

        self.install(config["service_directory"])

        config.pop("service_directory", None)
        for key, value in config.items():
            setattr(self.interpreter, key.replace("-", "_"), value)

        self.llm = self.interpreter.llm.completions

    def install(self, service_directory):

        if platform.system() == "Darwin":  # Check if the system is macOS
            result = subprocess.run(
                ["xcode-select", "-p"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
            )
            if result.returncode != 0:
                print(
                    "Llamafile requires Mac users to have Xcode installed. You can install Xcode from https://developer.apple.com/xcode/ .\n\nAlternatively, you can use `LM Studio`, `Jan.ai`, or `Ollama` to manage local language models. Learn more at https://docs.openinterpreter.com/guides/running-locally ."
                )
                time.sleep(3)
                raise Exception("Xcode is not installed. Please install Xcode and try again.")

        # Define the path to the models directory
        models_dir = os.path.join(service_directory, "models")

        # Check and create the models directory if it doesn't exist
        if not os.path.exists(models_dir):
            os.makedirs(models_dir)

        # Define the path to the new llamafile
        llamafile_path = os.path.join(models_dir, "phi-2.Q4_K_M.llamafile")

        # Check if the new llamafile exists, if not download it
        if not os.path.exists(llamafile_path):
            print(
                "Attempting to download the `Phi-2` language model. This may take a few minutes."
            )
            time.sleep(3)

            url = "https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q4_K_M.llamafile"
            wget.download(url, llamafile_path)

        # Make the new llamafile executable
        if platform.system() != "Windows":
            st = os.stat(llamafile_path)
            os.chmod(llamafile_path, st.st_mode | stat.S_IEXEC)

        # Run the new llamafile in the background
        if os.path.exists(llamafile_path):
            # Verify the llamafile is actually executable before launching it
            if not os.access(llamafile_path, os.X_OK):
                print("The llamafile is not executable. Please check the file permissions.")
                raise PermissionError(f"Llamafile is not executable: {llamafile_path}")
            subprocess.Popen([llamafile_path, "-ngl", "9999"])
        else:
            error_message = "The llamafile does not exist or is corrupted. Please ensure it has been downloaded correctly or try again."
            print(error_message)

        self.interpreter.system_message = "You are Open Interpreter, a world-class programmer that can execute code on the user's machine."
        self.interpreter.offline = True

        self.interpreter.llm.model = "local"
        self.interpreter.llm.temperature = 0
        self.interpreter.llm.api_base = "https://localhost:8080/v1"
        self.interpreter.llm.max_tokens = 1000
        self.interpreter.llm.context_window = 3000
        self.interpreter.llm.supports_functions = False
@@ -1,137 +0,0 @@
"""
Defines a function which takes a path to an audio file and turns it into text.
"""

from datetime import datetime
import os
import contextlib
import tempfile
import shutil
import ffmpeg
import subprocess


class Stt:
    def __init__(self, config):
        self.service_directory = config["service_directory"]
        install(self.service_directory)

    def stt(self, audio_file_path):
        return stt(self.service_directory, audio_file_path)


def install(service_dir):

    ### INSTALL

    WHISPER_RUST_PATH = os.path.join(service_dir, "whisper-rust")
    script_dir = os.path.dirname(os.path.realpath(__file__))
    source_whisper_rust_path = os.path.join(script_dir, "whisper-rust")
    if not os.path.exists(source_whisper_rust_path):
        print(f"Source directory does not exist: {source_whisper_rust_path}")
        exit(1)
    if not os.path.exists(WHISPER_RUST_PATH):
        shutil.copytree(source_whisper_rust_path, WHISPER_RUST_PATH)

    os.chdir(WHISPER_RUST_PATH)

    # Check if whisper-rust executable exists before attempting to build
    if not os.path.isfile(os.path.join(WHISPER_RUST_PATH, "target/release/whisper-rust")):
        # Check if Rust is installed. Needed to build whisper executable
        rust_check = subprocess.call('command -v rustc', shell=True)
        if rust_check != 0:
            print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.")
            exit(1)

        # Build Whisper Rust executable if not found
        subprocess.call('cargo build --release', shell=True)
    else:
        print("Whisper Rust executable already exists. Skipping build.")

    WHISPER_MODEL_PATH = os.path.join(service_dir, "model")

    WHISPER_MODEL_NAME = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')
    WHISPER_MODEL_URL = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/')

    if not os.path.isfile(os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)):
        os.makedirs(WHISPER_MODEL_PATH, exist_ok=True)
        subprocess.call(f'curl -L "{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}" -o "{os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)}"', shell=True)
    else:
        print("Whisper model already exists. Skipping download.")

def convert_mime_type_to_format(mime_type: str) -> str:
    if mime_type == "audio/x-wav" or mime_type == "audio/wav":
        return "wav"
    if mime_type == "audio/webm":
        return "webm"
    if mime_type == "audio/raw":
        return "dat"

    return mime_type

@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    temp_dir = tempfile.gettempdir()

    # Create a temporary file with the appropriate extension
    input_ext = convert_mime_type_to_format(mime_type)
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    with open(input_path, 'wb') as f:
        f.write(audio)

    # Check if the input file exists
    assert os.path.exists(input_path), f"Input file does not exist: {input_path}"

    # Export to wav
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
    print(mime_type, input_path, output_path)
    if mime_type == "audio/raw":
        ffmpeg.input(
            input_path,
            f='s16le',
            ar='16000',
            ac=1,
        ).output(output_path).run()
    else:
        ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

    try:
        yield output_path
    finally:
        os.remove(input_path)
        os.remove(output_path)

def run_command(command):
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    return result.stdout, result.stderr

def get_transcription_file(service_directory, wav_file_path: str):
    local_path = os.path.join(service_directory, 'model')
    whisper_rust_path = os.path.join(service_directory, 'whisper-rust', 'target', 'release')
    model_name = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')

    output, _ = run_command([
        os.path.join(whisper_rust_path, 'whisper-rust'),
        '--model-path', os.path.join(local_path, model_name),
        '--file-path', wav_file_path
    ])

    return output


def stt_wav(service_directory, wav_file_path: str):
    temp_dir = tempfile.gettempdir()
    output_path = os.path.join(temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
    ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
    try:
        transcript = get_transcription_file(service_directory, output_path)
    finally:
        os.remove(output_path)
    return transcript

def stt(service_directory, input_data):
    return stt_wav(service_directory, input_data)
@@ -1,10 +0,0 @@
# Generated by Cargo
# will have compiled files and executables
debug/
target/

# These are backup files generated by rustfmt
**/*.rs.bk

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
File diff suppressed because it is too large
@@ -1,14 +0,0 @@
[package]
name = "whisper-rust"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.79"
clap = { version = "4.4.18", features = ["derive"] }
cpal = "0.15.2"
hound = "3.5.1"
whisper-rs = "0.10.0"
whisper-rs-sys = "0.8.0"
@@ -1,34 +0,0 @@
mod transcribe;

use clap::Parser;
use std::path::PathBuf;
use transcribe::transcribe;

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// This is the model for Whisper STT
    #[arg(short, long, value_parser, required = true)]
    model_path: PathBuf,

    /// This is the wav audio file that will be converted from speech to text
    #[arg(short, long, value_parser, required = true)]
    file_path: Option<PathBuf>,
}

fn main() {

    let args = Args::parse();

    let file_path = match args.file_path {
        Some(fp) => fp,
        None => panic!("No file path provided")
    };

    let result = transcribe(&args.model_path, &file_path);

    match result {
        Ok(transcription) => print!("{}", transcription),
        Err(e) => panic!("Error: {}", e),
    }
}
@@ -1,64 +0,0 @@
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
use std::path::PathBuf;


/// Transcribes the given audio file using the whisper-rs library.
///
/// # Arguments
/// * `model_path` - Path to Whisper model file
/// * `file_path` - A string slice that holds the path to the audio file to be transcribed.
///
/// # Returns
///
/// A Result containing a String with the transcription if successful, or an error message if not.
pub fn transcribe(model_path: &PathBuf, file_path: &PathBuf) -> Result<String, String> {

    let model_path_str = model_path.to_str().expect("Not valid model path");
    // Load a context and model
    let ctx = WhisperContext::new_with_params(
        model_path_str, // Replace with the actual path to the model
        WhisperContextParameters::default(),
    )
    .map_err(|_| "failed to load model")?;

    // Create a state
    let mut state = ctx.create_state().map_err(|_| "failed to create state")?;

    // Create a params object
    // Note that currently the only implemented strategy is Greedy, BeamSearch is a WIP
    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });

    // Edit parameters as needed
    params.set_n_threads(1); // Set the number of threads to use
    params.set_translate(true); // Enable translation
    params.set_language(Some("en")); // Set the language to translate to English
    // Disable printing to stdout
    params.set_print_special(false);
    params.set_print_progress(false);
    params.set_print_realtime(false);
    params.set_print_timestamps(false);

    // Load the audio file
    let audio_data = std::fs::read(file_path)
        .map_err(|e| format!("failed to read audio file: {}", e))?
        .chunks_exact(2)
        .map(|chunk| i16::from_ne_bytes([chunk[0], chunk[1]]))
        .collect::<Vec<i16>>();

    // Convert the audio data to the required format (16KHz mono i16 samples)
    let audio_data = whisper_rs::convert_integer_to_float_audio(&audio_data);

    // Run the model
    state.full(params, &audio_data[..]).map_err(|_| "failed to run model")?;

    // Fetch the results
    let num_segments = state.full_n_segments().map_err(|_| "failed to get number of segments")?;
    let mut transcription = String::new();
    for i in 0..num_segments {
        let segment = state.full_get_segment_text(i).map_err(|_| "failed to get segment")?;
        transcription.push_str(&segment);
        transcription.push('\n');
    }

    Ok(transcription)
}
@@ -1,110 +0,0 @@
class Stt:
    def __init__(self, config):
        pass

    def stt(self, audio_file_path):
        return stt(audio_file_path)


from datetime import datetime
import os
import contextlib
import tempfile
import ffmpeg
import subprocess
import openai
from openai import OpenAI


client = OpenAI()

def convert_mime_type_to_format(mime_type: str) -> str:
    if mime_type == "audio/x-wav" or mime_type == "audio/wav":
        return "wav"
    if mime_type == "audio/webm":
        return "webm"
    if mime_type == "audio/raw":
        return "dat"

    return mime_type

@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    temp_dir = tempfile.gettempdir()

    # Create a temporary file with the appropriate extension
    input_ext = convert_mime_type_to_format(mime_type)
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    with open(input_path, 'wb') as f:
        f.write(audio)

    # Check if the input file exists
    assert os.path.exists(input_path), f"Input file does not exist: {input_path}"

    # Export to wav
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
    print(mime_type, input_path, output_path)
    if mime_type == "audio/raw":
        ffmpeg.input(
            input_path,
            f='s16le',
            ar='16000',
            ac=1,
        ).output(output_path).run()
    else:
        ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

    try:
        yield output_path
    finally:
        os.remove(input_path)
        os.remove(output_path)

def run_command(command):
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    return result.stdout, result.stderr

def get_transcription_file(wav_file_path: str):
    local_path = os.path.join(os.path.dirname(__file__), 'local_service')
    whisper_rust_path = os.path.join(os.path.dirname(__file__), 'whisper-rust', 'target', 'release')
    model_name = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')

    output, error = run_command([
        os.path.join(whisper_rust_path, 'whisper-rust'),
        '--model-path', os.path.join(local_path, model_name),
        '--file-path', wav_file_path
    ])

    return output

def get_transcription_bytes(audio_bytes: bytearray, mime_type):
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        return get_transcription_file(wav_file_path)

def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        return stt_wav(wav_file_path)

def stt_wav(wav_file_path: str):

    # Use a context manager so the file handle is closed after the request
    with open(wav_file_path, "rb") as audio_file:
        try:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )
        except openai.BadRequestError as e:
            print(f"openai.BadRequestError: {e}")
            return None

    return transcript

def stt(input_data, mime_type="audio/wav"):
    if isinstance(input_data, str):
        return stt_wav(input_data)
    elif isinstance(input_data, bytearray):
        return stt_bytes(input_data, mime_type)
    else:
        raise ValueError("Input data should be either a path to a wav file (str) or audio bytes (bytearray)")
@@ -1,30 +0,0 @@
import ffmpeg
import tempfile
from openai import OpenAI
import os
import subprocess

client = OpenAI()

class Tts:
    def __init__(self, config):
        pass

    def tts(self, text):
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="opus"
        )
        with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
            response.stream_to_file(temp_file.name)

        # TODO: hack to format audio correctly for device
        outfile = tempfile.gettempdir() + "/" + "raw.dat"
        ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run()

        return outfile
@ -1,84 +0,0 @@
import ffmpeg
import tempfile
import os
import subprocess
import urllib.request
import tarfile


class Tts:
    def __init__(self, config):
        self.piper_directory = ""
        self.install(config["service_directory"])

    def tts(self, text):
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            output_file = temp_file.name
            piper_dir = self.piper_directory
            subprocess.run([
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')),
                '--output_file', output_file
            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        # TODO: hack to format audio correctly for device
        outfile = tempfile.gettempdir() + "/" + "raw.dat"
        ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run()

        return outfile

    def install(self, service_directory):
        PIPER_FOLDER_PATH = service_directory
        self.piper_directory = os.path.join(PIPER_FOLDER_PATH, 'piper')
        if not os.path.isdir(self.piper_directory):  # Check if the Piper directory exists
            os.makedirs(PIPER_FOLDER_PATH, exist_ok=True)

            # Determine OS and architecture
            OS = os.uname().sysname
            ARCH = os.uname().machine
            if OS == "Darwin":
                OS = "macos"
                if ARCH == "arm64":
                    ARCH = "aarch64"
                elif ARCH == "x86_64":
                    ARCH = "x64"
                else:
                    print("Piper: unsupported architecture")
                    return

            PIPER_ASSETNAME = f"piper_{OS}_{ARCH}.tar.gz"
            PIPER_URL = "https://github.com/rhasspy/piper/releases/latest/download/"

            # Download and extract Piper
            urllib.request.urlretrieve(f"{PIPER_URL}{PIPER_ASSETNAME}", os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME))
            with tarfile.open(os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME), 'r:gz') as tar:
                tar.extractall(path=PIPER_FOLDER_PATH)

            PIPER_VOICE_URL = os.getenv('PIPER_VOICE_URL', 'https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/')
            PIPER_VOICE_NAME = os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')

            # Download the voice model and its json file
            urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}", os.path.join(self.piper_directory, PIPER_VOICE_NAME))
            urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}.json", os.path.join(self.piper_directory, f"{PIPER_VOICE_NAME}.json"))

            # Additional setup for macOS
            if OS == "macos":
                if ARCH == "x64":
                    subprocess.run(['softwareupdate', '--install-rosetta', '--agree-to-license'])

                PIPER_PHONEMIZE_ASSETNAME = f"piper-phonemize_{OS}_{ARCH}.tar.gz"
                PIPER_PHONEMIZE_URL = "https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
                urllib.request.urlretrieve(f"{PIPER_PHONEMIZE_URL}{PIPER_PHONEMIZE_ASSETNAME}", os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME))

                with tarfile.open(os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME), 'r:gz') as tar:
                    tar.extractall(path=self.piper_directory)

                # Rewrite dylib load paths so the piper binary finds the bundled libs
                PIPER_DIR = self.piper_directory
                subprocess.run(['install_name_tool', '-change', '@rpath/libespeak-ng.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib", f"{PIPER_DIR}/piper"])
                subprocess.run(['install_name_tool', '-change', '@rpath/libonnxruntime.1.14.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib", f"{PIPER_DIR}/piper"])
                subprocess.run(['install_name_tool', '-change', '@rpath/libpiper_phonemize.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib", f"{PIPER_DIR}/piper"])

            print("Piper setup completed.")
        else:
            print("Piper already set up. Skipping download.")
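A usage sketch for this class; the service directory is illustrative, and the first call needs network access to download Piper:

```python
# Hypothetical usage: install() populates the directory on first run,
# then tts() shells out to the piper binary.
tts = Tts({"service_directory": "/tmp/01-services"})
raw_path = tts.tts("Hello from the 01.")
print(raw_path)  # headerless s16le PCM, 16 kHz mono, ready for the device
```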
@ -1,34 +0,0 @@
import threading
from datetime import datetime
import json
import subprocess
import requests


def send_request(message) -> None:
    url = "http://localhost:8000/"
    data = {"text": message}
    try:
        response = requests.post(url, json=data)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Request failed: {e}")


def schedule(days=0, hours=0, mins=0, secs=0, target_datetime=None, message="") -> None:
    """Schedules a reminder after a specified delay or for a specific datetime. The delay is defined by days, hours, minutes, and seconds. If a target_datetime is provided, it schedules the reminder for that datetime instead."""
    if target_datetime is None:
        # Calculate the delay in seconds if no target_datetime is provided
        delay = days * 86400 + hours * 3600 + mins * 60 + secs
    else:
        # Calculate the delay in seconds from now until the target datetime
        now = datetime.now()
        delay = (target_datetime - now).total_seconds()
        # Ensure the delay is non-negative
        delay = max(0, delay)

    # Create and start a timer that fires send_request(message) after the delay
    timer = threading.Timer(delay, send_request, args=[message])
    timer.start()
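Two illustrative calls, one delay-based and one datetime-based; both assume the 01 server is listening on localhost:8000:

```python
from datetime import datetime, timedelta

# Remind the user in 25 minutes...
schedule(mins=25, message="Check whether the current task is done.")

# ...or at an absolute time, here one hour from now.
schedule(target_datetime=datetime.now() + timedelta(hours=1),
         message="Time for the next task.")
```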
@ -1,240 +0,0 @@
# The dynamic system message is where most of the 01's behavior is configured.
# You can put code into the system message {{ in brackets like this }}
# which will be rendered just before the interpreter starts writing a message.

import os

system_message = r"""

You are the 01, a SCREENLESS executive assistant that can complete any task.
When you execute code, it will be executed on the user's machine. The user has given you full and complete permission to execute any code necessary to complete the task. Execute the code.
You can access the internet. Run any code to achieve the goal, and if at first you don't succeed, try again and again.
You can install new packages.
Be concise. Your messages are being read aloud to the user. DO NOT MAKE PLANS. RUN CODE QUICKLY.
Try to spread complex tasks over multiple code blocks. Don't try to do complex tasks in one go.
Manually summarize text.

Use computer.browser.search for almost everything. Use Applescript frequently.

The user is in Seattle, Washington.

To send email, use Applescript. To check calendar events, use iCal buddy (e.g. `/opt/homebrew/bin/icalBuddy eventsFrom:today to:+7`)

DON'T TELL THE USER THE METHOD YOU'LL USE. Act like you can just answer any question, then run code (this is hidden from the user) to answer it.

Your responses should be very short, no more than 1-2 sentences long.

DO NOT USE MARKDOWN. ONLY WRITE PLAIN TEXT. DO NOT USE MARKDOWN.

# TASKS

You should help the user manage their tasks.

Store the user's tasks in a Python list called `tasks`.

---

The user's current task is: {{ tasks[0] if tasks else "No current tasks." }}

{{
if len(tasks) > 1:
    print("The next task is: ", tasks[1])
}}

---

When the user completes the current task, you should remove it from the list and read the next item by running `tasks = tasks[1:]\ntasks[0]`. Then, tell the user what the next task is.

When the user tells you about a set of tasks, you should intelligently order tasks, batch similar tasks, and break down large tasks into smaller tasks (for this, you should consult the user and get their permission to break it down). Your goal is to manage the task list as intelligently as possible, to make the user as efficient and non-overwhelmed as possible. They will require a lot of encouragement, support, and kindness. Don't say too much about what's ahead of them— just try to focus them on each step at a time.

After starting a task, you should check in with the user around the estimated completion time to see if the task is completed.

To do this, schedule a reminder based on estimated completion time using the function `schedule(days=0, hours=0, mins=0, secs=0, datetime="valid date time", message="Your message here.")`, WHICH HAS ALREADY BEEN IMPORTED. YOU DON'T NEED TO IMPORT THE `schedule` FUNCTION. IT IS AVAILABLE. You'll receive the message at the time you scheduled it.

You guide the user through the list one task at a time, convincing them to move forward, giving a pep talk if need be. Your job is essentially to answer "what should I (the user) be doing right now?" for every moment of the day.

# BROWSER

The Google search result will be returned from this function as a string: `computer.browser.search("query")`

# CRITICAL NOTES

Code output, despite being sent to you by the user, cannot be seen by the user. You NEED to tell the user about the output of the code, even if that means repeating it exactly. >>The user does not have a screen.<<

ALWAYS REMEMBER: You are running on a device called the 01, where the interface is entirely speech-based. Make your responses to the user VERY short. DO NOT PLAN. BE CONCISE. WRITE CODE TO RUN IT.

Translate things to other languages INSTANTLY and MANUALLY. Don't try to use a translation tool. Summarize things manually. Don't use a summarizer tool.

"""

# OLD SYSTEM MESSAGE

old_system_message = r"""

You are the 01, an executive assistant that can complete **any** task.
When you execute code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. Execute the code.
You can access the internet. Run **any code** to achieve the goal, and if at first you don't succeed, try again and again.
You can install new packages.
Be concise. Your messages are being read aloud to the user. DO NOT MAKE PLANS. Immediately run code.
Try to spread complex tasks over multiple code blocks.
Manually summarize text. You cannot use other libraries to do this. You MUST MANUALLY SUMMARIZE, WITHOUT CODING.

For the user's request, first, choose if you want to use Python, Applescript, Shell, or computer control (below) via Python.

# USER'S TASKS

You should help the user manage their tasks.

Store the user's tasks in a Python list called `tasks`.

---

The user's current task is: {{ tasks[0] if tasks else "No current tasks." }}

{{
if len(tasks) > 1:
    print("The next task is: ", tasks[1])
}}

---

When the user completes the current task, you should remove it from the list and read the next item by running `tasks = tasks[1:]\ntasks[0]`. Then, tell the user what the next task is.

When the user tells you about a set of tasks, you should intelligently order tasks, batch similar tasks, and break down large tasks into smaller tasks (for this, you should consult the user and get their permission to break it down). Your goal is to manage the task list as intelligently as possible, to make the user as efficient and non-overwhelmed as possible. They will require a lot of encouragement, support, and kindness. Don't say too much about what's ahead of them— just try to focus them on each step at a time.

After starting a task, you should check in with the user around the estimated completion time to see if the task is completed. Use the `schedule(datetime, message)` function, which has already been imported.

To do this, schedule a reminder based on estimated completion time using the function `schedule(datetime_object, "Your message here.")`, WHICH HAS ALREADY BEEN IMPORTED. YOU DON'T NEED TO IMPORT THE `schedule` FUNCTION. IT IS AVAILABLE. You'll receive the message at `datetime_object`.

You guide the user through the list one task at a time, convincing them to move forward, giving a pep talk if need be. Your job is essentially to answer "what should I (the user) be doing right now?" for every moment of the day.

# COMPUTER CONTROL (RARE)

You are a computer controlling language model. You can 100% control the user's GUI.

You may use the `computer` Python module (already imported) to control the user's keyboard and mouse, if the task **requires** it:

```python
computer.browser.search(query)

computer.display.view() # Shows you what's on the screen, returns a `pil_image` in case you need it (rarely). **You almost always want to do this first!**

computer.keyboard.hotkey(" ", "command") # Opens spotlight
computer.keyboard.write("hello")

computer.mouse.click("text onscreen") # This clicks on the UI element with that text. Use this **frequently** and get creative! To click a video, you could pass the *timestamp* (which is usually written on the thumbnail) into this.
computer.mouse.move("open recent >") # This moves the mouse over the UI element with that text. Many dropdowns will disappear if you click them. You have to hover over items to reveal more.
computer.mouse.click(x=500, y=500) # Use this very, very rarely. It's highly inaccurate
computer.mouse.click(icon="gear icon") # Moves mouse to the icon with that description. Use this very often

computer.mouse.scroll(-10) # Scrolls down. If you don't find some text on screen that you expected to be there, you probably want to do this
x, y = computer.display.center() # Get your bearings

computer.clipboard.view() # Returns contents of clipboard
computer.os.get_selected_text() # Use frequently. If editing text, the user often wants this
```

You are an image-based AI, you can see images.
Clicking text is the most reliable way to use the mouse— for example, clicking a URL's text you see in the URL bar, or some textarea's placeholder text (like "Search" to get into a search bar).
If you use `plt.show()`, the resulting image will be sent to you. However, if you use `PIL.Image.show()`, the resulting image will NOT be sent to you.
It is very important to make sure you are focused on the right application and window. Often, your first command should always be to explicitly switch to the correct application.
When searching the web, use query parameters. For example, https://www.amazon.com/s?k=monitor
Try multiple methods before saying the task is impossible. **You can do it!**

{{
# Add window information

import sys
import os
import json

original_stdout = sys.stdout
sys.stdout = open(os.devnull, 'w')
original_stderr = sys.stderr
sys.stderr = open(os.devnull, 'w')

try:

    import pywinctl

    active_window = pywinctl.getActiveWindow()

    if active_window:
        app_info = ""

        if "_appName" in active_window.__dict__:
            app_info += (
                "Active Application: " + active_window.__dict__["_appName"]
            )

        if hasattr(active_window, "title"):
            app_info += "\n" + "Active Window Title: " + active_window.title
        elif "_winTitle" in active_window.__dict__:
            app_info += (
                "\n"
                + "Active Window Title:"
                + active_window.__dict__["_winTitle"]
            )

        if app_info != "":
            print(app_info)
except:
    # Non blocking
    pass
finally:
    sys.stdout = original_stdout
    sys.stderr = original_stderr

}}

# SKILLS

Try to use the following functions (assume they're imported) to complete your goals whenever possible:

{{
import sys
import os
import json

from interpreter import interpreter
from pathlib import Path

interpreter.model = "gpt-3.5"

combined_messages = "\\n".join(json.dumps(x) for x in messages[-3:])
#query_msg = interpreter.chat(f"This is the conversation so far: {combined_messages}. What is a <10 words query that could be used to find functions that would help answer the user's question?")
#query = query_msg[0]['content']
query = combined_messages
interpreter.computer.skills.path = '''OI_SKILLS_DIR'''

skills = interpreter.computer.skills.search(query)
lowercase_skills = [skill[0].lower() + skill[1:] for skill in skills]
output = "\\n".join(lowercase_skills)

# VERY HACKY! We should fix this, we hard code it for noisy code^:
print("IGNORE_ALL_ABOVE_THIS_LINE")

print(output)
}}

Remember: You can run Python code outside a function only to call a Python function; all other code must go inside a Python function. ALL imports must go inside the function.

# USE COMMENTS TO PLAN

IF YOU NEED TO THINK ABOUT A PROBLEM: (such as "Here's the plan:"), WRITE IT IN THE COMMENTS of the code block!

For example:
> User: What is 432/7?
> Assistant: Let me use Python to calculate that.
> Assistant Python function call:
> # Here's the plan:
> # 1. Divide the numbers
> # 2. Round it to 3 digits.
> print(round(432/7, 3))
> Assistant: 432 / 7 is 61.714.

# FINAL MESSAGES

ALWAYS REMEMBER: You are running on a device called the 01, where the interface is entirely speech-based. Make your responses to the user **VERY short.**

""".strip().replace("OI_SKILLS_DIR", os.path.join(os.path.dirname(__file__), "skills"))
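The `{{ ... }}` rendering itself happens elsewhere in the codebase. As a rough sketch of the idea (an assumption, not the project's actual renderer), each block can be evaluated as an expression when possible, otherwise executed with its stdout captured:

```python
import contextlib
import io
import re


def render_system_message(template: str, namespace: dict) -> str:
    """Hypothetical renderer: substitute each {{ ... }} block with its value."""
    def run_block(match):
        code = match.group(1).strip()
        try:
            return str(eval(code, namespace))  # expression form, e.g. {{ tasks[0] }}
        except SyntaxError:
            buf = io.StringIO()
            with contextlib.redirect_stdout(buf):
                exec(code, namespace)  # statement form; keep whatever it prints
            return buf.getvalue().strip()
    return re.sub(r"\{\{(.*?)\}\}", run_block, template, flags=re.DOTALL)


print(render_system_message("Current task: {{ tasks[0] }}", {"tasks": ["buy milk"]}))
```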
@ -1,134 +0,0 @@
# The dynamic system message is where most of the 01's behavior is configured.
# You can put code into the system message {{ in brackets like this }}
# which will be rendered just before the interpreter starts writing a message.

import os

system_message = r"""

You are the 01, an executive assistant that can complete **any** task.
When you execute code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. Execute the code.
For the user's request, ALWAYS CHOOSE PYTHON. If the task requires computer control, USE THE computer control (mentioned below) or the Skills library (also mentioned below) via Python.
Try to execute the user's request with the computer control or the Skills library first. ONLY IF the task cannot be completed using the computer control or the Skills library, write your own code.
If you're writing your own code, YOU CAN ACCESS THE INTERNET. Run **any code** to achieve the goal, and if at first you don't succeed, try again and again.
You can install new packages.
Be concise. DO NOT MAKE PLANS. Immediately run code.
Try to spread complex tasks over multiple code blocks.
Manually summarize text. You cannot use other libraries to do this. You MUST MANUALLY SUMMARIZE, WITHOUT CODING.

When a user refers to a filename, they're likely referring to an existing file in the directory you're currently executing code in.

# COMPUTER CONTROL

You are a computer controlling language model. You can 100% control the user's GUI.

You may use the `computer` Python module to control the user's keyboard and mouse, if the task **requires** it:

```python
from interpreter import interpreter
import os
import time

interpreter.computer.browser.search(query)

interpreter.computer.display.view() # Shows you what's on the screen, returns a `pil_image` in case you need it (rarely). **You almost always want to do this first!**

interpreter.computer.keyboard.hotkey(" ", "command") # Opens spotlight
interpreter.computer.keyboard.write("hello")

interpreter.computer.mouse.click("text onscreen") # This clicks on the UI element with that text. Use this **frequently** and get creative! To click a video, you could pass the *timestamp* (which is usually written on the thumbnail) into this.
interpreter.computer.mouse.move("open recent >") # This moves the mouse over the UI element with that text. Many dropdowns will disappear if you click them. You have to hover over items to reveal more.
interpreter.computer.mouse.click(x=500, y=500) # Use this very, very rarely. It's highly inaccurate
interpreter.computer.mouse.click(icon="gear icon") # Moves mouse to the icon with that description. Use this very often

interpreter.computer.mouse.scroll(-10) # Scrolls down. If you don't find some text on screen that you expected to be there, you probably want to do this
x, y = interpreter.computer.display.center() # Get your bearings

interpreter.computer.clipboard.view() # Returns contents of clipboard
interpreter.computer.os.get_selected_text() # Use frequently. If editing text, the user often wants this
```

You are an image-based AI, you can see images.
Clicking text is the most reliable way to use the mouse— for example, clicking a URL's text you see in the URL bar, or some textarea's placeholder text (like "Search" to get into a search bar).
If you use `plt.show()`, the resulting image will be sent to you. However, if you use `PIL.Image.show()`, the resulting image will NOT be sent to you.
It is very important to make sure you are focused on the right application and window. Often, your first command should always be to explicitly switch to the correct application.
When searching the web, use query parameters. For example, https://www.amazon.com/s?k=monitor
Try multiple methods before saying the task is impossible. **You can do it!**

{{

import sys
import os
import json

original_stdout = sys.stdout
sys.stdout = open(os.devnull, 'w')
original_stderr = sys.stderr
sys.stderr = open(os.devnull, 'w')

try:

    import pywinctl

    active_window = pywinctl.getActiveWindow()

    if active_window:
        app_info = ""

        if "_appName" in active_window.__dict__:
            app_info += (
                "Active Application: " + active_window.__dict__["_appName"]
            )

        if hasattr(active_window, "title"):
            app_info += "\n" + "Active Window Title: " + active_window.title
        elif "_winTitle" in active_window.__dict__:
            app_info += (
                "\n"
                + "Active Window Title:"
                + active_window.__dict__["_winTitle"]
            )

        if app_info != "":
            print(app_info)
except:
    pass
finally:
    sys.stdout = original_stdout
    sys.stderr = original_stderr

}}

# SKILLS LIBRARY

This is the skills library. Try to use the following functions to complete your goals WHENEVER POSSIBLE:

{{
import sys
import os
import json

from interpreter import interpreter
from pathlib import Path

interpreter.model = "gpt-3.5"

combined_messages = "\\n".join(json.dumps(x) for x in messages[-3:])
#query_msg = interpreter.chat(f"This is the conversation so far: {combined_messages}. What is a <10 words query that could be used to find functions that would help answer the user's question?")
#query = query_msg[0]['content']
query = combined_messages
interpreter.computer.skills.path = '''OI_SKILLS_DIR'''

skills = interpreter.computer.skills.search(query)
lowercase_skills = [skill[0].lower() + skill[1:] for skill in skills]
output = "\\n".join(lowercase_skills)

# VERY HACKY! We should fix this, we hard code it for noisy code^:
#print("IGNORE_ALL_ABOVE_THIS_LINE")

print(output)
}}

Remember: You can run Python code outside a function only to call a Python function; all other code must go inside a Python function. ALL imports must go inside the function.

""".strip().replace("OI_SKILLS_DIR", os.path.abspath(os.path.join(os.path.dirname(__file__), "skills")))
@ -1,13 +0,0 @@
# test_main.py
import subprocess
import uuid
import pytest
from fastapi.testclient import TestClient


def test_ping(client):
    # `client` is expected to be a TestClient fixture (see the conftest sketch below)
    response = client.get("/ping")
    assert response.status_code == 200
    assert response.text == "pong"
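The `client` fixture is not defined in this file; presumably it lives in a conftest.py that is not part of this diff. A hypothetical sketch of what that fixture could look like (the module path and `app` attribute are assumptions, and importlib is needed because the package name starts with a digit):

```python
# conftest.py (hypothetical)
import importlib

import pytest
from fastapi.testclient import TestClient


@pytest.fixture
def client():
    server = importlib.import_module(".server.server", package="01OS")  # assumed path
    return TestClient(server.app)  # assumes the FastAPI instance is named `app`
```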
@ -1,57 +0,0 @@
import os
import subprocess
import re
import shutil
import time
from ..utils.print_markdown import print_markdown


def create_tunnel(tunnel_method='bore', server_host='localhost', server_port=8000):
    print_markdown("Exposing server to the internet...")

    if tunnel_method == "bore":
        try:
            output = subprocess.check_output('command -v bore', shell=True)
        except subprocess.CalledProcessError:
            print("The bore-cli command is not available. Please run 'cargo install bore-cli'.")
            print("For more information, see https://github.com/ekzhang/bore")
            exit(1)

        time.sleep(6)
        # NOTE: check_output blocks until the tunnel process exits; text=True so
        # the output can be split into lines as str rather than bytes.
        output = subprocess.check_output(f'bore local {server_port} --to bore.pub', shell=True, text=True)

        for line in output.split('\n'):
            if "listening at bore.pub:" in line:
                remote_port = re.search('bore.pub:([0-9]*)', line).group(1)
                print_markdown(f"Your server is being hosted at the following URL: bore.pub:{remote_port}")
                break

    elif tunnel_method == "localtunnel":
        # subprocess.call returns a non-zero exit code when the command is missing
        if subprocess.call('command -v lt', shell=True) != 0:
            print("The 'lt' command is not available.")
            print("Please ensure you have Node.js installed, then run 'npm install -g localtunnel'.")
            print("For more information, see https://github.com/localtunnel/localtunnel")
            exit(1)
        else:
            output = subprocess.check_output(f'npx localtunnel --port {server_port}', shell=True, text=True)
            for line in output.split('\n'):
                if "your url is: https://" in line:
                    remote_url = re.search('https://([a-zA-Z0-9.-]*)', line).group(0).replace('https://', '')
                    print(f"Your server is being hosted at the following URL: {remote_url}")
                    break

    elif tunnel_method == "ngrok":
        if subprocess.call('command -v ngrok', shell=True) != 0:
            print("The ngrok command is not available.")
            print("Please install ngrok using the instructions at https://ngrok.com/docs/getting-started/")
            exit(1)
        else:
            output = subprocess.check_output(f'ngrok http {server_port} --log stdout', shell=True, text=True)
            for line in output.split('\n'):
                if "started tunnel" in line:
                    remote_url = re.search('https://([a-zA-Z0-9.-]*)', line).group(0).replace('https://', '')
                    print(f"Your server is being hosted at the following URL: {remote_url}")
                    break
@ -1,56 +0,0 @@
from datetime import datetime
import os
import contextlib
import tempfile
import ffmpeg
import subprocess


def convert_mime_type_to_format(mime_type: str) -> str:
    if mime_type == "audio/x-wav" or mime_type == "audio/wav":
        return "wav"
    if mime_type == "audio/webm":
        return "webm"
    if mime_type == "audio/raw":
        return "dat"

    return mime_type


@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    temp_dir = tempfile.gettempdir()

    # Create a temporary file with the appropriate extension
    input_ext = convert_mime_type_to_format(mime_type)
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    with open(input_path, 'wb') as f:
        f.write(audio)

    # Check if the input file exists
    assert os.path.exists(input_path), f"Input file does not exist: {input_path}"

    # Export to wav
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
    print(mime_type, input_path, output_path)
    if mime_type == "audio/raw":
        # Raw PCM has no header, so the format must be given explicitly
        ffmpeg.input(
            input_path,
            f='s16le',
            ar='16000',
            ac=1,
        ).output(output_path).run()
    else:
        ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

    try:
        yield output_path
    finally:
        # Only the input is cleaned up; the output wav stays on disk so
        # callers (e.g. bytes_to_wav below) can keep using the returned path
        os.remove(input_path)


def run_command(command):
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    return result.stdout, result.stderr


def bytes_to_wav(audio_bytes: bytearray, mime_type):
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        return wav_file_path
@ -1,38 +0,0 @@
import os
import platform


def get_system_info():
    system = platform.system()

    if system == "Linux":
        # Attempt to identify the specific Linux distribution
        distro = "linux"  # Default to generic 'linux'
        try:
            with open("/etc/os-release") as f:
                os_release_info = f.read().lower()
                if "ubuntu" in os_release_info:
                    return "raspberry-pi-ubuntu"
                elif "raspbian" in os_release_info:
                    return "raspberry-pi-os"
        except FileNotFoundError:
            pass

        # Check for Raspberry Pi hardware
        try:
            with open("/proc/device-tree/model") as f:
                model_info = f.read()
                if "Raspberry Pi" in model_info:
                    if distro == "ubuntu":
                        return "raspberry-pi-ubuntu"
                    return "raspberry-pi"
        except FileNotFoundError:
            pass

        return distro
    elif system == "Darwin":
        return "darwin"
    elif system == "Windows":
        return "windows"
    else:
        return "unknown"
@ -1,71 +0,0 @@
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

import asyncio
import subprocess
import platform

from .logs import setup_logging
from .logs import logger
setup_logging()


def get_kernel_messages():
    """
    Is this the way to do this?
    """
    current_platform = platform.system()

    if current_platform == "Darwin":
        process = subprocess.Popen(['syslog'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        output, _ = process.communicate()
        return output.decode('utf-8')
    elif current_platform == "Linux":
        with open('/var/log/dmesg', 'r') as file:
            return file.read()
    else:
        logger.info("Unsupported platform.")


def custom_filter(message):
    # Check for the {TO_INTERPRETER{ message here }TO_INTERPRETER} pattern
    if '{TO_INTERPRETER{' in message and '}TO_INTERPRETER}' in message:
        start = message.find('{TO_INTERPRETER{') + len('{TO_INTERPRETER{')
        end = message.find('}TO_INTERPRETER}', start)
        return message[start:end]
    # Check for USB mention
    # elif 'USB' in message:
    #     return message
    # # Check for network related keywords
    # elif any(keyword in message for keyword in ['network', 'IP', 'internet', 'LAN', 'WAN', 'router', 'switch']) and "networkStatusForFlags" not in message:
    #     return message
    else:
        return None


last_messages = ""


def check_filtered_kernel():
    messages = get_kernel_messages()
    if not messages:
        # Unsupported platform returns None; nothing to filter
        return ""
    # Drop anything we've already seen (the result must be assigned back,
    # since str.replace does not modify in place)
    messages = messages.replace(last_messages, "")
    messages = messages.split("\n")

    filtered_messages = []
    for message in messages:
        if custom_filter(message):
            filtered_messages.append(message)

    return "\n".join(filtered_messages)


async def put_kernel_messages_into_queue(queue):
    while True:
        text = check_filtered_kernel()
        if text:
            if isinstance(queue, asyncio.Queue):
                await queue.put({"role": "computer", "type": "console", "start": True})
                await queue.put({"role": "computer", "type": "console", "format": "output", "content": text})
                await queue.put({"role": "computer", "type": "console", "end": True})
            else:
                queue.put({"role": "computer", "type": "console", "start": True})
                queue.put({"role": "computer", "type": "console", "format": "output", "content": text})
                queue.put({"role": "computer", "type": "console", "end": True})

        await asyncio.sleep(5)
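For context, a sketch of how another process could get a message to the interpreter through this channel, assuming a platform where the `logger` CLI writes to the log that `get_kernel_messages` reads (true for syslog on macOS, not for dmesg on Linux):

```python
import subprocess

# Wrap the payload in the markers that custom_filter() extracts.
subprocess.run(['logger', '{TO_INTERPRETER{ The nightly backup finished. }TO_INTERPRETER}'])
```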
@ -1,25 +0,0 @@
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

import os
import logging

logger: logging.Logger = logging.getLogger("01")
root_logger: logging.Logger = logging.getLogger()


def _basic_config() -> None:
    logging.basicConfig(
        format="%(message)s"
    )


def setup_logging() -> None:
    env = os.environ.get("LOG_LEVEL", "").upper()
    if env == "DEBUG":
        _basic_config()
        logger.setLevel(logging.DEBUG)
        root_logger.setLevel(logging.DEBUG)
    elif env == "INFO":
        _basic_config()
        logger.setLevel(logging.INFO)
@ -1,28 +0,0 @@
import os
import psutil
import signal


def kill_process_tree():
    pid = os.getpid()  # Get the current process ID
    try:
        # Send SIGKILL to the entire process group to ensure all processes are targeted
        os.killpg(os.getpgid(pid), signal.SIGKILL)
        parent = psutil.Process(pid)
        children = parent.children(recursive=True)
        for child in children:
            print(f"Forcefully terminating child PID {child.pid}")
            child.kill()  # Forcefully kill the child process immediately
        gone, still_alive = psutil.wait_procs(children, timeout=3)

        if still_alive:
            for child in still_alive:
                print(f"Child PID {child.pid} still alive, attempting another kill")
                child.kill()

        print(f"Forcefully terminating parent PID {pid}")
        parent.kill()  # Forcefully kill the parent process immediately
        parent.wait(3)  # Wait for the parent process to terminate
    except psutil.NoSuchProcess:
        print(f"Process {pid} does not exist or is already terminated")
    except psutil.AccessDenied:
        print("Permission denied to terminate some processes")
@ -1,45 +0,0 @@
class Accumulator:
    def __init__(self):
        self.template = {"role": None, "type": None, "format": None, "content": None}
        self.message = dict(self.template)  # copy, so mutations never touch the template

    def accumulate(self, chunk):
        # print(str(chunk)[:100])
        if type(chunk) == dict:

            if "format" in chunk and chunk["format"] == "active_line":
                # We don't do anything with these
                return None

            if "start" in chunk:
                self.message = chunk
                self.message.pop("start")
                return None

            if "content" in chunk:

                if any(self.message[key] != chunk[key] for key in self.message if key != "content"):
                    self.message = chunk
                if "content" not in self.message:
                    self.message["content"] = chunk["content"]
                else:
                    if type(chunk["content"]) == dict:
                        # dicts cannot be concatenated, so concatenate the inner strings
                        self.message["content"]["content"] += chunk["content"]["content"]
                    else:
                        self.message["content"] += chunk["content"]
                return None

            if "end" in chunk:
                # The message is complete; hand it back and reset for the next one
                message = self.message
                self.message = dict(self.template)
                return message

        if type(chunk) == bytes:
            if "content" not in self.message or type(self.message["content"]) != bytes:
                self.message["content"] = b""
            self.message["content"] += chunk
            return None
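A small usage sketch showing the intended chunk protocol: a `start` chunk opens a message, `content` chunks append, and the `end` chunk returns the assembled whole:

```python
acc = Accumulator()
chunks = [
    {"role": "assistant", "type": "message", "start": True},
    {"role": "assistant", "type": "message", "content": "Hello, "},
    {"role": "assistant", "type": "message", "content": "world."},
    {"role": "assistant", "type": "message", "end": True},
]
for chunk in chunks:
    message = acc.accumulate(chunk)
    if message:
        print(message)  # {'role': 'assistant', 'type': 'message', 'content': 'Hello, world.'}
```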
@ -1,9 +0,0 @@
from rich.console import Console
from rich.markdown import Markdown


def print_markdown(markdown_text):
    console = Console()
    md = Markdown(markdown_text)
    print("")
    console.print(md)
    print("")
File diff suppressed because one or more lines are too long
@ -1,52 +0,0 @@
[tool.poetry]
name = "01OS"
packages = [
    {include = "01OS"},
]
include = ["start.py"]
version = "0.0.13"
description = "The open-source language model computer"
authors = ["Killian <killian@openinterpreter.com>"]
license = "AGPL"
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.9,<3.12"
pyaudio = "^0.2.14"
pynput = "^1.7.6"
fastapi = "^0.110.0"
uvicorn = "^0.27.1"
websockets = "^12.0"
python-dotenv = "^1.0.1"
ffmpeg-python = "^0.2.0"
textual = "^0.50.1"
pydub = "^0.25.1"
ngrok = "^1.0.0"
simpleaudio = "^1.0.4"
opencv-python = "^4.9.0.80"
open-interpreter = {version = "0.2.1rc2", extras = ["os"]}
psutil = "^5.9.8"
typer = "^0.9.0"
platformdirs = "^4.2.0"
rich = "^13.7.1"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
01 = "start:app"

[tool.poetry.group.dev.dependencies]
black = "^23.10.1"
isort = "^5.12.0"
pre-commit = "^3.6.2"
pytest = "^8.1.1"

[tool.black]
target-version = ['py311']

[tool.isort]
profile = "black"
multi_line_output = 3
include_trailing_comma = true
@ -1,9 +0,0 @@
; Config for Pytest Runner.
; Suppress DeprecationWarning and UserWarning so they don't spam the interface, but check them periodically.
[pytest]
python_files = tests.py test_*.py
filterwarnings =
    ignore::UserWarning
    ignore::DeprecationWarning
log_cli = true
log_cli_level = INFO
@ -1,145 +0,0 @@
import typer
import asyncio
import platform
import concurrent.futures
import threading
import os
import importlib
# The package name starts with a digit, so it can't be imported with a plain
# `import` statement; importlib is used instead.
create_tunnel = importlib.import_module(".server.tunnel", package="01OS").create_tunnel
import signal

app = typer.Typer()


@app.command()
def run(
    server: bool = typer.Option(False, "--server", help="Run server"),
    server_host: str = typer.Option("0.0.0.0", "--server-host", help="Specify the server host where the server will deploy"),
    server_port: int = typer.Option(8000, "--server-port", help="Specify the server port where the server will deploy"),

    tunnel_service: str = typer.Option("bore", "--tunnel-service", help="Specify the tunnel service"),
    expose: bool = typer.Option(False, "--expose", help="Expose server to internet"),

    client: bool = typer.Option(False, "--client", help="Run client"),
    server_url: str = typer.Option(None, "--server-url", help="Specify the server URL that the client should expect. Defaults to server-host and server-port"),
    client_type: str = typer.Option("auto", "--client-type", help="Specify the client type"),

    llm_service: str = typer.Option("litellm", "--llm-service", help="Specify the LLM service"),

    model: str = typer.Option("gpt-4", "--model", help="Specify the model"),
    llm_supports_vision: bool = typer.Option(False, "--llm-supports-vision", help="Specify if the LLM service supports vision"),
    llm_supports_functions: bool = typer.Option(False, "--llm-supports-functions", help="Specify if the LLM service supports functions"),
    context_window: int = typer.Option(2048, "--context-window", help="Specify the context window size"),
    max_tokens: int = typer.Option(4096, "--max-tokens", help="Specify the maximum number of tokens"),
    temperature: float = typer.Option(0.8, "--temperature", help="Specify the temperature for generation"),

    tts_service: str = typer.Option("openai", "--tts-service", help="Specify the TTS service"),

    stt_service: str = typer.Option("openai", "--stt-service", help="Specify the STT service"),

    local: bool = typer.Option(False, "--local", help="Use recommended local services for LLM, STT, and TTS"),
):
    _run(
        server=server,
        server_host=server_host,
        server_port=server_port,
        tunnel_service=tunnel_service,
        expose=expose,
        client=client,
        server_url=server_url,
        client_type=client_type,
        llm_service=llm_service,
        model=model,
        llm_supports_vision=llm_supports_vision,
        llm_supports_functions=llm_supports_functions,
        context_window=context_window,
        max_tokens=max_tokens,
        temperature=temperature,
        tts_service=tts_service,
        stt_service=stt_service,
        local=local
    )


def _run(
    server: bool = False,
    server_host: str = "0.0.0.0",
    server_port: int = 8000,

    tunnel_service: str = "bore",
    expose: bool = False,

    client: bool = False,
    server_url: str = None,
    client_type: str = "auto",

    llm_service: str = "litellm",

    model: str = "gpt-4",
    llm_supports_vision: bool = False,
    llm_supports_functions: bool = False,
    context_window: int = 2048,
    max_tokens: int = 4096,
    temperature: float = 0.8,

    tts_service: str = "openai",

    stt_service: str = "openai",

    local: bool = False
):
    if local:
        tts_service = "piper"
        # llm_service = "llamafile"
        stt_service = "local-whisper"

    if not server_url:
        server_url = f"{server_host}:{server_port}"

    if not server and not client:
        server = True
        client = True

    def handle_exit(signum, frame):
        os._exit(0)

    signal.signal(signal.SIGINT, handle_exit)

    if server:
        main = importlib.import_module(".server.server", package="01OS").main
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        server_thread = threading.Thread(target=loop.run_until_complete, args=(main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service),))
        server_thread.start()

    if expose:
        tunnel_thread = threading.Thread(target=create_tunnel, args=[tunnel_service, server_host, server_port])
        tunnel_thread.start()

    if client:
        if client_type == "auto":
            system_type = platform.system()
            if system_type == "Darwin":  # Mac OS
                client_type = "mac"
            elif system_type == "Linux":  # Linux System
                try:
                    with open('/proc/device-tree/model', 'r') as m:
                        if 'raspberry pi' in m.read().lower():
                            client_type = "rpi"
                        else:
                            client_type = "linux"
                except FileNotFoundError:
                    client_type = "linux"

        module = importlib.import_module(f".clients.{client_type}.device", package='01OS')
        client_thread = threading.Thread(target=module.main, args=[server_url])
        client_thread.start()

    try:
        if server:
            server_thread.join()
        if expose:
            tunnel_thread.join()
        if client:
            client_thread.join()
    except KeyboardInterrupt:
        os.kill(os.getpid(), signal.SIGINT)
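For reference, the programmatic equivalents of a few illustrative invocations of the `01` entry point defined in pyproject.toml (the tunnel URL below is a placeholder):

```python
# 01                  -> neither flag given, so both server and client start
_run()

# 01 --server --expose --tunnel-service ngrok
_run(server=True, expose=True, tunnel_service="ngrok")

# 01 --client --server-url bore.pub:43210    (placeholder URL)
_run(client=True, server_url="bore.pub:43210")
```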
@ -1,5 +1,5 @@
- [ ] What does 01OS look like when you boot it up?
- [ ] What does 01OS look like when it's running?
- [ ] What does _01OS look like when you boot it up?
- [ ] What does _01OS look like when it's running?
- [ ] What does the 01 website look like?

Awaiting hardware design decisions until the hardware team has decided whether we're starting from scratch or repurposing existing hardware.