Merge remote-tracking branch 'upstream/main' into u/shivenmian/teach

pull/33/head
Shiven Mian 12 months ago
commit 7469e684d6

5
.gitignore vendored

@ -1,10 +1,13 @@
ggml-*.bin
OS/01/local_tts/*
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
01OS/01OS/server/conversations/user.json
01OS/01OS/server/tts/local_service/*
01OS/01OS/server/stt/local_service/*
# C extensions
*.so

@ -6,7 +6,7 @@
ALL_LOCAL=False
WHISPER_MODEL_NAME="ggml-tiny.en.bin"
# Uncomment and set the OpenAI API key for OpenInterpreter to work
# Uncomment to set your OpenAI API key
# OPENAI_API_KEY=sk-...
# For TTS, we use the en_US-lessac-medium voice model by default
@ -19,17 +19,18 @@ PIPER_VOICE_NAME="en_US-lessac-medium.onnx"
#NGROK_AUTHTOKEN="AUTH TOKEN"
# If SERVER_START, this is where we'll serve the server.
# If DEVICE_START, this is where the device expects the server to be.
# If CLIENT_START, this is where the client expects the server to be.
SERVER_URL=ws://localhost:8000/
# If you are setting up Ngrok then either change the below to Ngrok URL if running device separately, else comment it
SERVER_CONNECTION_URL=ws://localhost:8000/
SERVER_START=True
DEVICE_START=True
CLIENT_START=True
# Control where various operations happen— can be `device` or `server`.
# Explicitly set the client type (macos, rpi)
CLIENT_TYPE=auto
# Control where various operations happen— can be `client` or `server`.
CODE_RUNNER=server
TTS_RUNNER=server # If device, audio will be sent over websocket.
STT_RUNNER=device # If server, audio will be sent over websocket.
TTS_RUNNER=server # If client, audio will be sent over websocket.
STT_RUNNER=client # If server, audio will be sent over websocket.
# Will expose the server publicly and display that URL.
SERVER_EXPOSE_PUBLICALLY=False

@ -0,0 +1,242 @@
from dotenv import load_dotenv
load_dotenv() # take environment variables from .env.
import asyncio
import threading
import os
import pyaudio
from starlette.websockets import WebSocket
from queue import Queue
from pynput import keyboard
import json
import traceback
import websockets
import queue
import pydub
import ast
from pydub import AudioSegment
from pydub.playback import play
import io
import time
import wave
import tempfile
from datetime import datetime
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
from ..server.utils.kernel import put_kernel_messages_into_queue
from ..server.utils.get_system_info import get_system_info
from ..server.stt.stt import stt_wav
from ..server.utils.logs import setup_logging
from ..server.utils.logs import logger
setup_logging()
# Configuration for Audio Recording
CHUNK = 1024 # Record in chunks of 1024 samples
FORMAT = pyaudio.paInt16 # 16 bits per sample
CHANNELS = 1 # Mono
RATE = 44100 # Sample rate
RECORDING = False # Flag to control recording state
SPACEBAR_PRESSED = False # Flag to track spacebar press state
# Specify OS
current_platform = get_system_info()
# Initialize PyAudio
p = pyaudio.PyAudio()
import asyncio
send_queue = queue.Queue()
class Device:
    """Push-to-talk client: records audio and exchanges messages with the server.

    Input comes from the spacebar (pynput) or, on a Raspberry Pi, from a GPIO
    button read through ``gpiomon``. Outgoing messages are placed on the
    module-level ``send_queue`` and forwarded over the websocket by
    ``message_sender``.
    """

    def __init__(self):
        pass

    def record_audio(self):
        """Record from the microphone while RECORDING is set, then queue the result.

        A "start" message is queued first. Audio is written to a temporary WAV
        file until the module-level RECORDING flag is cleared. Recordings
        shorter than 0.3 seconds are treated as a tap and produce a "stop"
        message (client STT) or empty audio (server STT). Otherwise either the
        transcription (STT_RUNNER == 'client') or the WAV bytes in CHUNK-sized
        pieces (STT_RUNNER == 'server') are queued, followed by an "end" message.

        Raises:
            Exception: if STT_RUNNER is neither 'client' nor 'server'.
        """
        global RECORDING

        stt_runner = os.getenv('STT_RUNNER')
        if stt_runner == "server":
            # STT will happen on the server. we're sending audio.
            send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "start": True})
        elif stt_runner == "client":
            # STT will happen here, on the client. we're sending text.
            send_queue.put({"role": "user", "type": "message", "start": True})
        else:
            raise Exception("STT_RUNNER must be set to either 'client' or 'server'.")

        # Temporary WAV file that stores the audio data
        wav_path = os.path.join(
            tempfile.gettempdir(),
            f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav",
        )

        try:
            stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
            logger.info("Recording started...")
            try:
                with wave.open(wav_path, 'wb') as wav_file:
                    wav_file.setnchannels(CHANNELS)
                    wav_file.setsampwidth(p.get_sample_size(FORMAT))
                    wav_file.setframerate(RATE)
                    while RECORDING:
                        data = stream.read(CHUNK, exception_on_overflow=False)
                        wav_file.writeframes(data)
                    frame_count = wav_file.getnframes()
            finally:
                # Release the input stream even if reading or writing failed.
                stream.stop_stream()
                stream.close()
            logger.info("Recording stopped.")

            duration = frame_count / RATE
            if duration < 0.3:
                # Just pressed it. Send stop message
                if stt_runner == "client":
                    send_queue.put({"role": "user", "type": "message", "content": "stop"})
                    send_queue.put({"role": "user", "type": "message", "end": True})
                else:
                    send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": ""})
                    send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
            elif stt_runner == "client":
                # Run stt then send text
                text = stt_wav(wav_path)
                send_queue.put({"role": "user", "type": "message", "content": text})
                send_queue.put({"role": "user", "type": "message", "end": True})
            else:
                # Stream audio. Each chunk is sent as the str() of a bytes object.
                with open(wav_path, 'rb') as audio_file:
                    byte_data = audio_file.read(CHUNK)
                    while byte_data:
                        send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
                        byte_data = audio_file.read(CHUNK)
                send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
        finally:
            # Never leave the recording behind, whatever happened above.
            if os.path.exists(wav_path):
                os.remove(wav_path)

    def toggle_recording(self, state):
        """Start or stop recording on a press/release edge.

        Repeated "pressed" events while the button is already down (key
        auto-repeat) are ignored, as are "released" events without a press.

        Args:
            state: truthy for "button pressed", falsy for "button released".
        """
        global RECORDING, SPACEBAR_PRESSED
        if state and not SPACEBAR_PRESSED:
            SPACEBAR_PRESSED = True
            if not RECORDING:
                RECORDING = True
                threading.Thread(target=self.record_audio).start()
        elif not state and SPACEBAR_PRESSED:
            SPACEBAR_PRESSED = False
            RECORDING = False

    def on_press(self, key):
        """Detect spacebar press."""
        if key == keyboard.Key.space:
            self.toggle_recording(True)

    def on_release(self, key):
        """Detect spacebar release (stop recording) and ESC release (exit)."""
        if key == keyboard.Key.space:
            self.toggle_recording(False)
        elif key == keyboard.Key.esc:
            # The previous condition also evaluated `keyboard.Key.c` when Ctrl
            # was released. pynput's Key enum has no member `c`, so that raised
            # AttributeError inside the listener instead of detecting Ctrl+C.
            logger.info("Exiting...")
            os._exit(0)

    async def message_sender(self, websocket):
        """Forward messages from send_queue to the server until cancelled.

        The queue is polled rather than read with a blocking ``get()`` in a
        worker thread: a blocked thread cannot be cancelled, would keep
        running after the connection is gone, and would take (and lose) the
        next queued message.
        """
        while True:
            try:
                message = send_queue.get_nowait()
            except queue.Empty:
                await asyncio.sleep(0.01)
                continue
            await websocket.send(json.dumps(message))
            send_queue.task_done()

    async def websocket_communication(self, WS_URL):
        """Connect to the server and process its messages, reconnecting forever.

        Any error (server not up yet, dropped connection, malformed message)
        is logged and followed by a reconnect attempt after 2 seconds.
        Cancellation is deliberately not caught, so the task can be stopped.
        """
        while True:
            try:
                async with websockets.connect(WS_URL) as websocket:
                    logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
                    sender_task = asyncio.create_task(self.message_sender(websocket))
                    try:
                        await self._receive_messages(websocket)
                    finally:
                        # One sender per connection; stop it with the connection.
                        sender_task.cancel()
            except Exception:
                logger.debug(traceback.format_exc())
                logger.info(f"Connecting to `{WS_URL}`...")
                await asyncio.sleep(2)

    async def _receive_messages(self, websocket):
        """Handle server messages on an open connection until it fails."""
        blank_message = {"role": None, "type": None, "format": None, "content": None}
        message_so_far = dict(blank_message)

        while True:
            message = await websocket.recv()
            logger.debug(f"Got this message from the server: {type(message)} {message}")

            if isinstance(message, (str, bytes)):
                message = json.loads(message)

            # Keep the finished message before resetting the accumulator. The
            # code runner below needs it, and previously read the reset copy.
            completed_message = None
            if message.get("end"):
                logger.debug(f"Complete message from the server: {message_so_far}")
                logger.info("\n")
                completed_message = message_so_far
                message_so_far = dict(blank_message)

            if "content" in message:
                print(message['content'], end="", flush=True)
                continues_current = message_so_far["content"] is not None and all(
                    message_so_far.get(key) == message.get(key)
                    for key in ("role", "type", "format")
                )
                if continues_current:
                    message_so_far["content"] += message["content"]
                else:
                    message_so_far = message

            if message.get("type") == "audio" and "content" in message:
                audio_bytes = bytes(ast.literal_eval(message["content"]))

                # Convert bytes to audio file
                audio_file = io.BytesIO(audio_bytes)
                audio = AudioSegment.from_mp3(audio_file)

                # Play the audio
                play(audio)

                await asyncio.sleep(1)

            # Run the code if that's the client's job
            if os.getenv('CODE_RUNNER') == "client":
                if (
                    completed_message is not None
                    and message.get("type") == "code"
                    and completed_message.get("content") is not None
                ):
                    language = completed_message.get("format")
                    code = completed_message["content"]
                    result = interpreter.computer.run(language, code)
                    send_queue.put(result)

    async def start_async(self):
        """Start the websocket loop and the input listener, then run until stopped.

        Raises:
            ValueError: if the SERVER_URL environment variable is not set.
        """
        # Configuration for WebSocket
        WS_URL = os.getenv('SERVER_URL')
        if not WS_URL:
            raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")

        # Start the WebSocket communication. Keep the task objects referenced
        # so they cannot be garbage-collected while running.
        communication_task = asyncio.create_task(self.websocket_communication(WS_URL))

        # Start watching the kernel if it's your job to do that
        kernel_task = None
        if os.getenv('CODE_RUNNER') == "client":
            kernel_task = asyncio.create_task(put_kernel_messages_into_queue(send_queue))

        # If Raspberry Pi, add the button listener, otherwise use the spacebar
        if current_platform.startswith("raspberry-pi"):
            logger.info("Raspberry Pi detected, using button on GPIO pin 15")
            # Use GPIO pin 15
            pindef = ["gpiochip4", "15"]  # gpiofind PIN15
            print("PINDEF", pindef)

            # HACK: needs passwordless sudo
            process = await asyncio.create_subprocess_exec("sudo", "gpiomon", "-brf", *pindef, stdout=asyncio.subprocess.PIPE)
            while True:
                line = await process.stdout.readline()
                if not line:
                    # gpiomon closed its stdout; stop watching the button.
                    break
                line = line.decode().strip()
                if "FALLING" in line:
                    self.toggle_recording(False)
                elif "RISING" in line:
                    self.toggle_recording(True)
        else:
            # Keyboard listener for spacebar press/release
            listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
            listener.start()

        # Stay here for as long as the connection loop runs. Returning would
        # end asyncio.run(), which cancels every task that is still pending.
        await communication_task

    def start(self):
        """Run the client until it stops, then release PyAudio."""
        try:
            asyncio.run(self.start_async())
        finally:
            p.terminate()

@ -0,0 +1,4 @@
from ..base_device import Device
# Create the desktop client and run it as soon as this module is executed.
# start() runs the asyncio event loop via asyncio.run().
desktop_device = Device()
desktop_device.start()

@ -0,0 +1,4 @@
from ..base_device import Device
# Create the Raspberry Pi client and run it as soon as this module is executed.
# start() runs the asyncio event loop via asyncio.run().
rpi_device = Device()
rpi_device.start()

@ -0,0 +1,8 @@
# Choose which client to launch.
# CLIENT_TYPE (set in .env or by `--client <type>` in the top-level start.sh)
# wins when it names a known client. Anything else, including "auto" or an
# unset value, falls back to the hostname check used before.
DEVICE=$(uname -n)
case "$CLIENT_TYPE" in
    rpi|macos)
        CLIENT="$CLIENT_TYPE"
        ;;
    *)
        if [[ "$DEVICE" == "rpi" ]]; then
            CLIENT="rpi"
        else
            CLIENT="macos"
        fi
        ;;
esac

cd 01OS
# NOTE(review): the client is backgrounded, so this script exits immediately
# and its own PID is not the Python client's PID. Confirm the caller expects that.
if [[ "$CLIENT" == "rpi" ]]; then
    python -m 01OS.clients.rpi.device &
else
    python -m 01OS.clients.macos.device &
fi

Binary file not shown.

@ -100,7 +100,11 @@ print(output)
json.dump([], file)
### SKILLS
interpreter.computer.skills.skills_dir = Path(__file__).parent / 'skills'
interpreter.computer.skills.import_skills()
try:
interpreter.computer.skills.skills_dir = Path(__file__).parent / 'skills'
interpreter.computer.skills.import_skills()
except:
print("Temporarily skipping skills (OI 0.2.1, which is unreleased) so we can push to `pip`.")
pass
return interpreter

@ -1,34 +1,27 @@
from dotenv import load_dotenv
load_dotenv() # take environment variables from .env.
from starlette.websockets import WebSocketDisconnect
import ast
import json
import time
import queue
import os
import traceback
from queue import Queue
from threading import Thread
import threading
import uvicorn
import re
from fastapi import FastAPI
from fastapi.responses import PlainTextResponse
from threading import Thread
from starlette.websockets import WebSocket
from stt import stt_bytes
from tts import tts
from .stt.stt import stt_bytes
from .tts.tts import tts
from pathlib import Path
import asyncio
import urllib.parse
from utils.kernel import put_kernel_messages_into_queue
from i import configure_interpreter
from .utils.kernel import put_kernel_messages_into_queue
from .i import configure_interpreter
from interpreter import interpreter
import ngrok
from utils.logs import setup_logging
from utils.logs import logger
from .utils.logs import setup_logging
from .utils.logs import logger
setup_logging()

Binary file not shown.

@ -14,8 +14,8 @@ import subprocess
import openai
from openai import OpenAI
from utils.logs import setup_logging
from utils.logs import logger
from ..utils.logs import setup_logging
from ..utils.logs import logger
setup_logging()
client = OpenAI()
@ -56,18 +56,19 @@ def run_command(command):
return result.stdout, result.stderr
def get_transcription_file(wav_file_path: str):
whisper_rust_path = os.path.join(os.path.dirname(__file__), 'local_stt', 'whisper-rust')
local_path = os.path.join(os.path.dirname(__file__), 'local_service')
whisper_rust_path = os.path.join(local_path, 'whisper-rust')
model_name = os.getenv('WHISPER_MODEL_NAME')
if not model_name:
raise EnvironmentError("WHISPER_MODEL_NAME environment variable is not set.")
output, error = run_command([
os.path.join(whisper_rust_path, 'whisper-rust'),
'--model-path', os.path.join(whisper_rust_path, model_name),
'--model-path', os.path.join(local_path, model_name),
'--file-path', wav_file_path
])
print("Exciting transcription result:", output)
print("Transcription result:", output)
return output
def get_transcription_bytes(audio_bytes: bytearray, mime_type):

@ -2,17 +2,18 @@
Defines a function which takes text and returns a path to an audio file.
"""
from pydub import AudioSegment
from dotenv import load_dotenv
load_dotenv() # take environment variables from .env.
import tempfile
from openai import OpenAI
from pydub import AudioSegment
from pydub.playback import play
from playsound import playsound
import os
import subprocess
import tempfile
from pydub import AudioSegment
from pydub.playback import play
import simpleaudio as sa
client = OpenAI()
@ -28,13 +29,14 @@ def tts(text, play_audio):
response.stream_to_file(temp_file.name)
if play_audio:
playsound(temp_file.name)
audio = AudioSegment.from_mp3(temp_file.name)
play_audiosegment(audio)
return temp_file.read()
else:
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
output_file = temp_file.name
piper_dir = os.path.join(os.path.dirname(__file__), 'local_tts', 'piper')
piper_dir = os.path.join(os.path.dirname(__file__), 'local_service', 'piper')
subprocess.run([
os.path.join(piper_dir, 'piper'),
'--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME')),
@ -42,5 +44,32 @@ def tts(text, play_audio):
], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if play_audio:
playsound(temp_file.name)
audio = AudioSegment.from_wav(temp_file.name)
play_audiosegment(audio)
return temp_file.read()
def play_audiosegment(audio):
    """Play a pydub AudioSegment through simpleaudio and wait until it finishes.

    The default playback makes some pops at the end of a clip; a fade-out
    plus trailing silence fixes that.

    The padded audio goes through a private temporary WAV file that is
    removed afterwards. A fixed "output_audio.wav" in the current working
    directory would be left behind and could be overwritten by another
    playback.

    Args:
        audio: the pydub AudioSegment to play.
    """
    # Apply a fade-out (optional but recommended to smooth the end)
    audio = audio.fade_out(500)

    # Add silence at the end
    silence_duration_ms = 500  # Duration of silence in milliseconds
    silence = AudioSegment.silent(duration=silence_duration_ms)
    audio_with_padding = audio + silence

    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        # Save the modified audio as a WAV file for compatibility with simpleaudio.
        # export() returns the output file handle; close it so it is not leaked.
        audio_with_padding.export(wav_path, format="wav").close()

        # Load the processed WAV file
        wave_obj = sa.WaveObject.from_wave_file(wav_path)

        # Play the audio
        play_obj = wave_obj.play()

        # Wait for the playback to finish
        play_obj.wait_done()
    finally:
        os.remove(wav_path)

@ -5,8 +5,8 @@ import asyncio
import subprocess
import platform
from utils.logs import setup_logging
from utils.logs import logger
from .logs import setup_logging
from .logs import logger
setup_logging()
def get_kernel_messages():

@ -0,0 +1,33 @@
The open-source language model computer.
```bash
pip install 01OS
```
```bash
01 # This will run a server + attempt to determine and run a client.
# (Behavior can be modified by changing the contents of `.env`)
```
**Expose an 01 server publicly:**
```bash
01 --server --expose # This will print a URL that a client can point to.
```
**Run a specific client:**
```bash
01 --client macos # Options: macos, rpi
```
**Run locally:**
The current default uses OpenAI's services.
The `--local` flag will install and run the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) STT and [Piper](https://github.com/rhasspy/piper) TTS models.
```bash
01 --local # Local client and server
01 --local --server --expose # Expose a local server
```

Binary file not shown.

3517
01OS/poetry.lock generated

File diff suppressed because it is too large Load Diff

@ -0,0 +1,34 @@
[tool.poetry]
name = "01OS"
packages = [
{include = "01OS"},
]
include = [".env.example", "start.py", "start.sh"]
version = "0.0.2"
description = "The open-source language model computer"
authors = ["Killian <killian@openinterpreter.com>"]
license = "AGPL"
readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.9,<3.12"
asyncio = "^3.4.3"
pyaudio = "^0.2.14"
pynput = "^1.7.6"
fastapi = "^0.109.2"
uvicorn = "^0.27.1"
websockets = "^12.0"
python-dotenv = "^1.0.1"
ffmpeg-python = "^0.2.0"
textual = "^0.50.1"
pydub = "^0.25.1"
ngrok = "^1.0.0"
open-interpreter = "^0.2.0"
simpleaudio = "^1.0.4"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
01 = "start:main"

@ -0,0 +1,23 @@
"""
This exists only because the Python package needs a Python entry point.
Just starts `start.sh` with all the same command line arguments. Aliased to 01.
"""
import os
import subprocess
import sys
def main():
    """Run the bundled start.sh, forwarding this process's command line arguments."""
    # start.sh lives next to this file; resolve symlinks so the lookup is stable.
    here = os.path.dirname(os.path.realpath(__file__))
    launcher = os.path.join(here, 'start.sh')

    # check=True: a non-zero exit status from start.sh raises CalledProcessError.
    subprocess.run([launcher, *sys.argv[1:]], check=True)

@ -0,0 +1,172 @@
#!/usr/bin/env bash
### Import Environment Variables from .env
# Paths are resolved relative to this script. On first run, .env is seeded
# from .env.example.
SCRIPT_DIR="$(dirname "$0")"
[ -f "$SCRIPT_DIR/.env" ] || {
    echo "No .env file found. Copying from .env.example..."
    cp "$SCRIPT_DIR/.env.example" "$SCRIPT_DIR/.env"
}
# With `set -a`, every variable assigned while sourcing .env is exported.
set -a
source "$SCRIPT_DIR/.env"
set +a
### COMMAND LINE ARGUMENTS
# Flags override the values loaded from .env. Every check below is a substring
# match against all arguments joined into one string, not a per-argument parse.
# Set both SERVER_START and CLIENT_START to False if "--server" or "--client" is passed as an argument
# (This way, --server runs only the server, --client runs only the client.)
if [[ "$@" == *"--server"* ]] || [[ "$@" == *"--client"* ]]; then
export SERVER_START="False"
export CLIENT_START="False"
fi
# Check if "--local" is passed as an argument
if [[ "$@" == *"--local"* ]]; then
# If "--local" is passed, set ALL_LOCAL to True
export ALL_LOCAL="True"
fi
# Check if "--server" is passed as an argument
if [[ "$@" == *"--server"* ]]; then
# If "--server" is passed, set SERVER_START to True
export SERVER_START="True"
fi
# Check if "--client" is passed as an argument
if [[ "$@" == *"--client"* ]]; then
# If "--client" is passed, set CLIENT_START to True
export CLIENT_START="True"
# Extract the client type from the arguments: the first word after "--client ".
# NOTE(review): if another flag directly follows --client (e.g. `--client --local`),
# that flag becomes CLIENT_TYPE. If --client is the last argument, CLIENT_TYPE
# is set to the empty string.
CLIENT_TYPE=$(echo "$@" | sed -n -e 's/^.*--client //p' | awk '{print $1}')
# If client type is not empty, export it
if [[ ! -z "$CLIENT_TYPE" ]]; then
export CLIENT_TYPE
fi
fi
# Check if "--expose" is passed as an argument
if [[ "$@" == *"--expose"* ]]; then
# If "--expose" is passed, set SERVER_EXPOSE_PUBLICALLY to True
export SERVER_EXPOSE_PUBLICALLY="True"
fi
### SETUP
if [[ "$ALL_LOCAL" == "True" ]]; then
    # if using local models, install the models / executables

    ## WHISPER
    WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"
    WHISPER_PATH="$SCRIPT_DIR/01OS/server/stt/local_service"
    if [[ ! -f "${WHISPER_PATH}/${WHISPER_MODEL_NAME}" ]]; then
        mkdir -p "${WHISPER_PATH}"
        curl -L "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" -o "${WHISPER_PATH}/${WHISPER_MODEL_NAME}"
    fi

    ## PIPER
    # tts.py runs local_service/piper/piper and loads the voice model from the
    # same directory, so those are the files whose presence is checked here.
    # The previous check tested the local_service directory itself with -f,
    # which is never true, so Piper was downloaded again on every run.
    PIPER_INSTALL_DIR="$SCRIPT_DIR/01OS/server/tts/local_service"
    if [[ ! -f "${PIPER_INSTALL_DIR}/piper/piper" ]]; then
        mkdir -p "${PIPER_INSTALL_DIR}"

        OS=$(uname -s)
        ARCH=$(uname -m)
        if [ "$OS" = "Darwin" ]; then
            OS="macos"
            if [ "$ARCH" = "arm64" ]; then
                ARCH="aarch64"
            elif [ "$ARCH" = "x86_64" ]; then
                ARCH="x64"
            else
                echo "Piper: unsupported architecture"
            fi
        fi
        # NOTE(review): outside macOS the asset name uses `uname -s` verbatim
        # (e.g. "Linux"). Confirm this matches the release asset naming.
        PIPER_ASSETNAME="piper_${OS}_${ARCH}.tar.gz"
        PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"

        # Save the current working directory
        CWD=$(pwd)

        # Navigate to SCRIPT_DIR/01OS/server/tts/local_service
        cd "$PIPER_INSTALL_DIR"
        curl -L "${PIPER_URL}${PIPER_ASSETNAME}" -o "${PIPER_ASSETNAME}"
        tar -xvzf "$PIPER_ASSETNAME"
        cd piper

        if [ "$OS" = "macos" ]; then
            if [ "$ARCH" = "x64" ]; then
                softwareupdate --install-rosetta --agree-to-license
            fi
            PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
            PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
            curl -OL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}"
            tar -xvzf "$PIPER_PHONEMIZE_ASSETNAME"

            # Absolute path: install_name_tool records it inside the binary.
            PIPER_DIR=$(pwd)
            install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper"
            install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper"
            install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper"
        fi

        # Navigate back to the current working directory
        cd "$CWD"
    fi

    # The voice model is needed on every platform. It used to be fetched only
    # inside the macOS branch above, so it was never downloaded on Linux.
    if [[ -d "${PIPER_INSTALL_DIR}/piper" && ! -f "${PIPER_INSTALL_DIR}/piper/${PIPER_VOICE_NAME}" ]]; then
        curl -L "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}" -o "${PIPER_INSTALL_DIR}/piper/${PIPER_VOICE_NAME}"
        curl -L "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json" -o "${PIPER_INSTALL_DIR}/piper/${PIPER_VOICE_NAME}.json"
    fi
fi
### START
# Launch the client launcher script in the background and remember its PID.
start_client() {
    printf '%s\n' "Starting client..."
    bash 01OS/clients/start.sh &
    CLIENT_PID=$!
    printf 'client started as process %s\n' "$CLIENT_PID"
}
# Launch the server module in the background and remember its PID.
start_server() {
    printf '%s\n' "Starting server..."
    python -m 01OS.server.server &
    SERVER_PID=$!
    printf 'Server started as process %s\n' "$SERVER_PID"
}
# Kill whichever background processes were started; unset PIDs are skipped.
stop_processes() {
    if [ -n "$CLIENT_PID" ]; then
        echo "Stopping client..."
        kill $CLIENT_PID
    fi
    if [ -n "$SERVER_PID" ]; then
        echo "Stopping server..."
        kill $SERVER_PID
    fi
}
# Stop the child processes when this script receives SIGINT or SIGTERM.
trap stop_processes SIGINT SIGTERM

# SERVER: started only when SERVER_START is True.
[[ "$SERVER_START" == "True" ]] && start_server

# CLIENT: started only when CLIENT_START is True.
[[ "$CLIENT_START" == "True" ]] && start_client

# Block until the client and the server processes have exited.
wait $CLIENT_PID
wait $SERVER_PID
# TTS, STT
# (todo)
# (i think we should start with hosted services)
# LLM
# (disabled, we'll start with hosted services)
# python core/llm/start.py &

1
OS/01/.gitignore vendored

@ -1 +0,0 @@
conversations/user.json

@ -1,14 +0,0 @@
git+https://github.com/KillianLucas/open-interpreter.git
asyncio
PyAudio
pynput
fastapi
uvicorn
websockets
playsound
python-dotenv
ffmpeg-python
textual
pydub
python-dotenv
ngrok

@ -1,123 +0,0 @@
#!/usr/bin/env bash
### Import Environment Variables from .env
if [ ! -f ".env" ]; then
echo "Error: .env file does not exist. To create one, see .env.example for an example."
exit 1
fi
set -a; source .env; set +a
### SETUP
if [[ "$ALL_LOCAL" == "True" ]]; then
# if using local models, install the models / executables
WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"
WHISPER_RUST_PATH="`pwd`/local_stt/whisper-rust"
curl -OL "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" --output-dir ${WHISPER_RUST_PATH}
OS=$(uname -s)
ARCH=$(uname -m)
if [ "$OS" = "Darwin" ]; then
OS="macos"
if [ "$ARCH" = "arm64" ]; then
ARCH="aarch64"
elif [ "$ARCH" = "x86_64" ]; then
ARCH="x64"
else
echo "Piper: unsupported architecture"
fi
fi
PIPER_ASSETNAME="piper_${OS}_${ARCH}.tar.gz"
PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"
mkdir local_tts
cd local_tts
curl -OL "${PIPER_URL}${PIPER_ASSETNAME}"
tar -xvzf $PIPER_ASSETNAME
cd piper
curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}"
curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json"
if [ "$OS" = "macos" ]; then
if [ "$ARCH" = "x64" ]; then
softwareupdate --install-rosetta --agree-to-license
fi
PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
curl -OL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}"
tar -xvzf $PIPER_PHONEMIZE_ASSETNAME
PIPER_DIR=`pwd`
install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper"
install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper"
install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper"
fi
cd ../..
fi
# (for dev, reset the ports we were using)
SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+")
if [ -n "$SERVER_PORT" ]; then
lsof -ti tcp:$SERVER_PORT | xargs kill 2>/dev/null || true
fi
### START
start_device() {
echo "Starting device..."
if [[ -n $NGROK_AUTHTOKEN ]]; then
echo "Waiting for Ngrok to setup"
sleep 7
read -p "Enter the Ngrok URL: " ngrok_url
export SERVER_CONNECTION_URL=$ngrok_url
echo "SERVER_CONNECTION_URL set to $SERVER_CONNECTION_URL"
fi
python device.py &
DEVICE_PID=$!
echo "Device started as process $DEVICE_PID"
}
# Function to start server
start_server() {
echo "Starting server..."
python server.py &
SERVER_PID=$!
echo "Server started as process $SERVER_PID"
}
stop_processes() {
if [[ -n $DEVICE_PID ]]; then
echo "Stopping device..."
kill $DEVICE_PID
fi
if [[ -n $SERVER_PID ]]; then
echo "Stopping server..."
kill $SERVER_PID
fi
}
# Trap SIGINT and SIGTERM to stop processes when the script is terminated
trap stop_processes SIGINT SIGTERM
# SERVER
# Start server if SERVER_START is True
if [[ "$SERVER_START" == "True" ]]; then
start_server
fi
# DEVICE
# Start device if DEVICE_START is True
if [[ "$DEVICE_START" == "True" ]]; then
start_device
fi
# Wait for device and server processes to exit
wait $DEVICE_PID
wait $SERVER_PID
# TTS, STT
# (todo)
# (i think we should start with hosted services)
# LLM
# (disabled, we'll start with hosted services)
# python core/llm/start.py &

@ -1,81 +0,0 @@
# New: The 8th Architecture
```
/01
start.sh # entrypoint, runs server, device, llm
server.py # uses tts and stt if it must, exposes "/"
device.py # also uses tts and stt, hits "/"
llm.py # starts an openai-compatible server
model.llamafile
i.py # creates an interpreter which server just imports
tts.py
stt.py
/conversations
user.json
/skills # files in here will run in the 01's interpreter
schedule.py
...
```
This is flatter and simpler.
**Device** handles the device — i.e. everything the user interacts with, plus watching the kernel and running code (which produces `computer` LMC messages) if `DEVICE_EXECUTE_CODE` is true. It runs TTS and STT and sends LMC messages to "/".
**Server** serves "/", a websocket that accepts `user` LMC messages and sends back `assistant` LMC messages. Runs code (which produces `computer` LMC messages) if `SERVER_EXECUTE_CODE` is true.
**Llm** starts an OpenAI-compatible server with `model.llamafile`. Downloads a heavily quantized Phi-2 if `model.llamafile` doesn't exist.
**I** creates an `interpreter` object. This is where you configure the 01's behavior.
# What is this?
This is the operating system that powers the 01.
# No, I mean what's this folder?
It's the `diff` between 01OS and Ubuntu.
01OS should be a customized version of Linux. Ubuntu is popular, stable, runs on lots of different hardware. **(open question: Should this be Xubuntu, which is lighter? or something else?)**
We want to _build on_ Ubuntu by customizing the stable branch programmatically, not by forking it — which would mean we'd have to maintain the underlying OS, merge in security patches, etc. Yuck.
This folder contains everything we want to change from the base Ubuntu. A folder here represents a folder added/modified at the `root`. You can think of it like the `diff` between 01OS and Ubuntu.
I imagine we'll use something like Cubic to then press this + Ubuntu into an ISO image.
# Setup & Usage
Clone this repo, then run `OS/01/start.sh`.
# Structure
### `start.sh`
The start script's job is to start the `core` and the `app` (in full-screen mode).
### `/core`
The `core`'s job is to:
1. Set up the language model
2. Set up the interpreter
3. Serve the interpreter at "/"
### `/app`
The `app`'s job is to be the interface between the user and the interpreter (text in). This could be text only, audio, video, who knows, but it becomes LMC messages or plain text.
For the first version, I think we should just handle audio in/out. So the `app`'s job here is to:
1. Be a fullscreen app for the user to use 01
2. Turn the user's speech into text and send it to "/"
3. Turn the interpreter's text into speech and play it for the user
### Changes to Linux
We need to make the following changes:
1. Modify the bootloader to just show white circle on black
2. Auto start the start script, `start.sh`
3. Put detectors everywhere, which will put [LMC Messages](https://docs.openinterpreter.com/protocols/lmc-messages) from the computer into `/01/core/queue`. Michael suggested we simply watch and filter the `dmesg` stream (I think that's what it's called?), so I suppose we could have a script like `/01/core/kernel_watcher.py` that puts things into the queue? Honestly knowing we could get it all from one place like that— maybe this should be simpler. Is the queue necessary? How about we just expect the computer to send computer messages to the websocket at `/`? Then yeah, maybe we do have redis there, then instead of looking at that folder, we check the redis queue...
4. (open question: should we do this? do we want the first 01 to be ready for GUI control?) Make the display that's shown to the user (and filled with the `app`) the _secondary_ display. The primary display will be a normal Ubuntu desktop, invisible to the user. Why? So the interpreter can control the primary display "under the hood".

@ -1,6 +1,8 @@
# ○
Official repository for [The 01 Project](https://twitter.com/hellokillian/status/1745875973583896950).
Official pre-release repository for [The 01 Project](https://twitter.com/hellokillian/status/1745875973583896950).
> **11** days remaining until launch
<br>
@ -8,33 +10,70 @@ Official repository for [The 01 Project](https://twitter.com/hellokillian/status
<br>
## Configuration:
Copy the OS/01/.env.example file to OS/01/.env and then configure the environment variables within the file.
## Install Required Libraries:
## Install dependencies:
```bash
# MacOS
brew install portaudio ffmpeg
# Ubuntu
sudo apt-get install portaudio19-dev libav-tools
sudo apt-get install portaudio19-dev ffmpeg
```
## Setup for usage (experimental):
```bash
pip install 01OS
```
**Run the 01 end-to-end:**
```bash
01 # This will run a server + attempt to determine and run a client.
# (Behavior can be modified by changing the contents of `.env`)
```
**Expose an 01 server publicly:**
```bash
01 --server --expose # This will print a URL that a client can point to.
```
**Run a specific client:**
```bash
01 --client macos # Options: macos, rpi
```
**Run locally:**
The current default uses OpenAI's services.
The `--local` flag will install and run the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) STT and [Piper](https://github.com/rhasspy/piper) TTS models.
```bash
python -m pip install -r requirements.txt
01 --local # Local client and server
01 --local --server --expose # Expose a local server
```
NB: Depending on your local Python version, you may run into [this issue↗](https://github.com/TaylorSMarks/playsound/issues/150) installing playsound. Workarounds are provided in the issue.
## Usage
<br>
## Setup for development:
```bash
cd OS/01
bash start.sh
# Clone the repo, cd into the 01OS directory
git clone https://github.com/KillianLucas/01.git
cd 01OS
# Install dependencies, run the commands above
poetry install
poetry run 01
```
If you want to run local text-to-speech and speech-to-text, set `ALL_LOCAL` in the `start.sh` script to True. This will use the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [Piper](https://github.com/rhasspy/piper) models.
**Configuration:**
Copy the `01OS/.env.example` file to `01OS/.env` then configure the environment variables within the file.
<br>
## Background
@ -60,14 +99,3 @@ What we're going to do.
What the 01 will be able to do.
<br>
## Project Management
### [Tasks ↗](https://github.com/KillianLucas/01/blob/main/TASKS.md)
Our master task list.
<br>
> **13** days remaining until launch

Loading…
Cancel
Save