commit 7469e684d6
@@ -0,0 +1,242 @@
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

import asyncio
import threading
import os
import pyaudio
from starlette.websockets import WebSocket
from queue import Queue
from pynput import keyboard
import json
import traceback
import websockets
import queue
import pydub
import ast
from pydub import AudioSegment
from pydub.playback import play
import io
import time
import wave
import tempfile
from datetime import datetime
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
from ..server.utils.kernel import put_kernel_messages_into_queue
from ..server.utils.get_system_info import get_system_info
from ..server.stt.stt import stt_wav

from ..server.utils.logs import setup_logging
from ..server.utils.logs import logger
setup_logging()

# Configuration for Audio Recording
CHUNK = 1024  # Record in chunks of 1024 samples
FORMAT = pyaudio.paInt16  # 16 bits per sample
CHANNELS = 1  # Mono
RATE = 44100  # Sample rate
RECORDING = False  # Flag to control recording state
SPACEBAR_PRESSED = False  # Flag to track spacebar press state

# Specify OS
current_platform = get_system_info()

# Initialize PyAudio
p = pyaudio.PyAudio()

import asyncio

send_queue = queue.Queue()

class Device:
    def __init__(self):
        pass

    def record_audio(self):

        if os.getenv('STT_RUNNER') == "server":
            # STT will happen on the server. we're sending audio.
            send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "start": True})
        elif os.getenv('STT_RUNNER') == "client":
            # STT will happen here, on the client. we're sending text.
            send_queue.put({"role": "user", "type": "message", "start": True})
        else:
            raise Exception("STT_RUNNER must be set to either 'client' or 'server'.")

        """Record audio from the microphone and add it to the queue."""
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
        logger.info("Recording started...")
        global RECORDING

        # Create a temporary WAV file to store the audio data
        temp_dir = tempfile.gettempdir()
        wav_path = os.path.join(temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
        wav_file = wave.open(wav_path, 'wb')
        wav_file.setnchannels(CHANNELS)
        wav_file.setsampwidth(p.get_sample_size(FORMAT))
        wav_file.setframerate(RATE)

        while RECORDING:
            data = stream.read(CHUNK, exception_on_overflow=False)
            wav_file.writeframes(data)

        wav_file.close()
        stream.stop_stream()
        stream.close()
        logger.info("Recording stopped.")

        duration = wav_file.getnframes() / RATE
        if duration < 0.3:
            # Just pressed it. Send stop message
            if os.getenv('STT_RUNNER') == "client":
                send_queue.put({"role": "user", "type": "message", "content": "stop"})
                send_queue.put({"role": "user", "type": "message", "end": True})
            else:
                send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": ""})
                send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
        else:
            if os.getenv('STT_RUNNER') == "client":
                # Run stt then send text
                text = stt_wav(wav_path)
                send_queue.put({"role": "user", "type": "message", "content": text})
                send_queue.put({"role": "user", "type": "message", "end": True})
            else:
                # Stream audio
                with open(wav_path, 'rb') as audio_file:
                    byte_data = audio_file.read(CHUNK)
                    while byte_data:
                        send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
                        byte_data = audio_file.read(CHUNK)
                send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})

        if os.path.exists(wav_path):
            os.remove(wav_path)

    def toggle_recording(self, state):
        """Toggle the recording state."""
        global RECORDING, SPACEBAR_PRESSED
        if state and not SPACEBAR_PRESSED:
            SPACEBAR_PRESSED = True
            if not RECORDING:
                RECORDING = True
                threading.Thread(target=self.record_audio).start()
        elif not state and SPACEBAR_PRESSED:
            SPACEBAR_PRESSED = False
            RECORDING = False

    def on_press(self, key):
        """Detect spacebar press."""
        if key == keyboard.Key.space:
            self.toggle_recording(True)

    def on_release(self, key):
        """Detect spacebar release and ESC key press."""
        if key == keyboard.Key.space:
            self.toggle_recording(False)
        elif key == keyboard.Key.esc or (key == keyboard.Key.ctrl and keyboard.Key.c):
            logger.info("Exiting...")
            os._exit(0)

    async def message_sender(self, websocket):
        while True:
            message = await asyncio.get_event_loop().run_in_executor(None, send_queue.get)
            await websocket.send(json.dumps(message))
            send_queue.task_done()

    async def websocket_communication(self, WS_URL):
        while True:
            try:
                async with websockets.connect(WS_URL) as websocket:
                    logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
                    asyncio.create_task(self.message_sender(websocket))

                    initial_message = {"role": None, "type": None, "format": None, "content": None}
                    message_so_far = initial_message

                    while True:
                        message = await websocket.recv()

                        logger.debug(f"Got this message from the server: {type(message)} {message}")

                        if type(message) == str:
                            message = json.loads(message)

                        if message.get("end"):
                            logger.debug(f"Complete message from the server: {message_so_far}")
                            logger.info("\n")
                            message_so_far = initial_message

                        if "content" in message:
                            print(message['content'], end="", flush=True)
                            if any(message_so_far[key] != message[key] for key in message_so_far if key != "content"):
                                message_so_far = message
                            else:
                                message_so_far["content"] += message["content"]

                        if message["type"] == "audio" and "content" in message:
                            audio_bytes = bytes(ast.literal_eval(message["content"]))

                            # Convert bytes to audio file
                            audio_file = io.BytesIO(audio_bytes)
                            audio = AudioSegment.from_mp3(audio_file)

                            # Play the audio
                            play(audio)

                            await asyncio.sleep(1)

                        # Run the code if that's the client's job
                        if os.getenv('CODE_RUNNER') == "client":
                            if message["type"] == "code" and "end" in message:
                                language = message_so_far["format"]
                                code = message_so_far["content"]
                                result = interpreter.computer.run(language, code)
                                send_queue.put(result)

            except:
                # traceback.print_exc()
                logger.info(f"Connecting to `{WS_URL}`...")
                await asyncio.sleep(2)

    async def start_async(self):
        # Configuration for WebSocket
        WS_URL = os.getenv('SERVER_URL')
        if not WS_URL:
            raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")

        # Start the WebSocket communication
        asyncio.create_task(self.websocket_communication(WS_URL))

        # Start watching the kernel if it's your job to do that
        if os.getenv('CODE_RUNNER') == "client":
            asyncio.create_task(put_kernel_messages_into_queue(send_queue))

        # If Raspberry Pi, add the button listener, otherwise use the spacebar
        if current_platform.startswith("raspberry-pi"):
            logger.info("Raspberry Pi detected, using button on GPIO pin 15")
            # Use GPIO pin 15
            pindef = ["gpiochip4", "15"]  # gpiofind PIN15
            print("PINDEF", pindef)

            # HACK: needs passwordless sudo
            process = await asyncio.create_subprocess_exec("sudo", "gpiomon", "-brf", *pindef, stdout=asyncio.subprocess.PIPE)
            while True:
                line = await process.stdout.readline()
                if line:
                    line = line.decode().strip()
                    if "FALLING" in line:
                        self.toggle_recording(False)
                    elif "RISING" in line:
                        self.toggle_recording(True)
                else:
                    break
        else:
            # Keyboard listener for spacebar press/release
            listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
            listener.start()

    def start(self):
        asyncio.run(self.start_async())
        p.terminate()
@@ -0,0 +1,4 @@
from ..base_device import Device

desktop_device = Device()
desktop_device.start()
@@ -0,0 +1,4 @@
from ..base_device import Device

rpi_device = Device()
rpi_device.start()
@@ -0,0 +1,8 @@
DEVICE=$(uname -n)
if [[ "$DEVICE" == "rpi" ]]; then
    cd 01OS
    python -m 01OS.clients.rpi.device &
else
    cd 01OS
    python -m 01OS.clients.macos.device &
fi
@@ -1,34 +1,27 @@
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

from starlette.websockets import WebSocketDisconnect
import ast
import json
import time
import queue
import os
import traceback
from queue import Queue
from threading import Thread
import threading
import uvicorn
import re
from fastapi import FastAPI
from fastapi.responses import PlainTextResponse
from threading import Thread
from starlette.websockets import WebSocket
from stt import stt_bytes
from tts import tts
from .stt.stt import stt_bytes
from .tts.tts import tts
from pathlib import Path
import asyncio
import urllib.parse
from utils.kernel import put_kernel_messages_into_queue
from i import configure_interpreter
from .utils.kernel import put_kernel_messages_into_queue
from .i import configure_interpreter
from interpreter import interpreter
import ngrok

from utils.logs import setup_logging
from utils.logs import logger
from .utils.logs import setup_logging
from .utils.logs import logger
setup_logging()

@@ -0,0 +1,33 @@
The open-source language model computer.

```bash
pip install 01OS
```

```bash
01 # This will run a server + attempt to determine and run a client.
# (Behavior can be modified by changing the contents of `.env`)
```
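
For reference, these are the switches that `start.sh` and the clients read from the environment. The snippet below is just an illustrative way to inspect them; the variable names come from this repo, but the fallback values shown are assumptions, not shipped defaults.

```python
# Illustrative only: print the .env switches this package consults.
import os
from dotenv import load_dotenv

load_dotenv()  # same mechanism the client and server use

for name, assumed_default in [
    ("SERVER_START", "True"),                # start.sh: launch the server
    ("CLIENT_START", "True"),                # start.sh: launch a client
    ("CLIENT_TYPE", "macos"),                # start.sh: which client (macos, rpi)
    ("ALL_LOCAL", "False"),                  # start.sh: install/run local STT and TTS
    ("SERVER_EXPOSE_PUBLICALLY", "False"),   # start.sh: expose via ngrok (spelling as in start.sh)
    ("SERVER_URL", "ws://localhost:8000/"),  # client: websocket to connect to (assumed value)
    ("STT_RUNNER", "client"),                # who runs speech-to-text: "client" or "server"
    ("CODE_RUNNER", "client"),               # who executes code: "client" or "server"
]:
    print(f"{name}={os.getenv(name, assumed_default)}")
```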

**Expose an 01 server publicly:**

```bash
01 --server --expose # This will print a URL that a client can point to.
```

**Run a specific client:**

```bash
01 --client macos # Options: macos, rpi
```

**Run locally:**

The current default uses OpenAI's services.

The `--local` flag will install and run the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) STT and [Piper](https://github.com/rhasspy/piper) TTS models.

```bash
01 --local # Local client and server
01 --local --server --expose # Expose a local server
```
@@ -0,0 +1,34 @@
[tool.poetry]
name = "01OS"
packages = [
    {include = "01OS"},
]
include = [".env.example", "start.py", "start.sh"]
version = "0.0.2"
description = "The open-source language model computer"
authors = ["Killian <killian@openinterpreter.com>"]
license = "AGPL"
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.9,<3.12"
asyncio = "^3.4.3"
pyaudio = "^0.2.14"
pynput = "^1.7.6"
fastapi = "^0.109.2"
uvicorn = "^0.27.1"
websockets = "^12.0"
python-dotenv = "^1.0.1"
ffmpeg-python = "^0.2.0"
textual = "^0.50.1"
pydub = "^0.25.1"
ngrok = "^1.0.0"
open-interpreter = "^0.2.0"
simpleaudio = "^1.0.4"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
01 = "start:main"
@@ -0,0 +1,23 @@
"""
This is just for the Python package — we need a Python entrypoint.
Just starts `start.sh` with all the same command line arguments. Aliased to 01.
"""

import os
import subprocess
import sys

def main():

    # Get command line arguments
    args = sys.argv[1:]

    # Get the directory of the current script
    dir_path = os.path.dirname(os.path.realpath(__file__))

    # Prepare the command
    command = [os.path.join(dir_path, 'start.sh')] + args

    # Start start.sh with the command line arguments
    subprocess.run(command, check=True)
@@ -0,0 +1,172 @@
#!/usr/bin/env bash

### Import Environment Variables from .env
SCRIPT_DIR="$(dirname "$0")"
if [ ! -f "$SCRIPT_DIR/.env" ]; then
    echo "No .env file found. Copying from .env.example..."
    cp "$SCRIPT_DIR/.env.example" "$SCRIPT_DIR/.env"
fi
set -a; source "$SCRIPT_DIR/.env"; set +a

### COMMAND LINE ARGUMENTS

# Set both SERVER_START and CLIENT_START to False if "--server" or "--client" is passed as an argument
# (This way, --server runs only the server, --client runs only the client.)
if [[ "$@" == *"--server"* ]] || [[ "$@" == *"--client"* ]]; then
    export SERVER_START="False"
    export CLIENT_START="False"
fi

# Check if "--local" is passed as an argument
if [[ "$@" == *"--local"* ]]; then
    # If "--local" is passed, set ALL_LOCAL to True
    export ALL_LOCAL="True"
fi

# Check if "--server" is passed as an argument
if [[ "$@" == *"--server"* ]]; then
    # If "--server" is passed, set SERVER_START to True
    export SERVER_START="True"
fi

# Check if "--client" is passed as an argument
if [[ "$@" == *"--client"* ]]; then
    # If "--client" is passed, set CLIENT_START to True
    export CLIENT_START="True"
    # Extract the client type from the arguments
    CLIENT_TYPE=$(echo "$@" | sed -n -e 's/^.*--client //p' | awk '{print $1}')
    # If client type is not empty, export it
    if [[ ! -z "$CLIENT_TYPE" ]]; then
        export CLIENT_TYPE
    fi
fi

# Check if "--expose" is passed as an argument
if [[ "$@" == *"--expose"* ]]; then
    # If "--expose" is passed, set SERVER_EXPOSE_PUBLICALLY to True
    export SERVER_EXPOSE_PUBLICALLY="True"
fi

### SETUP

if [[ "$ALL_LOCAL" == "True" ]]; then
    # if using local models, install the models / executables

    ## WHISPER

    WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"
    WHISPER_PATH="$SCRIPT_DIR/01OS/server/stt/local_service"
    if [[ ! -f "${WHISPER_PATH}/${WHISPER_MODEL_NAME}" ]]; then
        mkdir -p "${WHISPER_PATH}"
        curl -L "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" -o "${WHISPER_PATH}/${WHISPER_MODEL_NAME}"
    fi

    ## PIPER

    PIPER_FILE_PATH="$SCRIPT_DIR/01OS/server/tts/local_service${PIPER_URL}${PIPER_ASSETNAME}"
    if [[ ! -f "$PIPER_FILE_PATH" ]]; then

        mkdir -p "${PIPER_FILE_PATH}"

        OS=$(uname -s)
        ARCH=$(uname -m)
        if [ "$OS" = "Darwin" ]; then
            OS="macos"
            if [ "$ARCH" = "arm64" ]; then
                ARCH="aarch64"
            elif [ "$ARCH" = "x86_64" ]; then
                ARCH="x64"
            else
                echo "Piper: unsupported architecture"
            fi
        fi
        PIPER_ASSETNAME="piper_${OS}_${ARCH}.tar.gz"
        PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"

        # Save the current working directory
        CWD=$(pwd)

        # Navigate to SCRIPT_DIR/01OS/server/tts/local_service
        cd $SCRIPT_DIR/01OS/server/tts/local_service

        curl -L "${PIPER_URL}${PIPER_ASSETNAME}" -o "${PIPER_ASSETNAME}"
        tar -xvzf $PIPER_ASSETNAME
        cd piper
        if [ "$OS" = "macos" ]; then
            if [ "$ARCH" = "x64" ]; then
                softwareupdate --install-rosetta --agree-to-license
            fi
            PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
            PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"

            curl -OL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}"
            tar -xvzf $PIPER_PHONEMIZE_ASSETNAME
            curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}"
            curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json"
            PIPER_DIR=`pwd`
            install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper"
            install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper"
            install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper"
        fi

        # Navigate back to the current working directory
        cd $CWD
    fi
fi

### START

start_client() {
    echo "Starting client..."
    bash 01OS/clients/start.sh &
    CLIENT_PID=$!
    echo "client started as process $CLIENT_PID"
}

# Function to start server
start_server() {
    echo "Starting server..."
    python -m 01OS.server.server &
    SERVER_PID=$!
    echo "Server started as process $SERVER_PID"
}

stop_processes() {
    if [[ -n $CLIENT_PID ]]; then
        echo "Stopping client..."
        kill $CLIENT_PID
    fi
    if [[ -n $SERVER_PID ]]; then
        echo "Stopping server..."
        kill $SERVER_PID
    fi
}

# Trap SIGINT and SIGTERM to stop processes when the script is terminated
trap stop_processes SIGINT SIGTERM

# SERVER
# Start server if SERVER_START is True
if [[ "$SERVER_START" == "True" ]]; then
    start_server
fi

# CLIENT
# Start client if CLIENT_START is True
if [[ "$CLIENT_START" == "True" ]]; then
    start_client
fi

# Wait for client and server processes to exit
wait $CLIENT_PID
wait $SERVER_PID

# TTS, STT

# (todo)
# (i think we should start with hosted services)

# LLM

# (disabled, we'll start with hosted services)
# python core/llm/start.py &
@@ -1 +0,0 @@
conversations/user.json
@@ -1,14 +0,0 @@
git+https://github.com/KillianLucas/open-interpreter.git
asyncio
PyAudio
pynput
fastapi
uvicorn
websockets
playsound
python-dotenv
ffmpeg-python
textual
pydub
python-dotenv
ngrok
@@ -1,123 +0,0 @@
#!/usr/bin/env bash

### Import Environment Variables from .env
if [ ! -f ".env" ]; then
    echo "Error: .env file does not exist. To create one, see .env.example for an example."
    exit 1
fi
set -a; source .env; set +a

### SETUP

if [[ "$ALL_LOCAL" == "True" ]]; then
    # if using local models, install the models / executables
    WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"
    WHISPER_RUST_PATH="`pwd`/local_stt/whisper-rust"
    curl -OL "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" --output-dir ${WHISPER_RUST_PATH}
    OS=$(uname -s)
    ARCH=$(uname -m)
    if [ "$OS" = "Darwin" ]; then
        OS="macos"
        if [ "$ARCH" = "arm64" ]; then
            ARCH="aarch64"
        elif [ "$ARCH" = "x86_64" ]; then
            ARCH="x64"
        else
            echo "Piper: unsupported architecture"
        fi
    fi
    PIPER_ASSETNAME="piper_${OS}_${ARCH}.tar.gz"
    PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"
    mkdir local_tts
    cd local_tts
    curl -OL "${PIPER_URL}${PIPER_ASSETNAME}"
    tar -xvzf $PIPER_ASSETNAME
    cd piper
    curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}"
    curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json"
    if [ "$OS" = "macos" ]; then
        if [ "$ARCH" = "x64" ]; then
            softwareupdate --install-rosetta --agree-to-license
        fi
        PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
        PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
        curl -OL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}"
        tar -xvzf $PIPER_PHONEMIZE_ASSETNAME
        PIPER_DIR=`pwd`
        install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper"
        install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper"
        install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper"
    fi
    cd ../..
fi

# (for dev, reset the ports we were using)

SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+")
if [ -n "$SERVER_PORT" ]; then
    lsof -ti tcp:$SERVER_PORT | xargs kill 2>/dev/null || true
fi

### START

start_device() {
    echo "Starting device..."
    if [[ -n $NGROK_AUTHTOKEN ]]; then
        echo "Waiting for Ngrok to setup"
        sleep 7
        read -p "Enter the Ngrok URL: " ngrok_url
        export SERVER_CONNECTION_URL=$ngrok_url
        echo "SERVER_CONNECTION_URL set to $SERVER_CONNECTION_URL"
    fi
    python device.py &
    DEVICE_PID=$!
    echo "Device started as process $DEVICE_PID"
}

# Function to start server
start_server() {
    echo "Starting server..."
    python server.py &
    SERVER_PID=$!
    echo "Server started as process $SERVER_PID"
}

stop_processes() {
    if [[ -n $DEVICE_PID ]]; then
        echo "Stopping device..."
        kill $DEVICE_PID
    fi
    if [[ -n $SERVER_PID ]]; then
        echo "Stopping server..."
        kill $SERVER_PID
    fi
}

# Trap SIGINT and SIGTERM to stop processes when the script is terminated
trap stop_processes SIGINT SIGTERM

# SERVER
# Start server if SERVER_START is True
if [[ "$SERVER_START" == "True" ]]; then
    start_server
fi

# DEVICE
# Start device if DEVICE_START is True
if [[ "$DEVICE_START" == "True" ]]; then
    start_device
fi

# Wait for device and server processes to exit
wait $DEVICE_PID
wait $SERVER_PID

# TTS, STT

# (todo)
# (i think we should start with hosted services)

# LLM

# (disabled, we'll start with hosted services)
# python core/llm/start.py &
@@ -1,81 +0,0 @@
# New: The 8th Architecture

```
/01
  start.sh # entrypoint, runs server, device, llm
  server.py # uses tts and stt if it must, exposes "/"
  device.py # also uses tts and stt, hits "/"
  llm.py # starts an openai-compatible server
  model.llamafile
  i.py # creates an interpreter which server just imports
  tts.py
  stt.py
  /conversations
    user.json
  /skills # files in here will run in the 01's interpreter
    schedule.py
    ...
```

This is flatter and simpler.

**Device** handles the device (everything the user interacts with), watches the kernel, and runs code (which produces `computer` LMC messages) if `DEVICE_EXECUTE_CODE` is true. It runs TTS and STT and sends LMC messages to "/".

**Server** serves "/", a websocket that accepts `user` LMC messages and sends back `assistant` LMC messages. Runs code (which produces `computer` LMC messages) if `SERVER_EXECUTE_CODE` is true.
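
To make the protocol concrete, here's a minimal sketch of a client talking LMC to "/" (illustrative only; the message shapes mirror what `base_device.py` in this commit sends, and the URL is an assumption):

```python
# Illustrative LMC exchange with the "/" websocket. Not the shipped client;
# the URL is assumed, the message shapes follow base_device.py above.
import asyncio
import json
import websockets

async def demo():
    async with websockets.connect("ws://localhost:8000/") as ws:  # assumed address
        # A `user` message is streamed as start -> content -> end.
        await ws.send(json.dumps({"role": "user", "type": "message", "start": True}))
        await ws.send(json.dumps({"role": "user", "type": "message", "content": "What time is it?"}))
        await ws.send(json.dumps({"role": "user", "type": "message", "end": True}))

        # The server streams back `assistant` (and possibly `code`/`audio`) chunks
        # until a chunk carrying "end" arrives.
        while True:
            chunk = json.loads(await ws.recv())
            if chunk.get("content"):
                print(chunk["content"], end="", flush=True)
            if chunk.get("end"):
                break

asyncio.run(demo())
```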

**Llm** starts an OpenAI-compatible server with `model.llamafile`. Downloads a heavily quantized Phi-2 if `model.llamafile` doesn't exist.

**I** creates an `interpreter` object. This is where you configure the 01's behavior.
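
For illustration, `i.py` might look something like this (a sketch only; every setting below is an assumption, not the shipped configuration):

```python
# Illustrative sketch of what i.py's configure_interpreter could do.
# All values here are assumptions, not the repo's actual configuration.

def configure_interpreter(interpreter):
    interpreter.auto_run = True      # let the 01 execute code without asking for confirmation
    interpreter.system_message += "\nYou are the 01, a voice-first language model computer."
    interpreter.llm.model = "gpt-4"  # or point this at the local llamafile's OpenAI-compatible endpoint
    return interpreter
```

The server in this commit imports it as `from .i import configure_interpreter` and applies it to the shared `interpreter` object.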

# What is this?

This is the operating system that powers the 01.

# No, I mean what's this folder?

It's the `diff` between 01OS and Ubuntu.

01OS should be a customized version of Linux. Ubuntu is popular, stable, and runs on lots of different hardware. **(Open question: should this be Xubuntu, which is lighter? Or something else?)**

We want to _build on_ Ubuntu by customizing the stable branch programmatically, not by forking it — which would mean we'd have to maintain the underlying OS, merge in security patches, etc. Yuck.

This folder contains everything we want to change from the base Ubuntu. A folder here represents a folder added/modified at the root. You can think of it like the `diff` between 01OS and Ubuntu.

I imagine we'll use something like Cubic to press this + Ubuntu into an ISO image.

# Setup & Usage

Clone this repo, then run `OS/01/start.sh`.

# Structure

### `start.sh`

The start script's job is to start the `core` and the `app` (in full-screen mode).

### `/core`

The `core`'s job is to:

1. Set up the language model
2. Set up the interpreter
3. Serve the interpreter at "/"

### `/app`

The `app`'s job is to be the interface between the user and the interpreter (text in). This could be text only, audio, video, who knows, but it becomes LMC messages or plain text.

For the first version, I think we should just handle audio in and out. So the `app`'s job here is to:

1. Be a fullscreen app for the user to use the 01
2. Turn the user's speech into text and send it to "/"
3. Turn the interpreter's text into speech and play it for the user

### Changes to Linux

We need to make the following changes:

1. Modify the bootloader to just show a white circle on black
2. Auto-start the start script, `start.sh`
3. Put detectors everywhere, which will put [LMC Messages](https://docs.openinterpreter.com/protocols/lmc-messages) from the computer into `/01/core/queue`. Michael suggested we simply watch and filter the `dmesg` stream, so I suppose we could have a script like `/01/core/kernel_watcher.py` that puts things into the queue (a sketch follows this list). Honestly, knowing we could get it all from one place like that, maybe this should be simpler. Is the queue necessary? How about we just expect the computer to send computer messages to the websocket at "/"? Then yeah, maybe we do have Redis there, and instead of looking at that folder, we check the Redis queue...
4. (Open question: should we do this? Do we want the first 01 to be ready for GUI control?) Make the display that's shown to the user (and filled with the `app`) the _secondary_ display. The primary display will be a normal Ubuntu desktop, invisible to the user. Why? So the interpreter can control the primary display "under the hood".
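
A rough sketch of the kernel-watcher idea from item 3 (the `dmesg` filter and the message fields are assumptions, not a spec):

```python
# Illustrative kernel watcher: follow dmesg, filter lines of interest,
# and queue them as `computer` LMC messages. The filter and the "type"
# field are assumptions; real detectors would be richer. May need root.
import queue
import subprocess

def watch_kernel(q: queue.Queue):
    proc = subprocess.Popen(
        ["dmesg", "--follow"], stdout=subprocess.PIPE, text=True
    )
    for line in proc.stdout:
        if "usb" in line.lower():  # example filter
            q.put({"role": "computer", "type": "message", "content": line.strip()})

if __name__ == "__main__":
    q = queue.Queue()
    watch_kernel(q)
```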