commit 7469e684d6
@@ -0,0 +1,242 @@
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

import asyncio
import threading
import os
import pyaudio
from starlette.websockets import WebSocket
from queue import Queue
from pynput import keyboard
import json
import traceback
import websockets
import queue
import pydub
import ast
from pydub import AudioSegment
from pydub.playback import play
import io
import time
import wave
import tempfile
from datetime import datetime
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
from ..server.utils.kernel import put_kernel_messages_into_queue
from ..server.utils.get_system_info import get_system_info
from ..server.stt.stt import stt_wav

from ..server.utils.logs import setup_logging
from ..server.utils.logs import logger
setup_logging()

# Configuration for Audio Recording
CHUNK = 1024  # Record in chunks of 1024 samples
FORMAT = pyaudio.paInt16  # 16 bits per sample
CHANNELS = 1  # Mono
RATE = 44100  # Sample rate
RECORDING = False  # Flag to control recording state
SPACEBAR_PRESSED = False  # Flag to track spacebar press state

# Specify OS
current_platform = get_system_info()

# Initialize PyAudio
p = pyaudio.PyAudio()

import asyncio

send_queue = queue.Queue()

class Device:
    def __init__(self):
        pass

    def record_audio(self):

        if os.getenv('STT_RUNNER') == "server":
            # STT will happen on the server. we're sending audio.
            send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "start": True})
        elif os.getenv('STT_RUNNER') == "client":
            # STT will happen here, on the client. we're sending text.
            send_queue.put({"role": "user", "type": "message", "start": True})
        else:
            raise Exception("STT_RUNNER must be set to either 'client' or 'server'.")

        """Record audio from the microphone and add it to the queue."""
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
        logger.info("Recording started...")
        global RECORDING

        # Create a temporary WAV file to store the audio data
        temp_dir = tempfile.gettempdir()
        wav_path = os.path.join(temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
        wav_file = wave.open(wav_path, 'wb')
        wav_file.setnchannels(CHANNELS)
        wav_file.setsampwidth(p.get_sample_size(FORMAT))
        wav_file.setframerate(RATE)

        while RECORDING:
            data = stream.read(CHUNK, exception_on_overflow=False)
            wav_file.writeframes(data)

        wav_file.close()
        stream.stop_stream()
        stream.close()
        logger.info("Recording stopped.")

        duration = wav_file.getnframes() / RATE
        if duration < 0.3:
            # Just pressed it. Send stop message
            if os.getenv('STT_RUNNER') == "client":
                send_queue.put({"role": "user", "type": "message", "content": "stop"})
                send_queue.put({"role": "user", "type": "message", "end": True})
            else:
                send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": ""})
                send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})
        else:
            if os.getenv('STT_RUNNER') == "client":
                # Run stt then send text
                text = stt_wav(wav_path)
                send_queue.put({"role": "user", "type": "message", "content": text})
                send_queue.put({"role": "user", "type": "message", "end": True})
            else:
                # Stream audio
                with open(wav_path, 'rb') as audio_file:
                    byte_data = audio_file.read(CHUNK)
                    while byte_data:
                        send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "content": str(byte_data)})
                        byte_data = audio_file.read(CHUNK)
                send_queue.put({"role": "user", "type": "audio", "format": "audio/wav", "end": True})

        if os.path.exists(wav_path):
            os.remove(wav_path)

    def toggle_recording(self, state):
        """Toggle the recording state."""
        global RECORDING, SPACEBAR_PRESSED
        if state and not SPACEBAR_PRESSED:
            SPACEBAR_PRESSED = True
            if not RECORDING:
                RECORDING = True
                threading.Thread(target=self.record_audio).start()
        elif not state and SPACEBAR_PRESSED:
            SPACEBAR_PRESSED = False
            RECORDING = False

    def on_press(self, key):
        """Detect spacebar press."""
        if key == keyboard.Key.space:
            self.toggle_recording(True)

    def on_release(self, key):
        """Detect spacebar release and ESC key press."""
        if key == keyboard.Key.space:
            self.toggle_recording(False)
        elif key == keyboard.Key.esc or (key == keyboard.Key.ctrl and keyboard.Key.c):
            logger.info("Exiting...")
            os._exit(0)

    async def message_sender(self, websocket):
        while True:
            message = await asyncio.get_event_loop().run_in_executor(None, send_queue.get)
            await websocket.send(json.dumps(message))
            send_queue.task_done()

    async def websocket_communication(self, WS_URL):
        while True:
            try:
                async with websockets.connect(WS_URL) as websocket:
                    logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
                    asyncio.create_task(self.message_sender(websocket))

                    initial_message = {"role": None, "type": None, "format": None, "content": None}
                    message_so_far = initial_message

                    while True:
                        message = await websocket.recv()

                        logger.debug(f"Got this message from the server: {type(message)} {message}")

                        if type(message) == str:
                            message = json.loads(message)

                        if message.get("end"):
                            logger.debug(f"Complete message from the server: {message_so_far}")
                            logger.info("\n")
                            message_so_far = initial_message

                        if "content" in message:
                            print(message['content'], end="", flush=True)
                            if any(message_so_far[key] != message[key] for key in message_so_far if key != "content"):
                                message_so_far = message
                            else:
                                message_so_far["content"] += message["content"]

                        if message["type"] == "audio" and "content" in message:
                            audio_bytes = bytes(ast.literal_eval(message["content"]))

                            # Convert bytes to audio file
                            audio_file = io.BytesIO(audio_bytes)
                            audio = AudioSegment.from_mp3(audio_file)

                            # Play the audio
                            play(audio)

                            await asyncio.sleep(1)

                        # Run the code if that's the client's job
                        if os.getenv('CODE_RUNNER') == "client":
                            if message["type"] == "code" and "end" in message:
                                language = message_so_far["format"]
                                code = message_so_far["content"]
                                result = interpreter.computer.run(language, code)
                                send_queue.put(result)

            except:
                # traceback.print_exc()
                logger.info(f"Connecting to `{WS_URL}`...")
                await asyncio.sleep(2)

    async def start_async(self):
        # Configuration for WebSocket
        WS_URL = os.getenv('SERVER_URL')
        if not WS_URL:
            raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")

        # Start the WebSocket communication
        asyncio.create_task(self.websocket_communication(WS_URL))

        # Start watching the kernel if it's your job to do that
        if os.getenv('CODE_RUNNER') == "client":
            asyncio.create_task(put_kernel_messages_into_queue(send_queue))

        # If Raspberry Pi, add the button listener, otherwise use the spacebar
        if current_platform.startswith("raspberry-pi"):
            logger.info("Raspberry Pi detected, using button on GPIO pin 15")
            # Use GPIO pin 15
            pindef = ["gpiochip4", "15"]  # gpiofind PIN15
            print("PINDEF", pindef)

            # HACK: needs passwordless sudo
            process = await asyncio.create_subprocess_exec("sudo", "gpiomon", "-brf", *pindef, stdout=asyncio.subprocess.PIPE)
            while True:
                line = await process.stdout.readline()
                if line:
                    line = line.decode().strip()
                    if "FALLING" in line:
                        self.toggle_recording(False)
                    elif "RISING" in line:
                        self.toggle_recording(True)
                else:
                    break
        else:
            # Keyboard listener for spacebar press/release
            listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
            listener.start()

    def start(self):
        asyncio.run(self.start_async())
        p.terminate()
@@ -0,0 +1,4 @@
from ..base_device import Device

desktop_device = Device()
desktop_device.start()
@@ -0,0 +1,4 @@
from ..base_device import Device

rpi_device = Device()
rpi_device.start()
@@ -0,0 +1,8 @@
DEVICE=$(uname -n)
if [[ "$DEVICE" == "rpi" ]]; then
    cd 01OS
    python -m 01OS.clients.rpi.device &
else
    cd 01OS
    python -m 01OS.clients.macos.device &
fi
@@ -1,34 +1,27 @@
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

from starlette.websockets import WebSocketDisconnect
import ast
import json
import time
import queue
import os
import traceback
from queue import Queue
from threading import Thread
import threading
import uvicorn
import re
from fastapi import FastAPI
from fastapi.responses import PlainTextResponse
from threading import Thread
from starlette.websockets import WebSocket
from stt import stt_bytes
from tts import tts
from .stt.stt import stt_bytes
from .tts.tts import tts
from pathlib import Path
import asyncio
import urllib.parse
from utils.kernel import put_kernel_messages_into_queue
from i import configure_interpreter
from .utils.kernel import put_kernel_messages_into_queue
from .i import configure_interpreter
from interpreter import interpreter
import ngrok

from utils.logs import setup_logging
from utils.logs import logger
from .utils.logs import setup_logging
from .utils.logs import logger
setup_logging()

@@ -0,0 +1,33 @@
The open-source language model computer.

```bash
pip install 01OS
```

```bash
01 # This will run a server + attempt to determine and run a client.
# (Behavior can be modified by changing the contents of `.env`)
```
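
For reference, these are the switches that `start.sh` and the clients read from the environment. The snippet below is just an illustrative way to inspect them; the variable names come from this repo, but the fallback values shown are assumptions, not shipped defaults.

```python
# Illustrative only: print the .env switches this package consults.
import os
from dotenv import load_dotenv

load_dotenv()  # same mechanism the client and server use

for name, assumed_default in [
    ("SERVER_START", "True"),                # start.sh: launch the server
    ("CLIENT_START", "True"),                # start.sh: launch a client
    ("CLIENT_TYPE", "macos"),                # start.sh: which client (macos, rpi)
    ("ALL_LOCAL", "False"),                  # start.sh: install/run local STT and TTS
    ("SERVER_EXPOSE_PUBLICALLY", "False"),   # start.sh: expose via ngrok (spelling as in start.sh)
    ("SERVER_URL", "ws://localhost:8000/"),  # client: websocket to connect to (assumed value)
    ("STT_RUNNER", "client"),                # who runs speech-to-text: "client" or "server"
    ("CODE_RUNNER", "client"),               # who executes code: "client" or "server"
]:
    print(f"{name}={os.getenv(name, assumed_default)}")
```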

**Expose an 01 server publicly:**

```bash
01 --server --expose # This will print a URL that a client can point to.
```

**Run a specific client:**

```bash
01 --client macos # Options: macos, rpi
```

**Run locally:**

The current default uses OpenAI's services.

The `--local` flag will install and run the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) STT and [Piper](https://github.com/rhasspy/piper) TTS models.

```bash
01 --local # Local client and server
01 --local --server --expose # Expose a local server
```
@@ -0,0 +1,34 @@
[tool.poetry]
name = "01OS"
packages = [
    {include = "01OS"},
]
include = [".env.example", "start.py", "start.sh"]
version = "0.0.2"
description = "The open-source language model computer"
authors = ["Killian <killian@openinterpreter.com>"]
license = "AGPL"
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.9,<3.12"
asyncio = "^3.4.3"
pyaudio = "^0.2.14"
pynput = "^1.7.6"
fastapi = "^0.109.2"
uvicorn = "^0.27.1"
websockets = "^12.0"
python-dotenv = "^1.0.1"
ffmpeg-python = "^0.2.0"
textual = "^0.50.1"
pydub = "^0.25.1"
ngrok = "^1.0.0"
open-interpreter = "^0.2.0"
simpleaudio = "^1.0.4"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
01 = "start:main"
@@ -0,0 +1,23 @@
"""
This is just for the Python package — we need a Python entrypoint.
Just starts `start.sh` with all the same command line arguments. Aliased to 01.
"""

import os
import subprocess
import sys

def main():

    # Get command line arguments
    args = sys.argv[1:]

    # Get the directory of the current script
    dir_path = os.path.dirname(os.path.realpath(__file__))

    # Prepare the command
    command = [os.path.join(dir_path, 'start.sh')] + args

    # Start start.sh with the command line arguments
    subprocess.run(command, check=True)
@@ -0,0 +1,172 @@
#!/usr/bin/env bash

### Import Environment Variables from .env
SCRIPT_DIR="$(dirname "$0")"
if [ ! -f "$SCRIPT_DIR/.env" ]; then
    echo "No .env file found. Copying from .env.example..."
    cp "$SCRIPT_DIR/.env.example" "$SCRIPT_DIR/.env"
fi
set -a; source "$SCRIPT_DIR/.env"; set +a

### COMMAND LINE ARGUMENTS

# Set both SERVER_START and CLIENT_START to False if "--server" or "--client" is passed as an argument
# (This way, --server runs only the server, --client runs only the client.)
if [[ "$@" == *"--server"* ]] || [[ "$@" == *"--client"* ]]; then
    export SERVER_START="False"
    export CLIENT_START="False"
fi

# Check if "--local" is passed as an argument
if [[ "$@" == *"--local"* ]]; then
    # If "--local" is passed, set ALL_LOCAL to True
    export ALL_LOCAL="True"
fi

# Check if "--server" is passed as an argument
if [[ "$@" == *"--server"* ]]; then
    # If "--server" is passed, set SERVER_START to True
    export SERVER_START="True"
fi

# Check if "--client" is passed as an argument
if [[ "$@" == *"--client"* ]]; then
    # If "--client" is passed, set CLIENT_START to True
    export CLIENT_START="True"
    # Extract the client type from the arguments
    CLIENT_TYPE=$(echo "$@" | sed -n -e 's/^.*--client //p' | awk '{print $1}')
    # If client type is not empty, export it
    if [[ ! -z "$CLIENT_TYPE" ]]; then
        export CLIENT_TYPE
    fi
fi

# Check if "--expose" is passed as an argument
if [[ "$@" == *"--expose"* ]]; then
    # If "--expose" is passed, set SERVER_EXPOSE_PUBLICALLY to True
    export SERVER_EXPOSE_PUBLICALLY="True"
fi

### SETUP

if [[ "$ALL_LOCAL" == "True" ]]; then
    # if using local models, install the models / executables

    ## WHISPER

    WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"
    WHISPER_PATH="$SCRIPT_DIR/01OS/server/stt/local_service"
    if [[ ! -f "${WHISPER_PATH}/${WHISPER_MODEL_NAME}" ]]; then
        mkdir -p "${WHISPER_PATH}"
        curl -L "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" -o "${WHISPER_PATH}/${WHISPER_MODEL_NAME}"
    fi

    ## PIPER

    PIPER_FILE_PATH="$SCRIPT_DIR/01OS/server/tts/local_service${PIPER_URL}${PIPER_ASSETNAME}"
    if [[ ! -f "$PIPER_FILE_PATH" ]]; then

        mkdir -p "${PIPER_FILE_PATH}"

        OS=$(uname -s)
        ARCH=$(uname -m)
        if [ "$OS" = "Darwin" ]; then
            OS="macos"
            if [ "$ARCH" = "arm64" ]; then
                ARCH="aarch64"
            elif [ "$ARCH" = "x86_64" ]; then
                ARCH="x64"
            else
                echo "Piper: unsupported architecture"
            fi
        fi
        PIPER_ASSETNAME="piper_${OS}_${ARCH}.tar.gz"
        PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"

        # Save the current working directory
        CWD=$(pwd)

        # Navigate to SCRIPT_DIR/01OS/server/tts/local_service
        cd $SCRIPT_DIR/01OS/server/tts/local_service

        curl -L "${PIPER_URL}${PIPER_ASSETNAME}" -o "${PIPER_ASSETNAME}"
        tar -xvzf $PIPER_ASSETNAME
        cd piper
        if [ "$OS" = "macos" ]; then
            if [ "$ARCH" = "x64" ]; then
                softwareupdate --install-rosetta --agree-to-license
            fi
            PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
            PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"

            curl -OL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}"
            tar -xvzf $PIPER_PHONEMIZE_ASSETNAME
            curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}"
            curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json"
            PIPER_DIR=`pwd`
            install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper"
            install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper"
            install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper"
        fi

        # Navigate back to the current working directory
        cd $CWD
    fi
fi

### START

start_client() {
    echo "Starting client..."
    bash 01OS/clients/start.sh &
    CLIENT_PID=$!
    echo "client started as process $CLIENT_PID"
}

# Function to start server
start_server() {
    echo "Starting server..."
    python -m 01OS.server.server &
    SERVER_PID=$!
    echo "Server started as process $SERVER_PID"
}

stop_processes() {
    if [[ -n $CLIENT_PID ]]; then
        echo "Stopping client..."
        kill $CLIENT_PID
    fi
    if [[ -n $SERVER_PID ]]; then
        echo "Stopping server..."
        kill $SERVER_PID
    fi
}

# Trap SIGINT and SIGTERM to stop processes when the script is terminated
trap stop_processes SIGINT SIGTERM

# SERVER
# Start server if SERVER_START is True
if [[ "$SERVER_START" == "True" ]]; then
    start_server
fi

# CLIENT
# Start client if CLIENT_START is True
if [[ "$CLIENT_START" == "True" ]]; then
    start_client
fi

# Wait for client and server processes to exit
wait $CLIENT_PID
wait $SERVER_PID

# TTS, STT

# (todo)
# (i think we should start with hosted services)

# LLM

# (disabled, we'll start with hosted services)
# python core/llm/start.py &
@@ -1 +0,0 @@
conversations/user.json
@@ -1,14 +0,0 @@
git+https://github.com/KillianLucas/open-interpreter.git
asyncio
PyAudio
pynput
fastapi
uvicorn
websockets
playsound
python-dotenv
ffmpeg-python
textual
pydub
python-dotenv
ngrok
@@ -1,123 +0,0 @@
#!/usr/bin/env bash

### Import Environment Variables from .env
if [ ! -f ".env" ]; then
    echo "Error: .env file does not exist. To create one, see .env.example for an example."
    exit 1
fi
set -a; source .env; set +a

### SETUP

if [[ "$ALL_LOCAL" == "True" ]]; then
    # if using local models, install the models / executables
    WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"
    WHISPER_RUST_PATH="`pwd`/local_stt/whisper-rust"
    curl -OL "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" --output-dir ${WHISPER_RUST_PATH}
    OS=$(uname -s)
    ARCH=$(uname -m)
    if [ "$OS" = "Darwin" ]; then
        OS="macos"
        if [ "$ARCH" = "arm64" ]; then
            ARCH="aarch64"
        elif [ "$ARCH" = "x86_64" ]; then
            ARCH="x64"
        else
            echo "Piper: unsupported architecture"
        fi
    fi
    PIPER_ASSETNAME="piper_${OS}_${ARCH}.tar.gz"
    PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"
    mkdir local_tts
    cd local_tts
    curl -OL "${PIPER_URL}${PIPER_ASSETNAME}"
    tar -xvzf $PIPER_ASSETNAME
    cd piper
    curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}"
    curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json"
    if [ "$OS" = "macos" ]; then
        if [ "$ARCH" = "x64" ]; then
            softwareupdate --install-rosetta --agree-to-license
        fi
        PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
        PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
        curl -OL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}"
        tar -xvzf $PIPER_PHONEMIZE_ASSETNAME
        PIPER_DIR=`pwd`
        install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper"
        install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper"
        install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper"
    fi
    cd ../..
fi

# (for dev, reset the ports we were using)

SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+")
if [ -n "$SERVER_PORT" ]; then
    lsof -ti tcp:$SERVER_PORT | xargs kill 2>/dev/null || true
fi

### START

start_device() {
    echo "Starting device..."
    if [[ -n $NGROK_AUTHTOKEN ]]; then
        echo "Waiting for Ngrok to setup"
        sleep 7
        read -p "Enter the Ngrok URL: " ngrok_url
        export SERVER_CONNECTION_URL=$ngrok_url
        echo "SERVER_CONNECTION_URL set to $SERVER_CONNECTION_URL"
    fi
    python device.py &
    DEVICE_PID=$!
    echo "Device started as process $DEVICE_PID"
}

# Function to start server
start_server() {
    echo "Starting server..."
    python server.py &
    SERVER_PID=$!
    echo "Server started as process $SERVER_PID"
}

stop_processes() {
    if [[ -n $DEVICE_PID ]]; then
        echo "Stopping device..."
        kill $DEVICE_PID
    fi
    if [[ -n $SERVER_PID ]]; then
        echo "Stopping server..."
        kill $SERVER_PID
    fi
}

# Trap SIGINT and SIGTERM to stop processes when the script is terminated
trap stop_processes SIGINT SIGTERM

# SERVER
# Start server if SERVER_START is True
if [[ "$SERVER_START" == "True" ]]; then
    start_server
fi

# DEVICE
# Start device if DEVICE_START is True
if [[ "$DEVICE_START" == "True" ]]; then
    start_device
fi

# Wait for device and server processes to exit
wait $DEVICE_PID
wait $SERVER_PID

# TTS, STT

# (todo)
# (i think we should start with hosted services)

# LLM

# (disabled, we'll start with hosted services)
# python core/llm/start.py &
@@ -1,81 +0,0 @@
# New: The 8th Architecture

```
/01
  start.sh # entrypoint, runs server, device, llm
  server.py # uses tts and stt if it must, exposes "/"
  device.py # also uses tts and stt, hits "/"
  llm.py # starts an openai-compatible server
  model.llamafile
  i.py # creates an interpreter which server just imports
  tts.py
  stt.py
  /conversations
    user.json
  /skills # files in here will run in the 01's interpreter
    schedule.py
    ...
```

This is flatter and simpler.

**Device** handles the device (everything the user interacts with), watches the kernel, and runs code (which produces `computer` LMC messages) if `DEVICE_EXECUTE_CODE` is true. It runs TTS and STT and sends LMC messages to "/".

**Server** serves "/", a websocket that accepts `user` LMC messages and sends back `assistant` LMC messages. Runs code (which produces `computer` LMC messages) if `SERVER_EXECUTE_CODE` is true.
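
To make the protocol concrete, here's a minimal sketch of a client talking LMC to "/" (illustrative only; the message shapes mirror what `base_device.py` in this commit sends, and the URL is an assumption):

```python
# Illustrative LMC exchange with the "/" websocket. Not the shipped client;
# the URL is assumed, the message shapes follow base_device.py above.
import asyncio
import json
import websockets

async def demo():
    async with websockets.connect("ws://localhost:8000/") as ws:  # assumed address
        # A `user` message is streamed as start -> content -> end.
        await ws.send(json.dumps({"role": "user", "type": "message", "start": True}))
        await ws.send(json.dumps({"role": "user", "type": "message", "content": "What time is it?"}))
        await ws.send(json.dumps({"role": "user", "type": "message", "end": True}))

        # The server streams back `assistant` (and possibly `code`/`audio`) chunks
        # until a chunk carrying "end" arrives.
        while True:
            chunk = json.loads(await ws.recv())
            if chunk.get("content"):
                print(chunk["content"], end="", flush=True)
            if chunk.get("end"):
                break

asyncio.run(demo())
```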

**Llm** starts an OpenAI-compatible server with `model.llamafile`. Downloads a heavily quantized Phi-2 if `model.llamafile` doesn't exist.

**I** creates an `interpreter` object. This is where you configure the 01's behavior.
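
For illustration, `i.py` might look something like this (a sketch only; every setting below is an assumption, not the shipped configuration):

```python
# Illustrative sketch of what i.py's configure_interpreter could do.
# All values here are assumptions, not the repo's actual configuration.

def configure_interpreter(interpreter):
    interpreter.auto_run = True      # let the 01 execute code without asking for confirmation
    interpreter.system_message += "\nYou are the 01, a voice-first language model computer."
    interpreter.llm.model = "gpt-4"  # or point this at the local llamafile's OpenAI-compatible endpoint
    return interpreter
```

The server in this commit imports it as `from .i import configure_interpreter` and applies it to the shared `interpreter` object.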

# What is this?

This is the operating system that powers the 01.

# No, I mean what's this folder?

It's the `diff` between 01OS and Ubuntu.

01OS should be a customized version of Linux. Ubuntu is popular, stable, and runs on lots of different hardware. **(Open question: should this be Xubuntu, which is lighter? Or something else?)**

We want to _build on_ Ubuntu by customizing the stable branch programmatically, not by forking it — which would mean we'd have to maintain the underlying OS, merge in security patches, etc. Yuck.

This folder contains everything we want to change from the base Ubuntu. A folder here represents a folder added/modified at the root. You can think of it like the `diff` between 01OS and Ubuntu.

I imagine we'll use something like Cubic to press this + Ubuntu into an ISO image.

# Setup & Usage

Clone this repo, then run `OS/01/start.sh`.

# Structure

### `start.sh`

The start script's job is to start the `core` and the `app` (in full-screen mode).

### `/core`

The `core`'s job is to:

1. Set up the language model
2. Set up the interpreter
3. Serve the interpreter at "/"

### `/app`

The `app`'s job is to be the interface between the user and the interpreter (text in). This could be text only, audio, video, who knows, but it becomes LMC messages or plain text.

For the first version, I think we should just handle audio in and out. So the `app`'s job here is to:

1. Be a fullscreen app for the user to use the 01
2. Turn the user's speech into text and send it to "/"
3. Turn the interpreter's text into speech and play it for the user

### Changes to Linux

We need to make the following changes:

1. Modify the bootloader to just show a white circle on black
2. Auto-start the start script, `start.sh`
3. Put detectors everywhere, which will put [LMC Messages](https://docs.openinterpreter.com/protocols/lmc-messages) from the computer into `/01/core/queue`. Michael suggested we simply watch and filter the `dmesg` stream, so I suppose we could have a script like `/01/core/kernel_watcher.py` that puts things into the queue (a sketch follows this list). Honestly, knowing we could get it all from one place like that, maybe this should be simpler. Is the queue necessary? How about we just expect the computer to send computer messages to the websocket at "/"? Then yeah, maybe we do have Redis there, and instead of looking at that folder, we check the Redis queue...
4. (Open question: should we do this? Do we want the first 01 to be ready for GUI control?) Make the display that's shown to the user (and filled with the `app`) the _secondary_ display. The primary display will be a normal Ubuntu desktop, invisible to the user. Why? So the interpreter can control the primary display "under the hood".
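
A rough sketch of the kernel-watcher idea from item 3 (the `dmesg` filter and the message fields are assumptions, not a spec):

```python
# Illustrative kernel watcher: follow dmesg, filter lines of interest,
# and queue them as `computer` LMC messages. The filter and the "type"
# field are assumptions; real detectors would be richer. May need root.
import queue
import subprocess

def watch_kernel(q: queue.Queue):
    proc = subprocess.Popen(
        ["dmesg", "--follow"], stdout=subprocess.PIPE, text=True
    )
    for line in proc.stdout:
        if "usb" in line.lower():  # example filter
            q.put({"role": "computer", "type": "message", "content": line.strip()})

if __name__ == "__main__":
    q = queue.Queue()
    watch_kernel(q)
```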