`start.py`, modular architecture, OI flags, mutable items to user dir

2 years ago · 701d357e30
parent 91fcb94438
commit 701d357e30
40 changed files with 689 additions and 205 deletions
--- a/01OS/01OS/clients/base_device.py
+++ b/01OS/01OS/clients/base_device.py
@ -66,6 +66,7 @@ class Device:
        self.pressed_keys = set()
        self.captured_images = []
        self.audiosegments = []
        self.server_url = ""
    def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
        """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
@ -303,10 +304,7 @@ class Device:
    async def start_async(self):
            # Configuration for WebSocket
-            WS_URL = os.getenv('SERVER_URL')
+            WS_URL = f"ws://{self.server_url}"
            if not WS_URL:
                raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
            # Start the WebSocket communication
            asyncio.create_task(self.websocket_communication(WS_URL))
--- a/01OS/01OS/clients/macos/init.py
+++ b/01OS/01OS/clients/macos/init.py
--- a/01OS/01OS/clients/mac/device.py
+++ b/01OS/01OS/clients/mac/device.py
@ -0,0 +1,10 @@
 from ..base_device import Device
 device = Device()
 def main(server_url):
    """Point the module-level ``device`` at a server and start it.

    Args:
        server_url: Address stored on ``device.server_url`` before the
            device is started.
    """
    device.server_url = server_url
    device.start()
 if __name__ == "__main__":
    import sys

    # main() has a required argument, so the bare main() call that used to be
    # here always failed with TypeError. Take the address from the command line.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} SERVER_URL")
    main(sys.argv[1])
--- a/01OS/01OS/clients/macos/device.py
+++ b/01OS/01OS/clients/macos/device.py
@ -1,4 +0,0 @@
 from ..base_device import Device
 desktop_device = Device()
 desktop_device.start()
--- a/01OS/01OS/clients/rpi/device.py
+++ b/01OS/01OS/clients/rpi/device.py
@ -1,4 +1,9 @@
 from ..base_device import Device
-rpi_device = Device()
+device = Device()
-rpi_device.start()
+
 def main():
    """Start the module-level ``device``."""
    device.start()
 # Allow running this module directly as a script.
 if __name__ == "__main__":
    main()
--- a/01OS/01OS/clients/start.sh
+++ b/01OS/01OS/clients/start.sh
@ -1,6 +0,0 @@
 # Choose which client module to launch from this machine's node name.
 DEVICE=$(uname -n)
 # A node name of exactly "rpi" selects the Raspberry Pi client;
 # anything else falls through to the macOS client.
 if [[ "$DEVICE" == "rpi" ]]; then
    python -m 01OS.clients.rpi.device
 else
    python -m 01OS.clients.macos.device
 fi
--- a/01OS/01OS/server/i.py
+++ b/01OS/01OS/server/i.py
@ -1,6 +1,7 @@
 from dotenv import load_dotenv
 load_dotenv()  # take environment variables from .env.
 from platformdirs import user_data_dir
 import os
 import glob
 import json
@ -36,8 +37,11 @@ def configure_interpreter(interpreter: OpenInterpreter):
    ### RESET conversations/user.json
-    script_dir = os.path.dirname(os.path.abspath(__file__))
+    
-    user_json_path = os.path.join(script_dir, 'conversations', 'user.json')
+    app_dir = user_data_dir('01')
    conversations_dir = os.path.join(app_dir, 'conversations')
    os.makedirs(conversations_dir, exist_ok=True)
    user_json_path = os.path.join(conversations_dir, 'user.json')
    with open(user_json_path, 'w') as file:
        json.dump([], file)
--- a/01OS/01OS/server/server.py
+++ b/01OS/01OS/server/server.py
@ -1,17 +1,17 @@
 from dotenv import load_dotenv
 load_dotenv()  # take environment variables from .env.
 from platformdirs import user_data_dir
 import ast
 import json
 import queue
 import os
 import traceback
 from .utils.bytes_to_wav import bytes_to_wav
 import re
 from fastapi import FastAPI, Request
 from fastapi.responses import PlainTextResponse
 from starlette.websockets import WebSocket, WebSocketDisconnect
 from .stt.stt import stt_bytes
 from .tts.tts import stream_tts
 from pathlib import Path
 import asyncio
 import urllib.parse
@ -28,7 +28,8 @@ accumulator = Accumulator()
 app = FastAPI()
-conversation_history_path = Path(__file__).parent / 'conversations' / 'user.json'
+app_dir = user_data_dir('01')
 conversation_history_path = os.path.join(app_dir, 'conversations', 'user.json')
 SERVER_LOCAL_PORT = int(os.getenv('SERVER_LOCAL_PORT', 8000))
@ -198,7 +199,9 @@ async def listener():
            # Convert bytes to audio file
            # Format will be bytes.wav or bytes.opus
            mime_type = "audio/" + message["format"].split(".")[1]
-            text = stt_bytes(message["content"], mime_type)
+            audio_file_path = bytes_to_wav(message["content"], mime_type)
            text = stt(audio_file_path)
            print(text)
            message = {"role": "user", "type": "message", "content": text}
        # At this point, we have only text messages
@ -335,30 +338,77 @@ async def stream_tts_to_device(sentence):
    ]
    if sentence.lower().strip().strip(".!?").strip() in force_task_completion_responses:
        return
    for chunk in stream_tts(sentence):
        await to_device.put(chunk)
 def stream_tts(sentence):
    """Generate speech for ``sentence`` and yield it as a framed audio stream.

    The audio file produced by ``tts`` is read fully into memory and deleted,
    then emitted as: a start marker dict, the raw bytes in 1024-byte slices,
    and an end marker dict.
    """
    audio_path = tts(sentence)
    with open(audio_path, "rb") as audio:
        payload = audio.read()
    os.remove(audio_path)

    audio_format = "bytes.raw"
    step = 1024

    # Opening frame
    yield {"role": "assistant", "type": "audio", "format": audio_format, "start": True}
    offset = 0
    while offset < len(payload):
        yield payload[offset:offset + step]
        offset += step
    # Closing frame
    yield {"role": "assistant", "type": "audio", "format": audio_format, "end": True}
 from uvicorn import Config, Server
 import os
 import platform
 from importlib import import_module
-# Run the FastAPI app
+async def main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service):
-if __name__ == "__main__":
+        
        # Setup services
        application_directory = user_data_dir('01')
        services_directory = os.path.join(application_directory, 'services')
        service_dict = {'llm': llm_service, 'tts': tts_service, 'stt': stt_service}
        for service in service_dict:
            service_directory = os.path.join(services_directory, service, service_dict[service])
            # This is the folder they can mess around in
            config = {"service_directory": service_directory}
            if service == "llm":
                config.update({
                    "interpreter": interpreter,
                    "model": model,
                    "llm_supports_vision": llm_supports_vision,
                    "llm_supports_functions": llm_supports_functions,
                    "context_window": context_window,
                    "max_tokens": max_tokens,
                    "temperature": temperature
                })
            module = import_module(f'.server.services.{service}.{service_dict[service]}.{service}', package='01OS')
            ServiceClass = getattr(module, service.capitalize())
            service_instance = ServiceClass(config)
            globals()[service] = getattr(service_instance, service)
        interpreter.llm.completions = llm
    async def main():
        if os.getenv('TEACH_MODE') == "True":
            teach()
        else:
        # Start listening
        asyncio.create_task(listener())
        # Start watching the kernel if it's your job to do that
-            if os.getenv('CODE_RUNNER') == "server":
+        if True: # in the future, code can run on device. for now, just server.
            asyncio.create_task(put_kernel_messages_into_queue(from_computer))
-            # Start the server
+        config = Config(app, host=server_host, port=int(server_port), lifespan='on')
            logger.info("Starting `server.py`... on localhost:" + str(SERVER_LOCAL_PORT))
            config = Config(app, host="localhost", port=SERVER_LOCAL_PORT, lifespan='on')
        server = Server(config)
        await server.serve()
 # Run the FastAPI app
 if __name__ == "__main__":
    asyncio.run(main())
--- a/01OS/01OS/server/services/init.py
+++ b/01OS/01OS/server/services/init.py
--- a/01OS/01OS/server/services/llm/init.py
+++ b/01OS/01OS/server/services/llm/init.py
--- a/01OS/01OS/server/services/llm/litellm/init.py
+++ b/01OS/01OS/server/services/llm/litellm/init.py
--- a/01OS/01OS/server/services/llm/litellm/llm.py
+++ b/01OS/01OS/server/services/llm/litellm/llm.py
@ -0,0 +1,15 @@
 class Llm:
    """LLM service that configures the interpreter passed in ``config``."""
    def __init__(self, config):
        """Apply ``config`` to the interpreter and expose its completions callable.

        ``config`` is modified in place: the ``"interpreter"`` and
        ``"service_directory"`` entries are removed, and every remaining
        entry is set as an attribute on the interpreter (dashes in the key
        become underscores).
        """
        # Litellm is used by OI by default, so we just modify OI
        target = config["interpreter"]
        for reserved in ("interpreter", "service_directory"):
            config.pop(reserved, None)
        for option in config:
            setattr(target, option.replace("-", "_"), config[option])
        self.llm = target.llm.completions
--- a/01OS/01OS/server/services/llm/llamaedge/init.py
+++ b/01OS/01OS/server/services/llm/llamaedge/init.py
--- a/01OS/01OS/server/services/llm/llamaedge/llm.py
+++ b/01OS/01OS/server/services/llm/llamaedge/llm.py
@ -0,0 +1,49 @@
 import os
 import subprocess
 import requests
 import json
 class Llm:
    """LLM service backed by a local LlamaEdge API server.

    Construction provisions the service directory (WasmEdge runtime, model
    weights and the ``llama-api-server.wasm`` app) when needed and launches
    the API server in the background; ``llm`` then talks to it over HTTP.
    """
    MODEL_FILE = "Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
    APP_FILE = "llama-api-server.wasm"
    def __init__(self, config):
        """Install (if needed) and start the server under ``config["service_directory"]``."""
        self.install(config["service_directory"])
    def install(self, service_directory):
        """Download the runtime, model and server app, then start the server.

        Args:
            service_directory: Directory under which the ``llm`` working
                directory is created.

        Raises:
            subprocess.CalledProcessError: If a download or the WasmEdge
                installer exits with a non-zero status.
        """
        LLM_FOLDER_PATH = service_directory
        self.llm_directory = os.path.join(LLM_FOLDER_PATH, 'llm')
        required_files = [
            os.path.join(self.llm_directory, name)
            for name in (self.MODEL_FILE, self.APP_FILE)
        ]
        if not all(os.path.isfile(path) for path in required_files):
            # The downloads below run with cwd=self.llm_directory, so it must exist.
            os.makedirs(self.llm_directory, exist_ok=True)
            # Install WasmEdge. "|" is shell syntax and has no meaning inside an
            # argument list, so fetch the installer and feed it to bash explicitly.
            installer = subprocess.run(
                ['curl', '-sSf', 'https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh'],
                stdout=subprocess.PIPE,
                check=True,
            )
            subprocess.run(
                ['bash', '-s', '--', '--plugin', 'wasi_nn-ggml'],
                input=installer.stdout,
                check=True,
            )
            # Download the Qwen1.5-0.5B-Chat model GGUF file
            MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
            subprocess.run(['curl', '-fLO', MODEL_URL], cwd=self.llm_directory, check=True)
            # Download the llama-api-server.wasm app
            APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"
            subprocess.run(['curl', '-fLO', APP_URL], cwd=self.llm_directory, check=True)
            print("LLM setup completed.")
        else:
            print("LLM already set up. Skipping download.")
        # Run the API server in the background on every start. subprocess.run()
        # would block here until the server exits, and launching it only after a
        # fresh download would leave later runs without a server.
        self.server_process = subprocess.Popen(
            ['wasmedge', '--dir', '.:.', '--nn-preload', f'default:GGML:AUTO:{self.MODEL_FILE}', self.APP_FILE, '-p', 'llama-2-chat'],
            cwd=self.llm_directory,
        )
    def llm(self, messages):
        """Send ``messages`` to the local chat-completions endpoint.

        Yields:
            One decoded JSON object per non-empty line of the HTTP response.
        """
        url = "http://localhost:8080/v1/chat/completions"
        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json'
        }
        data = {
            "messages": messages,
            "model": "llama-2-chat"
        }
        with requests.post(url, headers=headers, data=json.dumps(data), stream=True) as response:
            for line in response.iter_lines():
                if line:
                    yield json.loads(line)
--- a/01OS/01OS/server/services/llm/llamafile/init.py
+++ b/01OS/01OS/server/services/llm/llamafile/init.py
--- a/01OS/01OS/server/services/llm/llamafile/llm.py
+++ b/01OS/01OS/server/services/llm/llamafile/llm.py
@ -0,0 +1,84 @@
 import os
 import platform
 import subprocess
 import time
 import wget
 import stat
 class Llm:
    """LLM service that runs the Phi-2 llamafile locally and points the interpreter at it."""
    def __init__(self, config):
        """Install and launch the llamafile, then apply ``config`` to the interpreter.

        ``config`` is modified in place: ``"interpreter"`` and
        ``"service_directory"`` are removed; each remaining entry is set as an
        attribute on the interpreter (dashes in the key become underscores).
        """
        self.interpreter = config["interpreter"]
        config.pop("interpreter", None)
        self.install(config["service_directory"])
        config.pop("service_directory", None)
        for key, value in config.items():
            setattr(self.interpreter, key.replace("-", "_"), value)
        self.llm = self.interpreter.llm.completions
    def install(self, service_directory):
        """Download the llamafile if missing, start it, and configure the interpreter.

        Args:
            service_directory: Directory under which ``models/`` is created.

        Raises:
            Exception: On macOS when ``xcode-select -p`` fails.
            PermissionError: If the llamafile exists but is not executable.
        """
        if platform.system() == "Darwin": # Check if the system is MacOS
            result = subprocess.run(
                ["xcode-select", "-p"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
            )
            if result.returncode != 0:
                print(
                    "Llamafile requires Mac users to have Xcode installed. You can install Xcode from https://developer.apple.com/xcode/ .\n\nAlternatively, you can use `LM Studio`, `Jan.ai`, or `Ollama` to manage local language models. Learn more at https://docs.openinterpreter.com/guides/running-locally ."
                )
                time.sleep(3)
                raise Exception("Xcode is not installed. Please install Xcode and try again.")
        # Define the path to the models directory and make sure it exists
        models_dir = os.path.join(service_directory, "models")
        os.makedirs(models_dir, exist_ok=True)
        # Define the path to the new llamafile
        llamafile_path = os.path.join(models_dir, "phi-2.Q4_K_M.llamafile")
        # Check if the new llamafile exists, if not download it
        if not os.path.exists(llamafile_path):
            print(
                "Attempting to download the `Phi-2` language model. This may take a few minutes."
            )
            time.sleep(3)
            url = "https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q4_K_M.llamafile"
            wget.download(url, llamafile_path)
        if os.path.exists(llamafile_path):
            # Make the new llamafile executable. Done only once the file is known
            # to exist, so a failed download reaches the message below instead of
            # raising from os.stat().
            if platform.system() != "Windows":
                st = os.stat(llamafile_path)
                os.chmod(llamafile_path, st.st_mode | stat.S_IEXEC)
            # Check the permission bit rather than executing the file: running
            # the llamafile as a "test" would start it in the foreground and
            # never return control to the caller.
            if not os.access(llamafile_path, os.X_OK):
                print("The llamafile is not executable. Please check the file permissions.")
                raise PermissionError(f"llamafile is not executable: {llamafile_path}")
            # Run the new llamafile in the background
            subprocess.Popen([llamafile_path, "-ngl", "9999"])
        else:
            error_message = "The llamafile does not exist or is corrupted. Please ensure it has been downloaded correctly or try again."
            print(error_message)
        self.interpreter.system_message = "You are Open Interpreter, a world-class programmer that can execute code on the user's machine."
        self.interpreter.offline = True
        self.interpreter.llm.model = "local"
        self.interpreter.llm.temperature = 0
        # The locally launched server is reached over plain HTTP, not TLS.
        self.interpreter.llm.api_base = "http://localhost:8080/v1"
        self.interpreter.llm.max_tokens = 1000
        self.interpreter.llm.context_window = 3000
        self.interpreter.llm.supports_functions = False
--- a/01OS/01OS/server/services/stt/init.py
+++ b/01OS/01OS/server/services/stt/init.py
--- a/01OS/01OS/server/services/stt/local-whisper/init.py
+++ b/01OS/01OS/server/services/stt/local-whisper/init.py
--- a/01OS/01OS/server/services/stt/local-whisper/stt.py
+++ b/01OS/01OS/server/services/stt/local-whisper/stt.py
@ -0,0 +1,151 @@
 """
 Defines a function which takes a path to an audio file and turns it into text.
 """
 from datetime import datetime
 import os
 import contextlib
 import tempfile
 import shutil
 import ffmpeg
 import subprocess
 import os
 import subprocess
 class Stt:
    """Speech-to-text service wrapper around this module's ``install`` and ``stt``."""
    def __init__(self, config):
        """Run ``install`` for the directory given in ``config["service_directory"]``."""
        service_directory = config["service_directory"]
        install(service_directory)
    def stt(self, audio_file_path):
        """Transcribe ``audio_file_path`` by delegating to the module-level ``stt``."""
        return stt(audio_file_path)
 def install(service_dir):
    """Provision whisper-rust and the Whisper model under ``service_dir``.

    Copies the bundled ``whisper-rust`` sources next to this module into
    ``service_dir`` (if not already there), builds the release binary with
    cargo when it is missing, and downloads the model file named by
    ``WHISPER_MODEL_NAME`` (default ``ggml-tiny.en.bin``) from
    ``WHISPER_MODEL_URL`` when it is missing.

    Exits the process with status 1 if the bundled sources are absent or if
    a build is needed and ``rustc`` is not on PATH.
    """
    import sys

    ### INSTALL
    WHISPER_RUST_PATH = os.path.join(service_dir, "whisper-rust")
    script_dir = os.path.dirname(os.path.realpath(__file__))
    source_whisper_rust_path = os.path.join(script_dir, "whisper-rust")
    if not os.path.exists(source_whisper_rust_path):
        print(f"Source directory does not exist: {source_whisper_rust_path}")
        sys.exit(1)
    if not os.path.exists(WHISPER_RUST_PATH):
        shutil.copytree(source_whisper_rust_path, WHISPER_RUST_PATH)
    # Check if whisper-rust executable exists before attempting to build
    if not os.path.isfile(os.path.join(WHISPER_RUST_PATH, "target", "release", "whisper-rust")):
        # Check if Rust is installed. Needed to build whisper executable
        if shutil.which("rustc") is None:
            print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.")
            sys.exit(1)
        # Build Whisper Rust executable if not found. cwd= scopes the directory
        # change to the build itself instead of os.chdir(), which would move the
        # working directory of the whole process.
        subprocess.call(["cargo", "build", "--release"], cwd=WHISPER_RUST_PATH)
    else:
        print("Whisper Rust executable already exists. Skipping build.")
    WHISPER_MODEL_PATH = os.path.join(service_dir, "model")
    WHISPER_MODEL_NAME = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')
    WHISPER_MODEL_URL = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/')
    model_file = os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)
    if not os.path.isfile(model_file):
        os.makedirs(WHISPER_MODEL_PATH, exist_ok=True)
        # Argument list rather than a shell string: the URL and name come from
        # the environment and must not be interpreted by a shell.
        subprocess.call(["curl", "-L", f"{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}", "-o", model_file])
    else:
        print("Whisper model already exists. Skipping download.")
 def convert_mime_type_to_format(mime_type: str) -> str:
    """Map an audio MIME type to a file extension.

    Known types map to ``wav``, ``webm`` or ``dat``; any other value is
    returned unchanged.
    """
    extensions = {
        "audio/x-wav": "wav",
        "audio/wav": "wav",
        "audio/webm": "webm",
        "audio/raw": "dat",
    }
    return extensions.get(mime_type, mime_type)
@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str):
    """Context manager that converts in-memory audio to a temporary WAV file.

    The audio is written to a temporary input file, converted with ffmpeg,
    and the path of the resulting WAV is yielded. Both temporary files are
    removed when the context exits, including when the conversion fails.

    Args:
        audio: Encoded audio data.
        mime_type: MIME type of ``audio``; ``"audio/raw"`` is decoded as
            16 kHz mono signed 16-bit little-endian PCM.

    Yields:
        Path of the converted WAV file (valid only inside the ``with`` body).
    """
    temp_dir = tempfile.gettempdir()
    # Create a temporary file with the appropriate extension. Unknown MIME types
    # come back unchanged (e.g. "audio/ogg"); keep only the part after the slash
    # so the extension cannot introduce a path separator.
    input_ext = convert_mime_type_to_format(mime_type).rsplit("/", 1)[-1]
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
    try:
        with open(input_path, 'wb') as f:
            f.write(audio)
        # Export to wav
        print(mime_type, input_path, output_path)
        if mime_type == "audio/raw":
            ffmpeg.input(
                input_path,
                f='s16le',
                ar='16000',
                ac=1,
            ).output(output_path).run()
        else:
            ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
        yield output_path
    finally:
        # The conversion sits inside the try so a failed ffmpeg run does not
        # leave the input file behind; either file may be absent at this point.
        for path in (input_path, output_path):
            with contextlib.suppress(FileNotFoundError):
                os.remove(path)
 def run_command(command):
    """Run ``command`` and return its captured ``(stdout, stderr)`` as text."""
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout, completed.stderr
 def get_transcription_file(wav_file_path: str):
    """Transcribe a WAV file with the whisper-rust binary and return its stdout.

    Raises:
        EnvironmentError: If ``WHISPER_MODEL_NAME`` is unset or empty.
    """
    # NOTE(review): the binary and model are looked up next to this module, and
    # WHISPER_MODEL_NAME has no default here, while ``install`` in this file
    # builds/downloads under the service directory and defaults the model name.
    # Confirm which location is intended.
    model_name = os.getenv('WHISPER_MODEL_NAME')
    if not model_name:
        raise EnvironmentError("WHISPER_MODEL_NAME environment variable is not set.")
    module_dir = os.path.dirname(__file__)
    binary = os.path.join(module_dir, 'whisper-rust', 'target', 'release', 'whisper-rust')
    model_file = os.path.join(module_dir, 'model', model_name)
    # stderr is captured by run_command but not used.
    transcript, _stderr = run_command([
        binary,
        '--model-path', model_file,
        '--file-path', wav_file_path,
    ])
    return transcript
 def get_transcription_bytes(audio_bytes: bytearray, mime_type):
    """Convert ``audio_bytes`` to a temporary WAV and return ``get_transcription_file`` of it."""
    # The transcription runs inside the ``with`` body, while the WAV still exists.
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        return get_transcription_file(wav_file_path)
 def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
    """Convert ``audio_bytes`` to a temporary WAV and return ``stt_wav`` of it."""
    # The transcription runs inside the ``with`` body, while the WAV still exists.
    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        return stt_wav(wav_file_path)
 def stt_wav(wav_file_path: str):
    """Re-encode ``wav_file_path`` to 16 kHz mono PCM and transcribe the result.

    The re-encoded file is written to the system temp directory and removed
    once transcription finishes or fails.
    """
    stamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
    output_path = os.path.join(tempfile.gettempdir(), f"output_stt_{stamp}.wav")
    stream = ffmpeg.input(wav_file_path)
    stream = stream.output(output_path, acodec='pcm_s16le', ac=1, ar='16k')
    stream.run()
    try:
        return get_transcription_file(output_path)
    finally:
        os.remove(output_path)
 def stt(input_data, mime_type="audio/wav"):
    """Transcribe audio given either as a file path or as in-memory bytes.

    Args:
        input_data: Path to a WAV file (``str``), or encoded audio as
            ``bytes``, ``bytearray`` or ``memoryview``.
        mime_type: MIME type of the audio; used only for in-memory input.

    Returns:
        Whatever ``stt_wav`` / ``stt_bytes`` returns for the input.

    Raises:
        ValueError: If ``input_data`` is neither a path nor bytes-like.
    """
    if isinstance(input_data, str):
        return stt_wav(input_data)
    # Accept immutable ``bytes`` (and memoryview) as well as bytearray; the
    # data is only ever written to a file, so any bytes-like object works.
    if isinstance(input_data, (bytes, bytearray, memoryview)):
        return stt_bytes(input_data, mime_type)
    raise ValueError("Input data should be either a path to a wav file (str) or audio bytes (bytearray)")
--- a/01OS/01OS/server/services/stt/local-whisper/whisper-rust/.gitignore
+++ b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/.gitignore
--- a/01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.lock
+++ b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.lock
--- a/01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.toml
+++ b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.toml
--- a/01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/main.rs
+++ b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/main.rs
--- a/01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/transcribe.rs
+++ b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/transcribe.rs
--- a/01OS/01OS/server/services/stt/openai/init.py
+++ b/01OS/01OS/server/services/stt/openai/init.py
--- a/01OS/01OS/server/services/stt/openai/stt.py
+++ b/01OS/01OS/server/services/stt/openai/stt.py
@ -1,9 +1,11 @@
-"""
+class Stt:
-Defines a function which takes a path to an audio file and turns it into text.
+    def __init__(self, config):
-"""
+        pass
    def stt(self, audio_file_path):
        return stt(audio_file_path)
 from dotenv import load_dotenv
 load_dotenv()  # take environment variables from .env.
 from datetime import datetime
 import os
@ -14,9 +16,6 @@ import subprocess
 import openai
 from openai import OpenAI
 from ..utils.logs import setup_logging
 from ..utils.logs import logger
 setup_logging()
 client = OpenAI()
@ -91,7 +90,6 @@ def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
 def stt_wav(wav_file_path: str):
    if os.getenv('ALL_LOCAL') == 'False':
    audio_file = open(wav_file_path, "rb")
    try:
        transcript = client.audio.transcriptions.create(
@ -100,19 +98,10 @@ def stt_wav(wav_file_path: str):
            response_format="text"
        )
    except openai.BadRequestError as e:
-            logger.info(f"openai.BadRequestError: {e}")
+        print(f"openai.BadRequestError: {e}")
        return None
    return transcript
    else:
        temp_dir = tempfile.gettempdir()
        output_path = os.path.join(temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
        ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
        try:
            transcript = get_transcription_file(output_path)
        finally:
            os.remove(output_path)
        return transcript
 def stt(input_data, mime_type="audio/wav"):
    if isinstance(input_data, str):
--- a/01OS/01OS/server/services/tts/init.py
+++ b/01OS/01OS/server/services/tts/init.py
--- a/01OS/01OS/server/services/tts/openai/init.py
+++ b/01OS/01OS/server/services/tts/openai/init.py
--- a/01OS/01OS/server/services/tts/openai/tts.py
+++ b/01OS/01OS/server/services/tts/openai/tts.py
@ -0,0 +1,30 @@
 import ffmpeg
 import tempfile
 from openai import OpenAI
 import os
 import subprocess
 import tempfile
 client = OpenAI()
 class Tts:
    """Text-to-speech service that calls ``client.audio.speech.create``."""
    def __init__(self, config):
        """No setup is required; ``config`` is accepted for interface parity."""
        pass
    def tts(self, text):
        """Synthesize ``text`` and return the path of a raw PCM audio file.

        The speech is requested as Opus, then converted with ffmpeg to
        16 kHz mono signed 16-bit little-endian samples.

        Returns:
            Path of the converted file. The caller owns it and is
            responsible for deleting it.
        """
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="opus"
        )
        # Reserve a name for the intermediate Opus file and close the handle
        # before anything else writes to that path.
        with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
            opus_path = temp_file.name
        # TODO: hack to format audio correctly for device
        # A unique output path per call: a fixed "raw.dat" in the shared temp
        # directory lets overlapping calls overwrite each other's audio.
        fd, outfile = tempfile.mkstemp(suffix=".dat")
        os.close(fd)
        try:
            response.stream_to_file(opus_path)
            # mkstemp already created the file, so ffmpeg must be allowed to overwrite it.
            ffmpeg.input(opus_path).output(outfile, f="s16le", ar="16000", ac="1").run(overwrite_output=True)
        except Exception:
            # Do not leave an empty or partial output file behind on failure.
            if os.path.exists(outfile):
                os.remove(outfile)
            raise
        finally:
            # The intermediate Opus file is never needed after conversion.
            if os.path.exists(opus_path):
                os.remove(opus_path)
        return outfile
--- a/01OS/01OS/server/services/tts/piper/init.py
+++ b/01OS/01OS/server/services/tts/piper/init.py
--- a/01OS/01OS/server/services/tts/piper/tts.py
+++ b/01OS/01OS/server/services/tts/piper/tts.py
@ -0,0 +1,84 @@
 import ffmpeg
 import tempfile
 import os
 import subprocess
 import tempfile
 import urllib.request
 import tarfile
 class Tts:
    """Text-to-speech service that runs a locally installed Piper binary."""
    def __init__(self, config):
        """Install Piper under ``config["service_directory"]`` if it is not there yet."""
        self.piper_directory = ""
        self.install(config["service_directory"])
    def tts(self, text):
        """Synthesize ``text`` with Piper and return the path of a raw PCM audio file.

        Piper writes a WAV file, which is then converted with ffmpeg to
        16 kHz mono signed 16-bit little-endian samples.

        Returns:
            Path of the converted file. The caller owns it and is
            responsible for deleting it.
        """
        piper_dir = self.piper_directory
        # Reserve a name for Piper's WAV output and close the handle first.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            output_file = temp_file.name
        # TODO: hack to format audio correctly for device
        # A unique output path per call: a fixed "raw.dat" in the shared temp
        # directory lets overlapping calls overwrite each other's audio.
        fd, outfile = tempfile.mkstemp(suffix=".dat")
        os.close(fd)
        try:
            subprocess.run([
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')),
                '--output_file', output_file
            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            # mkstemp already created the file, so ffmpeg must be allowed to overwrite it.
            ffmpeg.input(output_file).output(outfile, f="s16le", ar="16000", ac="1").run(overwrite_output=True)
        except Exception:
            # Do not leave an empty or partial output file behind on failure.
            if os.path.exists(outfile):
                os.remove(outfile)
            raise
        finally:
            # The intermediate WAV is never needed after conversion.
            if os.path.exists(output_file):
                os.remove(output_file)
        return outfile
    @staticmethod
    def _release_platform(sysname, machine):
        """Map ``uname`` values to the (os, arch) pair used in release asset names.

        Returns ``None`` for a macOS architecture that has no mapping.
        """
        if sysname == "Darwin":
            arch = {"arm64": "aarch64", "x86_64": "x64"}.get(machine)
            return None if arch is None else ("macos", arch)
        # Asset names use a lowercase OS component, while uname reports e.g. "Linux".
        return sysname.lower(), machine
    def install(self, service_directory):
        """Download Piper, a voice model and (on macOS) piper-phonemize.

        Nothing is downloaded when ``<service_directory>/piper`` already
        exists. On an unsupported macOS architecture a message is printed
        and the method returns without installing.
        """
        PIPER_FOLDER_PATH = service_directory
        self.piper_directory = os.path.join(PIPER_FOLDER_PATH, 'piper')
        if not os.path.isdir(self.piper_directory): # Check if the Piper directory exists
            os.makedirs(PIPER_FOLDER_PATH, exist_ok=True)
            # Determine OS and architecture
            uname = os.uname()
            target = self._release_platform(uname.sysname, uname.machine)
            if target is None:
                print("Piper: unsupported architecture")
                return
            OS, ARCH = target
            PIPER_ASSETNAME = f"piper_{OS}_{ARCH}.tar.gz"
            PIPER_URL = "https://github.com/rhasspy/piper/releases/latest/download/"
            # Download and extract Piper
            urllib.request.urlretrieve(f"{PIPER_URL}{PIPER_ASSETNAME}", os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME))
            with tarfile.open(os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME), 'r:gz') as tar:
                tar.extractall(path=PIPER_FOLDER_PATH)
            PIPER_VOICE_URL = os.getenv('PIPER_VOICE_URL', 'https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/')
            PIPER_VOICE_NAME = os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')
            # Download voice model and its json file
            urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}", os.path.join(self.piper_directory, PIPER_VOICE_NAME))
            urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}.json", os.path.join(self.piper_directory, f"{PIPER_VOICE_NAME}.json"))
            # Additional setup for macOS
            if OS == "macos":
                if ARCH == "x64":
                    subprocess.run(['softwareupdate', '--install-rosetta', '--agree-to-license'])
                PIPER_PHONEMIZE_ASSETNAME = f"piper-phonemize_{OS}_{ARCH}.tar.gz"
                PIPER_PHONEMIZE_URL = "https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
                urllib.request.urlretrieve(f"{PIPER_PHONEMIZE_URL}{PIPER_PHONEMIZE_ASSETNAME}", os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME))
                with tarfile.open(os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME), 'r:gz') as tar:
                    tar.extractall(path=self.piper_directory)
                PIPER_DIR = self.piper_directory
                # Rewrite the binary's @rpath library references to the extracted piper-phonemize libraries.
                subprocess.run(['install_name_tool', '-change', '@rpath/libespeak-ng.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib", f"{PIPER_DIR}/piper"])
                subprocess.run(['install_name_tool', '-change', '@rpath/libonnxruntime.1.14.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib", f"{PIPER_DIR}/piper"])
                subprocess.run(['install_name_tool', '-change', '@rpath/libpiper_phonemize.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib", f"{PIPER_DIR}/piper"])
            print("Piper setup completed.")
        else:
            print("Piper already set up. Skipping download.")
--- a/01OS/01OS/server/tts/tts.py
+++ b/01OS/01OS/server/tts/tts.py
@ -1,98 +0,0 @@
 """
 Defines a function which takes text and returns a path to an audio file.
 """
 from pydub import AudioSegment
 from dotenv import load_dotenv
 load_dotenv()  # take environment variables from .env.
 import ffmpeg
 import tempfile
 from openai import OpenAI
 import os
 import subprocess
 import tempfile
 from pydub import AudioSegment
 client = OpenAI()
 chunk_size = 1024
 def stream_tts(text):
    """
    A generator that streams tts as LMC messages.

    When the ``ALL_LOCAL`` environment variable equals ``'False'`` the speech
    comes from ``client.audio.speech.create``; otherwise from the Piper binary
    under ``local_service/piper`` next to this module. Either way the audio is
    converted with ffmpeg to 16 kHz mono signed 16-bit little-endian samples.

    Yields:
        A start marker dict, the audio in ``chunk_size``-byte slices, and an
        end marker dict.
    """
    use_remote = os.getenv('ALL_LOCAL') == 'False'
    # Reserve a name for the intermediate audio file and close the handle first.
    with tempfile.NamedTemporaryFile(suffix=".opus" if use_remote else ".wav", delete=False) as temp_file:
        source_path = temp_file.name
    # TODO: hack to format audio correctly for device
    # A unique output path per call: a fixed "raw.dat" in the shared temp
    # directory lets overlapping calls overwrite each other's audio.
    fd, outfile = tempfile.mkstemp(suffix=".dat")
    os.close(fd)
    try:
        if use_remote:
            response = client.audio.speech.create(
                model="tts-1",
                voice="alloy",
                input=text,
                response_format="opus"
            )
            response.stream_to_file(source_path)
        else:
            piper_dir = os.path.join(os.path.dirname(__file__), 'local_service', 'piper')
            # Default the voice so an unset PIPER_VOICE_NAME does not turn into
            # os.path.join(piper_dir, None) and a TypeError.
            voice_name = os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')
            subprocess.run([
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, voice_name),
                '--output_file', source_path
            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        # mkstemp already created the file, so ffmpeg must be allowed to overwrite it.
        ffmpeg.input(source_path).output(outfile, f="s16le", ar="16000", ac="1").run(overwrite_output=True)
        with open(outfile, "rb") as f:
            audio_bytes = f.read()
    finally:
        # Both temporary files are only needed until the bytes are in memory.
        for path in (source_path, outfile):
            if os.path.exists(path):
                os.remove(path)
    file_type = "bytes.raw"
    print(outfile, len(audio_bytes))
    # Stream the audio
    yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
    for i in range(0, len(audio_bytes), chunk_size):
        chunk = audio_bytes[i:i+chunk_size]
        yield chunk
    yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
 def play_audiosegment(audio):
    """
    UNUSED
    Play ``audio`` after adding a fade-out and trailing silence.

    The default playback makes some pops; the fade and padding fix that.
    The padded audio is exported to ``output_audio.wav`` in the current
    working directory, played to completion, and the file is then deleted.
    """
    # Apply a fade-out (optional but recommended to smooth the end)
    audio = audio.fade_out(500)
    # Add silence at the end
    silence_duration_ms = 500  # Duration of silence in milliseconds
    silence = AudioSegment.silent(duration=silence_duration_ms)
    audio_with_padding = audio + silence
    # Save the modified audio as a WAV file for compatibility with simpleaudio
    audio_with_padding.export("output_audio.wav", format="wav")
    # Load the processed WAV file
    # NOTE(review): ``sa`` is not imported anywhere in the visible part of this
    # module; presumably ``import simpleaudio as sa`` was intended — confirm.
    wave_obj = sa.WaveObject.from_wave_file("output_audio.wav")
    # Play the audio
    play_obj = wave_obj.play()
    # Wait for the playback to finish
    play_obj.wait_done()
    # Delete the wav file
    os.remove("output_audio.wav")
--- a/01OS/01OS/server/utils/bytes_to_wav.py
+++ b/01OS/01OS/server/utils/bytes_to_wav.py
@ -0,0 +1,57 @@
 from datetime import datetime
 import os
 import contextlib
 import tempfile
 import ffmpeg
 import subprocess
 def convert_mime_type_to_format(mime_type: str) -> str:
    """Return the file extension for an audio MIME type.

    WAV, WebM and raw audio map to ``wav``, ``webm`` and ``dat``; an
    unrecognised value is passed through unchanged.
    """
    if mime_type in ("audio/x-wav", "audio/wav"):
        extension = "wav"
    elif mime_type == "audio/webm":
        extension = "webm"
    elif mime_type == "audio/raw":
        extension = "dat"
    else:
        extension = mime_type
    return extension
@contextlib.contextmanager
def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
    """Convert raw audio bytes to a temporary WAV file for the duration of a `with` block.

    The bytes are written to a timestamped input file in the system temp
    directory and converted with ffmpeg to a mono, 16 kHz WAV file. The path
    of that WAV file is yielded to the `with` body.

    Note: despite the `-> str` annotation this is a generator-based context
    manager; the `with ... as` target receives the path string.

    Args:
        audio: The encoded (or raw PCM) audio data.
        mime_type: MIME type of `audio`. "audio/raw" is decoded as signed
            16-bit little-endian, 16 kHz, mono PCM; anything else is left
            for ffmpeg to probe.

    Yields:
        Path of the converted WAV file. Both temp files are deleted when the
        `with` block exits, so the path is only valid inside the block.

    Raises:
        Any exception raised while writing the input file or running ffmpeg
        propagates to the caller; the temp files are still cleaned up.
    """
    temp_dir = tempfile.gettempdir()

    # Unrecognized MIME types come back from convert_mime_type_to_format
    # unchanged (e.g. "audio/ogg"); keep only the part after the last "/" so
    # it is not interpreted as a directory separator in the file name.
    input_ext = convert_mime_type_to_format(mime_type).rsplit("/", 1)[-1]
    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")

    try:
        with open(input_path, 'wb') as f:
            f.write(audio)

        print(mime_type, input_path, output_path)

        # Export to wav
        if mime_type == "audio/raw":
            # Headerless PCM: ffmpeg must be told the sample layout explicitly.
            ffmpeg.input(
                input_path,
                f='s16le',
                ar='16000',
                ac=1,
            ).output(output_path).run()
        else:
            ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()

        yield output_path
    finally:
        # The conversion runs inside the try so a failed ffmpeg call no longer
        # leaks the input file; either file may be missing at this point.
        for path in (input_path, output_path):
            with contextlib.suppress(FileNotFoundError):
                os.remove(path)
 def run_command(command):
    """Execute `command` and return its captured output.

    The command is passed to `subprocess.run` as given. Both streams are
    captured and decoded as text.

    Returns:
        A `(stdout, stderr)` tuple of strings.
    """
    completed = subprocess.run(command, capture_output=True, text=True)
    captured_out = completed.stdout
    captured_err = completed.stderr
    return captured_out, captured_err
 def bytes_to_wav(audio_bytes: bytearray, mime_type):
    """Convert audio bytes to a WAV file and return the path of that file.

    `export_audio_to_wav_ffmpeg` deletes its output file as soon as its
    `with` block exits, so returning that path directly would hand the caller
    a file that no longer exists. The converted audio is therefore copied to
    a separate temporary file that outlives the context manager.

    Args:
        audio_bytes: The audio data to convert.
        mime_type: MIME type of `audio_bytes`, forwarded to
            `export_audio_to_wav_ffmpeg`.

    Returns:
        Path of a WAV file in the system temp directory. The caller owns this
        file and is responsible for deleting it.
    """
    import shutil

    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
        fd, persistent_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            shutil.copyfile(wav_file_path, persistent_path)
        except Exception:
            # Do not leave an empty placeholder behind if the copy fails.
            os.remove(persistent_path)
            raise
    return persistent_path
--- a/01OS/01OS/start.py
+++ b/01OS/01OS/start.py
@ -0,0 +1,95 @@
 import typer
 import asyncio
 import platform
 import concurrent.futures
 import threading
 import os
 import signal
 import importlib
 # Typer application object; the `run` command defined below is registered on it.
 app = typer.Typer()
@app.command()
def run(
    server: bool = typer.Option(False, "--server", help="Run server"),
    server_host: str = typer.Option("0.0.0.0", "--server-host", help="Specify the server host where the server will deploy"),
    server_port: int = typer.Option(8000, "--server-port", help="Specify the server port where the server will deploy"),
    tunnel_service: str = typer.Option("bore", "--tunnel-service", help="Specify the tunnel service"),
    expose: bool = typer.Option(False, "--expose", help="Expose server to internet"),
    client: bool = typer.Option(False, "--client", help="Run client"),
    server_url: str = typer.Option(None, "--server-url", help="Specify the server URL that the client should expect. Defaults to server-host and server-port"),
    client_type: str = typer.Option("auto", "--client-type", help="Specify the client type"),
    llm_service: str = typer.Option("litellm", "--llm-service", help="Specify the LLM service"),
    model: str = typer.Option("gpt-4", "--model", help="Specify the model"),
    llm_supports_vision: bool = typer.Option(False, "--llm-supports-vision", help="Specify if the LLM service supports vision"),
    llm_supports_functions: bool = typer.Option(False, "--llm-supports-functions", help="Specify if the LLM service supports functions"),
    context_window: int = typer.Option(2048, "--context-window", help="Specify the context window size"),
    max_tokens: int = typer.Option(4096, "--max-tokens", help="Specify the maximum number of tokens"),
    temperature: float = typer.Option(0.8, "--temperature", help="Specify the temperature for generation"),
    tts_service: str = typer.Option("openai", "--tts-service", help="Specify the TTS service"),
    stt_service: str = typer.Option("openai", "--stt-service", help="Specify the STT service"),
    local: bool = typer.Option(False, "--local", help="Use recommended local services for LLM, STT, and TTS"),
):
    """Run the 01 server, the 01 client, or both (the default when neither flag is given)."""
    # --local overrides whatever LLM / STT / TTS services were requested.
    if local:
        tts_service = "piper"
        llm_service = "llamafile"
        stt_service = "local-whisper"

    if not server_url:
        server_url = f"{server_host}:{server_port}"

    # With neither --server nor --client, start both.
    if not server and not client:
        server = True
        client = True

    def handle_exit(signum, frame):
        # Hard exit: the worker threads below are non-daemon and would
        # otherwise keep the process alive after Ctrl-C.
        os._exit(0)
    signal.signal(signal.SIGINT, handle_exit)

    # Only the threads that were actually started are joined at the end.
    # Joining three fixed names raised UnboundLocalError whenever one of
    # server / expose / client was not selected.
    threads = []

    if server:
        from .server.server import main
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        server_thread = threading.Thread(target=loop.run_until_complete, args=(main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service),))
        server_thread.start()
        threads.append(server_thread)

    if expose:
        # TODO: `tunnel_service` is currently unused; the tunnel is always
        # started through ./tunnel.sh, resolved against the working directory.
        tunnel_thread = threading.Thread(target=os.system, args=("./tunnel.sh",))
        tunnel_thread.start()
        threads.append(tunnel_thread)

    if client:
        if client_type == "auto":
            system_type = platform.system()
            if system_type == "Darwin":  # Mac OS
                client_type = "mac"
            elif system_type == "Linux":  # Linux System
                try:
                    with open('/proc/device-tree/model', 'r') as m:
                        if 'raspberry pi' in m.read().lower():
                            client_type = "rpi"
                        else:
                            client_type = "linux"
                except FileNotFoundError:
                    client_type = "linux"
            # NOTE: on any other platform client_type stays "auto", so the
            # import below looks for a `clients.auto.device` module.

        module = importlib.import_module(f".clients.{client_type}.device", package='01OS')
        # NOTE(review): every client module's main() is called with
        # server_url; verify that each of them accepts that argument.
        client_thread = threading.Thread(target=module.main, args=[server_url])
        client_thread.start()
        threads.append(client_thread)

    try:
        for thread in threads:
            thread.join()
    except KeyboardInterrupt:
        os.kill(os.getpid(), signal.SIGINT)
--- a/01OS/README.md
+++ b/01OS/README.md
@ -5,5 +5,5 @@ pip install 01OS
 ```
 ```bash
-01 # Runs the 01 server and client.
+01 # Runs the 01 server and client
 ```
--- a/01OS/poetry.lock
+++ b/01OS/poetry.lock
@ -8400,4 +8400,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.12"
-content-hash = "4e7112e334cb1610550bcc44ab5f0a257621d774513c24034d60272b741caf51"
+content-hash = "f582fa2573961a7bca4df34f7bf62bcbda856e57697f5e3daad6603ce2bc0589"
--- a/01OS/pyproject.toml
+++ b/01OS/pyproject.toml
@ -27,13 +27,15 @@ simpleaudio = "^1.0.4"
 opencv-python = "^4.9.0.80"
 open-interpreter = {version = "0.2.1rc1", extras = ["os"]}
 psutil = "^5.9.8"
 typer = "^0.9.0"
 platformdirs = "^4.2.0"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 [tool.poetry.scripts]
-01 = "start:main"
+01 = "01OS.start:app"
 [tool.poetry.group.dev.dependencies]
 black = "^23.10.1"
--- a/01OS/start.py
+++ b/01OS/start.py
@ -1,31 +0,0 @@
 """
 This is just for the Python package — we need a Python entrypoint.
 Just starts `start.sh` with all the same command line arguments. Aliased to 01.
 """
 import os
 import subprocess
 import sys
 import psutil
 import importlib
 # The package name `01OS` starts with a digit, so it cannot appear in a normal
 # `import` statement; load the module by its dotted path string instead.
 process_utils = importlib.import_module("01OS.server.utils.process_utils")
 # Module-level alias used by main() when the user interrupts the program.
 kill_process_tree = process_utils.kill_process_tree
 def main():
    """Launch `start.sh` (located next to this file) with the CLI arguments.

    All arguments after the program name are forwarded unchanged. The call
    blocks until the script exits. On Ctrl-C it prints a message and calls
    `kill_process_tree()`.
    """
    # Get command line arguments
    args = sys.argv[1:]
    # Get the directory of the current script
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # Prepare the command
    command = [os.path.join(dir_path, 'start.sh')] + args
    try:
        # Start start.sh using psutil for better process management, and to kill all processes
        process = psutil.Popen(command)
        # Wait for the script: Popen returns immediately, so without this the
        # KeyboardInterrupt handler below could practically never run.
        process.wait()
    except KeyboardInterrupt:
        print("Exiting...")
        kill_process_tree()
--- a/README.md
+++ b/README.md
@ -28,11 +28,10 @@ If you want to run local speech-to-text using Whisper, install Rust. Follow the
 pip install 01OS
 ```
-**Run the 01 end-to-end:**
+**Run the 01:**
 ```bash
-01 # This will run a server + attempt to determine and run a client.
+01 # This will run the server and attempt to determine and run a client.
 # (Behavior can be modified by changing the contents of `.env`)
 ```
 **Expose an 01 Server Publicly**
@ -40,6 +39,7 @@ pip install 01OS
 We currently support exposing the 01 server publicly via a couple of different tunnel services:
 - **bore.pub** ([GitHub](https://github.com/ekzhang/bore))
  - **Requirements:** Ensure that Rust is installed ([Rust Installation](https://www.rust-lang.org/tools/install)), then run:
    ```
    cargo install bore-cli
@ -50,6 +50,7 @@ We currently support exposing the 01 server publicly via a couple of different t
    ```
 - **localtunnel** ([GitHub](https://github.com/localtunnel/localtunnel))
  - **Requirements:** Ensure that Node.js is installed ([Node.js Download](https://nodejs.org/en/download)), then run:
    ```
    npm install -g localtunnel
@ -69,7 +70,6 @@ We currently support exposing the 01 server publicly via a couple of different t
    01 --server --expose-with-ngrok
    ```
 **Run a specific client:**
 ```bash
--- a/archive/start.sh
+++ b/archive/start.sh