`start.py`, modular architecture, OI flags, mutable items to user dir

10 months ago · 701d357e30
parent 91fcb94438
commit 701d357e30
40 changed files with 689 additions and 205 deletions
--- a/01OS/01OS/clients/base_device.py
+++ b/01OS/01OS/clients/base_device.py
@ -66,6 +66,7 @@ class Device:
        self.pressed_keys = set()
        self.captured_images = []
        self.audiosegments = []
+        self.server_url = ""

    def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
        """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
@ -303,10 +304,7 @@ class Device:

    async def start_async(self):
            # Configuration for WebSocket
-            WS_URL = os.getenv('SERVER_URL')
-            if not WS_URL:
-                raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
-
+            WS_URL = f"ws://{self.server_url}"
            # Start the WebSocket communication
            asyncio.create_task(self.websocket_communication(WS_URL))

--- a/01OS/01OS/clients/macos/init.py
+++ b/01OS/01OS/clients/macos/init.py
--- a/01OS/01OS/clients/mac/device.py
+++ b/01OS/01OS/clients/mac/device.py
@ -0,0 +1,10 @@
+from ..base_device import Device
+
+device = Device()
+
+def main(server_url):
+    device.server_url = server_url
+    device.start()
+
+if __name__ == "__main__":
+    main()
--- a/01OS/01OS/clients/macos/device.py
+++ b/01OS/01OS/clients/macos/device.py
@ -1,4 +0,0 @@
-from ..base_device import Device
-
-desktop_device = Device()
-desktop_device.start()
--- a/01OS/01OS/clients/rpi/device.py
+++ b/01OS/01OS/clients/rpi/device.py
@ -1,4 +1,9 @@
 from ..base_device import Device

-rpi_device = Device()
-rpi_device.start()
+device = Device()
+
+def main():
+    device.start()
+
+if __name__ == "__main__":
+    main()
--- a/01OS/01OS/clients/start.sh
+++ b/01OS/01OS/clients/start.sh
@ -1,6 +0,0 @@
-DEVICE=$(uname -n)
-if [[ "$DEVICE" == "rpi" ]]; then
-    python -m 01OS.clients.rpi.device
-else
-    python -m 01OS.clients.macos.device
-fi
--- a/01OS/01OS/server/i.py
+++ b/01OS/01OS/server/i.py
@ -1,6 +1,7 @@
 from dotenv import load_dotenv
 load_dotenv()  # take environment variables from .env.

+from platformdirs import user_data_dir
 import os
 import glob
 import json
@ -36,8 +37,11 @@ def configure_interpreter(interpreter: OpenInterpreter):

    ### RESET conversations/user.json

-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    user_json_path = os.path.join(script_dir, 'conversations', 'user.json')
+    
+    app_dir = user_data_dir('01')
+    conversations_dir = os.path.join(app_dir, 'conversations')
+    os.makedirs(conversations_dir, exist_ok=True)
+    user_json_path = os.path.join(conversations_dir, 'user.json')
    with open(user_json_path, 'w') as file:
        json.dump([], file)

--- a/01OS/01OS/server/server.py
+++ b/01OS/01OS/server/server.py
@ -1,17 +1,17 @@
 from dotenv import load_dotenv
 load_dotenv()  # take environment variables from .env.

+from platformdirs import user_data_dir
 import ast
 import json
 import queue
 import os
 import traceback
+from .utils.bytes_to_wav import bytes_to_wav
 import re
 from fastapi import FastAPI, Request
 from fastapi.responses import PlainTextResponse
 from starlette.websockets import WebSocket, WebSocketDisconnect
-from .stt.stt import stt_bytes
-from .tts.tts import stream_tts
 from pathlib import Path
 import asyncio
 import urllib.parse
@ -28,7 +28,8 @@ accumulator = Accumulator()

 app = FastAPI()

-conversation_history_path = Path(__file__).parent / 'conversations' / 'user.json'
+app_dir = user_data_dir('01')
+conversation_history_path = os.path.join(app_dir, 'conversations', 'user.json')

 SERVER_LOCAL_PORT = int(os.getenv('SERVER_LOCAL_PORT', 8000))

@ -198,7 +199,9 @@ async def listener():
            # Convert bytes to audio file
            # Format will be bytes.wav or bytes.opus
            mime_type = "audio/" + message["format"].split(".")[1]
-            text = stt_bytes(message["content"], mime_type)
+            audio_file_path = bytes_to_wav(message["content"], mime_type)
+            text = stt(audio_file_path)
+            print(text)
            message = {"role": "user", "type": "message", "content": text}

        # At this point, we have only text messages
@ -335,30 +338,77 @@ async def stream_tts_to_device(sentence):
    ]
    if sentence.lower().strip().strip(".!?").strip() in force_task_completion_responses:
        return
+
    for chunk in stream_tts(sentence):
        await to_device.put(chunk)

+def stream_tts(sentence):
+    
+    audio_file = tts(sentence)
+
+    with open(audio_file, "rb") as f:
+        audio_bytes = f.read()
+    os.remove(audio_file)
+
+    file_type = "bytes.raw"
+    chunk_size = 1024
+
+    # Stream the audio
+    yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
+    for i in range(0, len(audio_bytes), chunk_size):
+        chunk = audio_bytes[i:i+chunk_size]
+        yield chunk
+    yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
+
 from uvicorn import Config, Server
+import os
+import platform
+from importlib import import_module

-# Run the FastAPI app
-if __name__ == "__main__":
+async def main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service):
+        
+        # Setup services
+        application_directory = user_data_dir('01')
+        services_directory = os.path.join(application_directory, 'services')

-    async def main():
-        if os.getenv('TEACH_MODE') == "True":
-            teach()
-        else:
-            # Start listening
-            asyncio.create_task(listener())
+        service_dict = {'llm': llm_service, 'tts': tts_service, 'stt': stt_service}

-            # Start watching the kernel if it's your job to do that
-            if os.getenv('CODE_RUNNER') == "server":
-                asyncio.create_task(put_kernel_messages_into_queue(from_computer))
-                
-            # Start the server
-            logger.info("Starting `server.py`... on localhost:" + str(SERVER_LOCAL_PORT))
+        for service in service_dict:

-            config = Config(app, host="localhost", port=SERVER_LOCAL_PORT, lifespan='on')
-            server = Server(config)
-            await server.serve()
+            service_directory = os.path.join(services_directory, service, service_dict[service])

+            # This is the folder they can mess around in
+            config = {"service_directory": service_directory}
+
+            if service == "llm":
+                config.update({
+                    "interpreter": interpreter,
+                    "model": model,
+                    "llm_supports_vision": llm_supports_vision,
+                    "llm_supports_functions": llm_supports_functions,
+                    "context_window": context_window,
+                    "max_tokens": max_tokens,
+                    "temperature": temperature
+                })
+
+            module = import_module(f'.server.services.{service}.{service_dict[service]}.{service}', package='01OS')
+            ServiceClass = getattr(module, service.capitalize())
+            service_instance = ServiceClass(config)
+            globals()[service] = getattr(service_instance, service)
+
+        interpreter.llm.completions = llm
+        
+        # Start listening
+        asyncio.create_task(listener())
+
+        # Start watching the kernel if it's your job to do that
+        if True: # in the future, code can run on device. for now, just server.
+            asyncio.create_task(put_kernel_messages_into_queue(from_computer))
+            
+        config = Config(app, host=server_host, port=int(server_port), lifespan='on')
+        server = Server(config)
+        await server.serve()
+
+# Run the FastAPI app
+if __name__ == "__main__":
    asyncio.run(main())
--- a/01OS/01OS/server/services/init.py
+++ b/01OS/01OS/server/services/init.py
--- a/01OS/01OS/server/services/llm/init.py
+++ b/01OS/01OS/server/services/llm/init.py
--- a/01OS/01OS/server/services/llm/litellm/init.py
+++ b/01OS/01OS/server/services/llm/litellm/init.py
--- a/01OS/01OS/server/services/llm/litellm/llm.py
+++ b/01OS/01OS/server/services/llm/litellm/llm.py
@ -0,0 +1,15 @@
+class Llm:
+    def __init__(self, config):
+
+        # Litellm is used by OI by default, so we just modify OI
+
+        interpreter = config["interpreter"]
+        config.pop("interpreter", None)
+        config.pop("service_directory", None)
+        for key, value in config.items():
+            setattr(interpreter, key.replace("-", "_"), value)
+
+        self.llm = interpreter.llm.completions
+
+        
+
--- a/01OS/01OS/server/services/llm/llamaedge/init.py
+++ b/01OS/01OS/server/services/llm/llamaedge/init.py
--- a/01OS/01OS/server/services/llm/llamaedge/llm.py
+++ b/01OS/01OS/server/services/llm/llamaedge/llm.py
@ -0,0 +1,49 @@
+import os
+import subprocess
+import requests
+import json
+
+class Llm:
+    def __init__(self, config):
+        self.install(config["service_directory"])
+
+    def install(self, service_directory):
+        LLM_FOLDER_PATH = service_directory
+        self.llm_directory = os.path.join(LLM_FOLDER_PATH, 'llm')
+        if not os.path.isdir(self.llm_directory): # Check if the LLM directory exists
+            os.makedirs(LLM_FOLDER_PATH, exist_ok=True)
+
+            # Install WasmEdge
+            subprocess.run(['curl', '-sSf', 'https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install.sh', '|', 'bash', '-s', '--', '--plugin', 'wasi_nn-ggml'])
+
+            # Download the Qwen1.5-0.5B-Chat model GGUF file
+            MODEL_URL = "https://huggingface.co/second-state/Qwen1.5-0.5B-Chat-GGUF/resolve/main/Qwen1.5-0.5B-Chat-Q5_K_M.gguf"
+            subprocess.run(['curl', '-LO', MODEL_URL], cwd=self.llm_directory)
+            
+            # Download the llama-api-server.wasm app
+            APP_URL = "https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm"
+            subprocess.run(['curl', '-LO', APP_URL], cwd=self.llm_directory)
+
+            # Run the API server
+            subprocess.run(['wasmedge', '--dir', '.:.', '--nn-preload', 'default:GGML:AUTO:Qwen1.5-0.5B-Chat-Q5_K_M.gguf', 'llama-api-server.wasm', '-p', 'llama-2-chat'], cwd=self.llm_directory)
+
+            print("LLM setup completed.")
+        else:
+            print("LLM already set up. Skipping download.")
+
+    def llm(self, messages):
+        url = "http://localhost:8080/v1/chat/completions"
+        headers = {
+            'accept': 'application/json',
+            'Content-Type': 'application/json'
+        }
+        data = {
+            "messages": messages,
+            "model": "llama-2-chat"
+        }
+        with requests.post(url, headers=headers, data=json.dumps(data), stream=True) as response:
+            for line in response.iter_lines():
+                if line:
+                    yield json.loads(line)
+
+
--- a/01OS/01OS/server/services/llm/llamafile/init.py
+++ b/01OS/01OS/server/services/llm/llamafile/init.py
--- a/01OS/01OS/server/services/llm/llamafile/llm.py
+++ b/01OS/01OS/server/services/llm/llamafile/llm.py
@ -0,0 +1,84 @@
+import os
+import platform
+import subprocess
+import time
+import wget
+import stat
+
+class Llm:
+    def __init__(self, config):
+
+        self.interpreter = config["interpreter"]
+        config.pop("interpreter", None)
+        
+        self.install(config["service_directory"])
+
+        config.pop("service_directory", None)
+        for key, value in config.items():
+            setattr(self.interpreter, key.replace("-", "_"), value)
+
+        self.llm = self.interpreter.llm.completions
+
+    def install(self, service_directory):
+
+        if platform.system() == "Darwin": # Check if the system is MacOS
+            result = subprocess.run(
+                ["xcode-select", "-p"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+            )
+            if result.returncode != 0:
+                print(
+                    "Llamafile requires Mac users to have Xcode installed. You can install Xcode from https://developer.apple.com/xcode/ .\n\nAlternatively, you can use `LM Studio`, `Jan.ai`, or `Ollama` to manage local language models. Learn more at https://docs.openinterpreter.com/guides/running-locally ."
+                )
+                time.sleep(3)
+                raise Exception("Xcode is not installed. Please install Xcode and try again.")
+
+        # Define the path to the models directory
+        models_dir = os.path.join(service_directory, "models")
+
+        # Check and create the models directory if it doesn't exist
+        if not os.path.exists(models_dir):
+            os.makedirs(models_dir)
+
+        # Define the path to the new llamafile
+        llamafile_path = os.path.join(models_dir, "phi-2.Q4_K_M.llamafile")
+
+        # Check if the new llamafile exists, if not download it
+        if not os.path.exists(llamafile_path):
+            print(
+                "Attempting to download the `Phi-2` language model. This may take a few minutes."
+            )
+            time.sleep(3)
+            
+            url = "https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q4_K_M.llamafile"
+            wget.download(url, llamafile_path)
+
+        
+
+        # Make the new llamafile executable
+        if platform.system() != "Windows":
+            st = os.stat(llamafile_path)
+            os.chmod(llamafile_path, st.st_mode | stat.S_IEXEC)
+
+        # Run the new llamafile in the background
+        if os.path.exists(llamafile_path):
+            try:
+                # Test if the llamafile is executable
+                subprocess.check_call([llamafile_path])
+            except subprocess.CalledProcessError:
+                print("The llamafile is not executable. Please check the file permissions.")
+                raise
+            subprocess.Popen([llamafile_path, "-ngl", "9999"])
+        else:
+            error_message = "The llamafile does not exist or is corrupted. Please ensure it has been downloaded correctly or try again."
+            print(error_message)
+            print(error_message)
+
+        self.interpreter.system_message = "You are Open Interpreter, a world-class programmer that can execute code on the user's machine."
+        self.interpreter.offline = True
+
+        self.interpreter.llm.model = "local"
+        self.interpreter.llm.temperature = 0
+        self.interpreter.llm.api_base = "https://localhost:8080/v1"
+        self.interpreter.llm.max_tokens = 1000
+        self.interpreter.llm.context_window = 3000
+        self.interpreter.llm.supports_functions = False
--- a/01OS/01OS/server/services/stt/init.py
+++ b/01OS/01OS/server/services/stt/init.py
--- a/01OS/01OS/server/services/stt/local-whisper/init.py
+++ b/01OS/01OS/server/services/stt/local-whisper/init.py
--- a/01OS/01OS/server/services/stt/local-whisper/stt.py
+++ b/01OS/01OS/server/services/stt/local-whisper/stt.py
@ -0,0 +1,151 @@
+"""
+Defines a function which takes a path to an audio file and turns it into text.
+"""
+
+from datetime import datetime
+import os
+import contextlib
+import tempfile
+import shutil
+import ffmpeg
+import subprocess
+
+import os
+import subprocess
+
+
+class Stt:
+    def __init__(self, config):
+        service_directory = config["service_directory"]
+        install(service_directory)
+
+    def stt(self, audio_file_path):
+        return stt(audio_file_path)
+
+
+
+def install(service_dir):
+
+    ### INSTALL
+
+    WHISPER_RUST_PATH = os.path.join(service_dir, "whisper-rust")
+    script_dir = os.path.dirname(os.path.realpath(__file__))
+    source_whisper_rust_path = os.path.join(script_dir, "whisper-rust")
+    if not os.path.exists(source_whisper_rust_path):
+        print(f"Source directory does not exist: {source_whisper_rust_path}")
+        exit(1)
+    if not os.path.exists(WHISPER_RUST_PATH):
+        shutil.copytree(source_whisper_rust_path, WHISPER_RUST_PATH)
+
+    os.chdir(WHISPER_RUST_PATH)
+
+    # Check if whisper-rust executable exists before attempting to build
+    if not os.path.isfile(os.path.join(WHISPER_RUST_PATH, "target/release/whisper-rust")):
+        # Check if Rust is installed. Needed to build whisper executable
+        rust_check = subprocess.call('command -v rustc', shell=True)
+        if rust_check != 0:
+            print("Rust is not installed or is not in system PATH. Please install Rust before proceeding.")
+            exit(1)
+        
+        # Build Whisper Rust executable if not found
+        subprocess.call('cargo build --release', shell=True)
+    else:
+        print("Whisper Rust executable already exists. Skipping build.")
+
+    WHISPER_MODEL_PATH = os.path.join(service_dir, "model")
+    
+    WHISPER_MODEL_NAME = os.getenv('WHISPER_MODEL_NAME', 'ggml-tiny.en.bin')
+    WHISPER_MODEL_URL = os.getenv('WHISPER_MODEL_URL', 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/')
+    
+    if not os.path.isfile(os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)):
+        os.makedirs(WHISPER_MODEL_PATH, exist_ok=True)
+        subprocess.call(f'curl -L "{WHISPER_MODEL_URL}{WHISPER_MODEL_NAME}" -o "{os.path.join(WHISPER_MODEL_PATH, WHISPER_MODEL_NAME)}"', shell=True)
+    else:
+        print("Whisper model already exists. Skipping download.")
+
+def convert_mime_type_to_format(mime_type: str) -> str:
+    if mime_type == "audio/x-wav" or mime_type == "audio/wav":
+        return "wav"
+    if mime_type == "audio/webm":
+        return "webm"
+    if mime_type == "audio/raw":
+        return "dat"
+
+    return mime_type
+
+@contextlib.contextmanager
+def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
+    temp_dir = tempfile.gettempdir()
+
+    # Create a temporary file with the appropriate extension
+    input_ext = convert_mime_type_to_format(mime_type)
+    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
+    with open(input_path, 'wb') as f:
+        f.write(audio)
+
+    # Check if the input file exists
+    assert os.path.exists(input_path), f"Input file does not exist: {input_path}"
+
+    # Export to wav
+    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
+    print(mime_type, input_path, output_path)
+    if mime_type == "audio/raw":
+        ffmpeg.input(
+            input_path,
+            f='s16le',
+            ar='16000',
+            ac=1,
+        ).output(output_path).run()
+    else:
+        ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
+
+    try:
+        yield output_path
+    finally:
+        os.remove(input_path)
+        os.remove(output_path)
+
+def run_command(command):
+    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    return result.stdout, result.stderr
+
+def get_transcription_file(wav_file_path: str):
+    local_path = os.path.join(os.path.dirname(__file__), 'model')
+    whisper_rust_path = os.path.join(os.path.dirname(__file__), 'whisper-rust', 'target', 'release')
+    model_name = os.getenv('WHISPER_MODEL_NAME')
+    if not model_name:
+        raise EnvironmentError("WHISPER_MODEL_NAME environment variable is not set.")
+
+    output, error = run_command([
+        os.path.join(whisper_rust_path, 'whisper-rust'),
+        '--model-path', os.path.join(local_path, model_name),
+        '--file-path', wav_file_path
+    ])
+
+    return output
+
+def get_transcription_bytes(audio_bytes: bytearray, mime_type):
+    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
+        return get_transcription_file(wav_file_path)
+
+def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):
+    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
+        return stt_wav(wav_file_path)
+
+def stt_wav(wav_file_path: str):
+        temp_dir = tempfile.gettempdir()
+        output_path = os.path.join(temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
+        ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
+        try:
+            transcript = get_transcription_file(output_path)
+        finally:
+            os.remove(output_path)
+        return transcript
+
+def stt(input_data, mime_type="audio/wav"):
+    if isinstance(input_data, str):
+        return stt_wav(input_data)
+    elif isinstance(input_data, bytearray):
+        return stt_bytes(input_data, mime_type)
+    else:
+        raise ValueError("Input data should be either a path to a wav file (str) or audio bytes (bytearray)")
--- a/01OS/01OS/server/services/stt/local-whisper/whisper-rust/.gitignore
+++ b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/.gitignore
--- a/01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.lock
+++ b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.lock
--- a/01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.toml
+++ b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/Cargo.toml
--- a/01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/main.rs
+++ b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/main.rs
--- a/01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/transcribe.rs
+++ b/01OS/01OS/server/services/stt/local-whisper/whisper-rust/src/transcribe.rs
--- a/01OS/01OS/server/services/stt/openai/init.py
+++ b/01OS/01OS/server/services/stt/openai/init.py
--- a/01OS/01OS/server/services/stt/openai/stt.py
+++ b/01OS/01OS/server/services/stt/openai/stt.py
@ -1,9 +1,11 @@
-"""
-Defines a function which takes a path to an audio file and turns it into text.
-"""
+class Stt:
+    def __init__(self, config):
+        pass
+
+    def stt(self, audio_file_path):
+        return stt(audio_file_path)
+

-from dotenv import load_dotenv
-load_dotenv()  # take environment variables from .env.

 from datetime import datetime
 import os
@ -14,9 +16,6 @@ import subprocess
 import openai
 from openai import OpenAI

-from ..utils.logs import setup_logging
-from ..utils.logs import logger
-setup_logging()

 client = OpenAI()

@ -91,28 +90,18 @@ def stt_bytes(audio_bytes: bytearray, mime_type="audio/wav"):

 def stt_wav(wav_file_path: str):

-    if os.getenv('ALL_LOCAL') == 'False':
-        audio_file = open(wav_file_path, "rb")
-        try:
-            transcript = client.audio.transcriptions.create(
-                model="whisper-1", 
-                file=audio_file,
-                response_format="text"
-            )
-        except openai.BadRequestError as e:
-            logger.info(f"openai.BadRequestError: {e}")
-            return None
-
-        return transcript
-    else:
-        temp_dir = tempfile.gettempdir()
-        output_path = os.path.join(temp_dir, f"output_stt_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
-        ffmpeg.input(wav_file_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
-        try:
-            transcript = get_transcription_file(output_path)
-        finally:
-            os.remove(output_path)
-        return transcript
+    audio_file = open(wav_file_path, "rb")
+    try:
+        transcript = client.audio.transcriptions.create(
+            model="whisper-1", 
+            file=audio_file,
+            response_format="text"
+        )
+    except openai.BadRequestError as e:
+        print(f"openai.BadRequestError: {e}")
+        return None
+
+    return transcript

 def stt(input_data, mime_type="audio/wav"):
    if isinstance(input_data, str):
--- a/01OS/01OS/server/services/tts/init.py
+++ b/01OS/01OS/server/services/tts/init.py
--- a/01OS/01OS/server/services/tts/openai/init.py
+++ b/01OS/01OS/server/services/tts/openai/init.py
--- a/01OS/01OS/server/services/tts/openai/tts.py
+++ b/01OS/01OS/server/services/tts/openai/tts.py
@ -0,0 +1,30 @@
+import ffmpeg
+import tempfile
+from openai import OpenAI
+import os
+import subprocess
+import tempfile
+
+client = OpenAI()
+
+class Tts:
+    def __init__(self, config):
+        pass
+
+    def tts(self, text):
+            response = client.audio.speech.create(
+                model="tts-1",
+                voice="alloy",
+                input=text,
+                response_format="opus"
+            )
+            with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
+                response.stream_to_file(temp_file.name)
+
+                # TODO: hack to format audio correctly for device
+                outfile = tempfile.gettempdir() + "/" + "raw.dat"
+                ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run()
+
+                return outfile
+
+
--- a/01OS/01OS/server/services/tts/piper/init.py
+++ b/01OS/01OS/server/services/tts/piper/init.py
--- a/01OS/01OS/server/services/tts/piper/tts.py
+++ b/01OS/01OS/server/services/tts/piper/tts.py
@ -0,0 +1,84 @@
+import ffmpeg
+import tempfile
+import os
+import subprocess
+import tempfile
+import urllib.request
+import tarfile
+
+class Tts:
+    def __init__(self, config):
+        self.piper_directory = ""
+        self.install(config["service_directory"])
+
+    def tts(self, text):
+        
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            output_file = temp_file.name
+            piper_dir = self.piper_directory
+            subprocess.run([
+                os.path.join(piper_dir, 'piper'),
+                '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')),
+                '--output_file', output_file
+            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+
+            # TODO: hack to format audio correctly for device
+            outfile = tempfile.gettempdir() + "/" + "raw.dat"
+            ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run()
+
+            return outfile
+
+    def install(self, service_directory):
+        PIPER_FOLDER_PATH = service_directory
+        self.piper_directory = os.path.join(PIPER_FOLDER_PATH, 'piper')
+        if not os.path.isdir(self.piper_directory): # Check if the Piper directory exists
+            os.makedirs(PIPER_FOLDER_PATH, exist_ok=True)
+
+            # Determine OS and architecture
+            OS = os.uname().sysname
+            ARCH = os.uname().machine
+            if OS == "Darwin":
+                OS = "macos"
+                if ARCH == "arm64":
+                    ARCH = "aarch64"
+                elif ARCH == "x86_64":
+                    ARCH = "x64"
+                else:
+                    print("Piper: unsupported architecture")
+                    return
+
+            PIPER_ASSETNAME = f"piper_{OS}_{ARCH}.tar.gz"
+            PIPER_URL = "https://github.com/rhasspy/piper/releases/latest/download/"
+
+            # Download and extract Piper
+            urllib.request.urlretrieve(f"{PIPER_URL}{PIPER_ASSETNAME}", os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME))
+            with tarfile.open(os.path.join(PIPER_FOLDER_PATH, PIPER_ASSETNAME), 'r:gz') as tar:
+                tar.extractall(path=PIPER_FOLDER_PATH)
+
+            PIPER_VOICE_URL = os.getenv('PIPER_VOICE_URL', 'https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/')
+            PIPER_VOICE_NAME = os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')
+
+            # Download voice model and its json file
+            urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}", os.path.join(self.piper_directory, PIPER_VOICE_NAME))
+            urllib.request.urlretrieve(f"{PIPER_VOICE_URL}{PIPER_VOICE_NAME}.json", os.path.join(self.piper_directory, f"{PIPER_VOICE_NAME}.json"))
+
+            # Additional setup for macOS
+            if OS == "macos":
+                if ARCH == "x64":
+                    subprocess.run(['softwareupdate', '--install-rosetta', '--agree-to-license'])
+
+                PIPER_PHONEMIZE_ASSETNAME = f"piper-phonemize_{OS}_{ARCH}.tar.gz"
+                PIPER_PHONEMIZE_URL = "https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
+                urllib.request.urlretrieve(f"{PIPER_PHONEMIZE_URL}{PIPER_PHONEMIZE_ASSETNAME}", os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME))
+
+                with tarfile.open(os.path.join(self.piper_directory, PIPER_PHONEMIZE_ASSETNAME), 'r:gz') as tar:
+                    tar.extractall(path=self.piper_directory)
+
+                PIPER_DIR = self.piper_directory
+                subprocess.run(['install_name_tool', '-change', '@rpath/libespeak-ng.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib", f"{PIPER_DIR}/piper"])
+                subprocess.run(['install_name_tool', '-change', '@rpath/libonnxruntime.1.14.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib", f"{PIPER_DIR}/piper"])
+                subprocess.run(['install_name_tool', '-change', '@rpath/libpiper_phonemize.1.dylib', f"{PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib", f"{PIPER_DIR}/piper"])
+
+            print("Piper setup completed.")
+        else:
+            print("Piper already set up. Skipping download.")
--- a/01OS/01OS/server/tts/tts.py
+++ b/01OS/01OS/server/tts/tts.py
@ -1,98 +0,0 @@
-"""
-Defines a function which takes text and returns a path to an audio file.
-"""
-
-from pydub import AudioSegment
-from dotenv import load_dotenv
-load_dotenv()  # take environment variables from .env.
-
-import ffmpeg
-import tempfile
-from openai import OpenAI
-import os
-import subprocess
-import tempfile
-from pydub import AudioSegment
-
-client = OpenAI()
-
-chunk_size = 1024
-
-def stream_tts(text):
-    """
-    A generator that streams tts as LMC messages.
-    """
-    if os.getenv('ALL_LOCAL') == 'False':
-        response = client.audio.speech.create(
-            model="tts-1",
-            voice="alloy",
-            input=text,
-            response_format="opus"
-        )
-        with tempfile.NamedTemporaryFile(suffix=".opus", delete=False) as temp_file:
-            response.stream_to_file(temp_file.name)
-
-            # TODO: hack to format audio correctly for device
-            outfile = tempfile.gettempdir() + "/" + "raw.dat"
-            ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run()
-            with open(outfile, "rb") as f:
-                audio_bytes = f.read()
-            file_type = "bytes.raw"
-            print(outfile, len(audio_bytes))
-            os.remove(outfile)
-
-    else:
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-            output_file = temp_file.name
-            piper_dir = os.path.join(os.path.dirname(__file__), 'local_service', 'piper')
-            subprocess.run([
-                os.path.join(piper_dir, 'piper'),
-                '--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME')),
-                '--output_file', output_file
-            ], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-
-            # TODO: hack to format audio correctly for device
-            outfile = tempfile.gettempdir() + "/" + "raw.dat"
-            ffmpeg.input(temp_file.name).output(outfile, f="s16le", ar="16000", ac="1").run()
-            with open(outfile, "rb") as f:
-                audio_bytes = f.read()
-            file_type = "bytes.raw"
-            print(outfile, len(audio_bytes))
-            os.remove(outfile)
-
-    # Stream the audio
-    yield {"role": "assistant", "type": "audio", "format": file_type, "start": True}
-    for i in range(0, len(audio_bytes), chunk_size):
-        chunk = audio_bytes[i:i+chunk_size]
-        yield chunk
-    yield {"role": "assistant", "type": "audio", "format": file_type, "end": True}
-
-def play_audiosegment(audio):
-    """
-    UNUSED
-    the default makes some pops. this fixes that
-    """
-
-    # Apply a fade-out (optional but recommended to smooth the end)
-    audio = audio.fade_out(500)
-
-    # Add silence at the end
-    silence_duration_ms = 500  # Duration of silence in milliseconds
-    silence = AudioSegment.silent(duration=silence_duration_ms)
-    audio_with_padding = audio + silence
-
-    # Save the modified audio as a WAV file for compatibility with simpleaudio
-    audio_with_padding.export("output_audio.wav", format="wav")
-
-    # Load the processed WAV file
-    wave_obj = sa.WaveObject.from_wave_file("output_audio.wav")
-
-    # Play the audio
-    play_obj = wave_obj.play()
-
-    # Wait for the playback to finish
-    play_obj.wait_done()
-
-    # Delete the wav file
-    os.remove("output_audio.wav")
-
--- a/01OS/01OS/server/utils/bytes_to_wav.py
+++ b/01OS/01OS/server/utils/bytes_to_wav.py
@ -0,0 +1,57 @@
+from datetime import datetime
+import os
+import contextlib
+import tempfile
+import ffmpeg
+import subprocess
+
+def convert_mime_type_to_format(mime_type: str) -> str:
+    if mime_type == "audio/x-wav" or mime_type == "audio/wav":
+        return "wav"
+    if mime_type == "audio/webm":
+        return "webm"
+    if mime_type == "audio/raw":
+        return "dat"
+
+    return mime_type
+
+@contextlib.contextmanager
+def export_audio_to_wav_ffmpeg(audio: bytearray, mime_type: str) -> str:
+    temp_dir = tempfile.gettempdir()
+
+    # Create a temporary file with the appropriate extension
+    input_ext = convert_mime_type_to_format(mime_type)
+    input_path = os.path.join(temp_dir, f"input_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.{input_ext}")
+    with open(input_path, 'wb') as f:
+        f.write(audio)
+
+    # Check if the input file exists
+    assert os.path.exists(input_path), f"Input file does not exist: {input_path}"
+
+    # Export to wav
+    output_path = os.path.join(temp_dir, f"output_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav")
+    print(mime_type, input_path, output_path)
+    if mime_type == "audio/raw":
+        ffmpeg.input(
+            input_path,
+            f='s16le',
+            ar='16000',
+            ac=1,
+        ).output(output_path).run()
+    else:
+        ffmpeg.input(input_path).output(output_path, acodec='pcm_s16le', ac=1, ar='16k').run()
+
+    try:
+        yield output_path
+    finally:
+        os.remove(input_path)
+        os.remove(output_path)
+
+def run_command(command):
+    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    return result.stdout, result.stderr
+
+
+def bytes_to_wav(audio_bytes: bytearray, mime_type):
+    with export_audio_to_wav_ffmpeg(audio_bytes, mime_type) as wav_file_path:
+        return wav_file_path
--- a/01OS/01OS/start.py
+++ b/01OS/01OS/start.py
@ -0,0 +1,95 @@
+import typer
+import asyncio
+import platform
+import concurrent.futures
+import threading
+import os
+import signal
+import importlib
+
+app = typer.Typer()
+
+@app.command()
+def run(
+            server: bool = typer.Option(False, "--server", help="Run server"),
+            server_host: str = typer.Option("0.0.0.0", "--server-host", help="Specify the server host where the server will deploy"),
+            server_port: int = typer.Option(8000, "--server-port", help="Specify the server port where the server will deploy"),
+            
+            tunnel_service: str = typer.Option("bore", "--tunnel-service", help="Specify the tunnel service"),
+            expose: bool = typer.Option(False, "--expose", help="Expose server to internet"),
+            
+            client: bool = typer.Option(False, "--client", help="Run client"),
+            server_url: str = typer.Option(None, "--server-url", help="Specify the server URL that the client should expect. Defaults to server-host and server-port"),
+            client_type: str = typer.Option("auto", "--client-type", help="Specify the client type"),
+            
+            llm_service: str = typer.Option("litellm", "--llm-service", help="Specify the LLM service"),
+            
+            model: str = typer.Option("gpt-4", "--model", help="Specify the model"),
+            llm_supports_vision: bool = typer.Option(False, "--llm-supports-vision", help="Specify if the LLM service supports vision"),
+            llm_supports_functions: bool = typer.Option(False, "--llm-supports-functions", help="Specify if the LLM service supports functions"),
+            context_window: int = typer.Option(2048, "--context-window", help="Specify the context window size"),
+            max_tokens: int = typer.Option(4096, "--max-tokens", help="Specify the maximum number of tokens"),
+            temperature: float = typer.Option(0.8, "--temperature", help="Specify the temperature for generation"),
+            
+            tts_service: str = typer.Option("openai", "--tts-service", help="Specify the TTS service"),
+            
+            stt_service: str = typer.Option("openai", "--stt-service", help="Specify the STT service"),
+
+            local: bool = typer.Option(False, "--local", help="Use recommended local services for LLM, STT, and TTS"),
+        ):
+    
+    if local:
+        tts_service = "piper"
+        llm_service = "llamafile"
+        stt_service = "local-whisper"
+    
+    if not server_url:
+        server_url = f"{server_host}:{server_port}"
+    
+    if not server and not client:
+        server = True
+        client = True
+
+    def handle_exit(signum, frame):
+        os._exit(0)
+
+    signal.signal(signal.SIGINT, handle_exit)
+
+    if server:
+        from .server.server import main
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        server_thread = threading.Thread(target=loop.run_until_complete, args=(main(server_host, server_port, llm_service, model, llm_supports_vision, llm_supports_functions, context_window, max_tokens, temperature, tts_service, stt_service),))
+        server_thread.start()
+
+    if expose:
+        #tunnel_thread = threading.Thread(target=tunnel_service, args=[server_port])
+        #tunnel_thread.start()
+        tunnel_thread = threading.Thread(target=os.system, args=("./tunnel.sh",))
+        tunnel_thread.start()
+
+    if client:
+        if client_type == "auto":
+            system_type = platform.system()
+            if system_type == "Darwin":  # Mac OS
+                client_type = "mac"
+            elif system_type == "Linux":  # Linux System
+                try:
+                    with open('/proc/device-tree/model', 'r') as m:
+                        if 'raspberry pi' in m.read().lower():
+                            client_type = "rpi"
+                        else:
+                            client_type = "linux"
+                except FileNotFoundError:
+                    client_type = "linux"
+
+        module = importlib.import_module(f".clients.{client_type}.device", package='01OS')
+        client_thread = threading.Thread(target=module.main, args=[server_url])
+        client_thread.start()
+
+    try:
+        server_thread.join()
+        tunnel_thread.join()
+        client_thread.join()
+    except KeyboardInterrupt:
+        os.kill(os.getpid(), signal.SIGINT)
--- a/01OS/README.md
+++ b/01OS/README.md
@ -5,5 +5,5 @@ pip install 01OS
 ```

 ```bash
-01 # Runs the 01 server and client.
+01 # Runs the 01 server and client
 ```
--- a/01OS/poetry.lock
+++ b/01OS/poetry.lock
@ -8400,4 +8400,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.12"
-content-hash = "4e7112e334cb1610550bcc44ab5f0a257621d774513c24034d60272b741caf51"
+content-hash = "f582fa2573961a7bca4df34f7bf62bcbda856e57697f5e3daad6603ce2bc0589"
--- a/01OS/pyproject.toml
+++ b/01OS/pyproject.toml
@ -27,13 +27,15 @@ simpleaudio = "^1.0.4"
 opencv-python = "^4.9.0.80"
 open-interpreter = {version = "0.2.1rc1", extras = ["os"]}
 psutil = "^5.9.8"
+typer = "^0.9.0"
+platformdirs = "^4.2.0"

 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

 [tool.poetry.scripts]
-01 = "start:main"
+01 = "01OS.start:app"

 [tool.poetry.group.dev.dependencies]
 black = "^23.10.1"
--- a/01OS/start.py
+++ b/01OS/start.py
@ -1,31 +0,0 @@
-"""
-This is just for the Python package — we need a Python entrypoint.
-Just starts `start.sh` with all the same command line arguments. Aliased to 01.
-"""
-
-import os
-import subprocess
-import sys
-import psutil
-import importlib
-# Can't import normally because it starts with a number
-process_utils = importlib.import_module("01OS.server.utils.process_utils")
-kill_process_tree = process_utils.kill_process_tree
-
-def main():
-
-    # Get command line arguments
-    args = sys.argv[1:]
-
-    # Get the directory of the current script
-    dir_path = os.path.dirname(os.path.realpath(__file__))
-
-    # Prepare the command
-    command = [os.path.join(dir_path, 'start.sh')] + args
-
-    try:
-        # Start start.sh using psutil for better process management, and to kill all processes
-        psutil.Popen(command)
-    except KeyboardInterrupt:
-        print("Exiting...")
-        kill_process_tree()
--- a/README.md
+++ b/README.md
@ -28,11 +28,10 @@ If you want to run local speech-to-text using Whisper, install Rust. Follow the
 pip install 01OS
 ```

-**Run the 01 end-to-end:**
+**Run the 01:**

 ```bash
-01 # This will run a server + attempt to determine and run a client.
-# (Behavior can be modified by changing the contents of `.env`)
+01 # This will run the server and attempt to determine and run a client.
 ```

 **Expose an 01 Server Publicly**
@ -40,6 +39,7 @@ pip install 01OS
 We currently support exposing the 01 server publicly via a couple of different tunnel services:

 - **bore.pub** ([GitHub](https://github.com/ekzhang/bore))
+
  - **Requirements:** Ensure that Rust is installed ([Rust Installation](https://www.rust-lang.org/tools/install)), then run:
    ```
    cargo install bore-cli
@ -50,6 +50,7 @@ We currently support exposing the 01 server publicly via a couple of different t
    ```

 - **localtunnel** ([GitHub](https://github.com/localtunnel/localtunnel))
+
  - **Requirements:** Ensure that Node.js is installed ([Node.js Download](https://nodejs.org/en/download)), then run:
    ```
    npm install -g localtunnel
@ -69,7 +70,6 @@ We currently support exposing the 01 server publicly via a couple of different t
    01 --server --expose-with-ngrok
    ```

-
 **Run a specific client:**

 ```bash
--- a/archive/start.sh
+++ b/archive/start.sh