parent fef311e5b3
commit 4640b4f1a0
@@ -0,0 +1,482 @@
from dotenv import load_dotenv

load_dotenv()  # take environment variables from .env.

import subprocess
import os
import sys
import asyncio
import threading
import pyaudio
from pynput import keyboard
import json
import traceback
import websockets
import queue
from pydub import AudioSegment
from pydub.playback import play
import time
import wave
import tempfile
from datetime import datetime
import cv2
import base64
import platform
from interpreter import (
    interpreter,
)  # Just for code execution. Maybe we should let people do from interpreter.computer import run?

# In the future, I guess kernel watching code should be elsewhere? Somewhere server / client agnostic?
from ..server.utils.kernel import put_kernel_messages_into_queue
from ..server.utils.get_system_info import get_system_info
from ..server.utils.process_utils import kill_process_tree

from ..server.utils.logs import setup_logging
from ..server.utils.logs import logger

setup_logging()

os.environ["STT_RUNNER"] = "server"
os.environ["TTS_RUNNER"] = "server"

from ..utils.accumulator import Accumulator

accumulator = Accumulator()

# Configuration for Audio Recording
CHUNK = 1024  # Record in chunks of 1024 samples
FORMAT = pyaudio.paInt16  # 16 bits per sample
CHANNELS = 1  # Mono
RATE = 16000  # Sample rate
RECORDING = False  # Flag to control recording state
SPACEBAR_PRESSED = False  # Flag to track spacebar press state

# Camera configuration
CAMERA_ENABLED = os.getenv("CAMERA_ENABLED", False)
if type(CAMERA_ENABLED) == str:
    CAMERA_ENABLED = CAMERA_ENABLED.lower() == "true"
CAMERA_DEVICE_INDEX = int(os.getenv("CAMERA_DEVICE_INDEX", 0))
CAMERA_WARMUP_SECONDS = float(os.getenv("CAMERA_WARMUP_SECONDS", 0))
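
# A quick sanity check on what these settings imply (illustrative arithmetic only):
# 16,000 frames/s * 2 bytes/frame (paInt16) * 1 channel = 32,000 bytes/s of raw PCM,
# and each CHUNK of 1024 frames covers 1024 / 16000 ≈ 64 ms of audio.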

# Specify OS
current_platform = get_system_info()


def is_win11():
    return sys.getwindowsversion().build >= 22000


def is_win10():
    try:
        return (
            platform.system() == "Windows"
            and "10" in platform.version()
            and not is_win11()
        )
    except:
        return False


# Initialize PyAudio
p = pyaudio.PyAudio()

send_queue = queue.Queue()


class Device:
    def __init__(self):
        self.pressed_keys = set()
        self.captured_images = []
        self.audiosegments = asyncio.Queue()
        self.server_url = ""
        self.ctrl_pressed = False
        self.tts_service = ""
        self.debug = False
        self.playback_latency = None

    def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX):
        """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list."""
        image_path = None

        cap = cv2.VideoCapture(camera_index)
        ret, frame = cap.read()  # Capture a single frame to initialize the camera

        if CAMERA_WARMUP_SECONDS > 0:
            # Allow camera to warm up, then snap a picture again
            # This is a workaround for some cameras that don't return a properly exposed
            # picture immediately when they are first turned on
            time.sleep(CAMERA_WARMUP_SECONDS)
            ret, frame = cap.read()

        if ret:
            temp_dir = tempfile.gettempdir()
            image_path = os.path.join(
                temp_dir, f"01_photo_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.png"
            )
            self.captured_images.append(image_path)
            cv2.imwrite(image_path, frame)
            logger.info(f"Camera image captured to {image_path}")
            logger.info(
                f"You now have {len(self.captured_images)} images which will be sent along with your next audio message."
            )
        else:
            logger.error(
                f"Error: Couldn't capture an image from camera ({camera_index})"
            )

        cap.release()

        return image_path

    def encode_image_to_base64(self, image_path):
        """Encodes an image file to a base64 string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def add_image_to_send_queue(self, image_path):
        """Encodes an image and adds an LMC message to the send queue with the image data."""
        base64_image = self.encode_image_to_base64(image_path)
        image_message = {
            "role": "user",
            "type": "image",
            "format": "base64.png",
            "content": base64_image,
        }
        send_queue.put(image_message)
        # Delete the image file from the file system after sending it
        os.remove(image_path)

    def queue_all_captured_images(self):
        """Queues all captured images to be sent."""
        for image_path in self.captured_images:
            self.add_image_to_send_queue(image_path)
        self.captured_images.clear()  # Clear the list after sending
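
    # Note: a receiver of the LMC image message built above can reverse the encoding
    # with, e.g. (illustrative, not part of this client):
    #     with open("received.png", "wb") as f:
    #         f.write(base64.b64decode(image_message["content"]))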

    async def play_audiosegments(self):
        """Plays queued audio segments sequentially."""

        if self.tts_service == "elevenlabs":
            print("Ensure `mpv` is installed to use `elevenlabs`.\n\n(On macOS, you can run `brew install mpv`.)")
            mpv_command = ["mpv", "--no-cache", "--no-terminal", "--", "fd://0"]
            mpv_process = subprocess.Popen(
                mpv_command,
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

        while True:
            try:
                audio = await self.audiosegments.get()
                if self.debug and self.playback_latency and isinstance(audio, bytes):
                    elapsed_time = time.time() - self.playback_latency
                    print(f"Time from request to playback: {elapsed_time} seconds")
                    self.playback_latency = None

                if self.tts_service == "elevenlabs":
                    mpv_process.stdin.write(audio)  # type: ignore
                    mpv_process.stdin.flush()  # type: ignore
                else:
                    play(audio)

                await asyncio.sleep(0.1)
            except asyncio.exceptions.CancelledError:
                # This happens once at the start?
                pass
            except:
                logger.info(traceback.format_exc())
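
    # In the elevenlabs branch above, `fd://0` makes mpv read encoded audio from
    # stdin, so chunks can be streamed into the player as they arrive. The same
    # pattern works standalone (illustrative input bytes):
    #     proc = subprocess.Popen(
    #         ["mpv", "--no-cache", "--no-terminal", "--", "fd://0"],
    #         stdin=subprocess.PIPE,
    #     )
    #     proc.stdin.write(audio_bytes)
    #     proc.stdin.close()
    #     proc.wait()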

    def record_audio(self):
        if os.getenv("STT_RUNNER") == "server":
            # STT will happen on the server. we're sending audio.
            send_queue.put(
                {"role": "user", "type": "audio", "format": "bytes.wav", "start": True}
            )
        elif os.getenv("STT_RUNNER") == "client":
            # STT will happen here, on the client. we're sending text.
            send_queue.put({"role": "user", "type": "message", "start": True})
        else:
            raise Exception("STT_RUNNER must be set to either 'client' or 'server'.")

        """Record audio from the microphone and add it to the queue."""
        stream = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
        )
        print("Recording started...")
        global RECORDING

        # Create a temporary WAV file to store the audio data
        temp_dir = tempfile.gettempdir()
        wav_path = os.path.join(
            temp_dir, f"audio_{datetime.now().strftime('%Y%m%d%H%M%S%f')}.wav"
        )
        wav_file = wave.open(wav_path, "wb")
        wav_file.setnchannels(CHANNELS)
        wav_file.setsampwidth(p.get_sample_size(FORMAT))
        wav_file.setframerate(RATE)

        while RECORDING:
            data = stream.read(CHUNK, exception_on_overflow=False)
            wav_file.writeframes(data)

        wav_file.close()
        stream.stop_stream()
        stream.close()
        print("Recording stopped.")
        if self.debug:
            self.playback_latency = time.time()

        duration = wav_file.getnframes() / RATE
        if duration < 0.3:
            # Just pressed it. Send stop message
            if os.getenv("STT_RUNNER") == "client":
                send_queue.put({"role": "user", "type": "message", "content": "stop"})
                send_queue.put({"role": "user", "type": "message", "end": True})
            else:
                send_queue.put(
                    {
                        "role": "user",
                        "type": "audio",
                        "format": "bytes.wav",
                        "content": "",
                    }
                )
                send_queue.put(
                    {
                        "role": "user",
                        "type": "audio",
                        "format": "bytes.wav",
                        "end": True,
                    }
                )
        else:
            self.queue_all_captured_images()

            if os.getenv("STT_RUNNER") == "client":
                # THIS DOES NOT WORK. We moved to this very cool stt_service, llm_service
                # way of doing things. stt_wav is not a thing anymore. Needs work to work

                # Run stt then send text
                text = stt_wav(wav_path)
                logger.debug(f"STT result: {text}")
                send_queue.put({"role": "user", "type": "message", "content": text})
                send_queue.put({"role": "user", "type": "message", "end": True})
            else:
                # Stream audio
                with open(wav_path, "rb") as audio_file:
                    byte_data = audio_file.read(CHUNK)
                    while byte_data:
                        send_queue.put(byte_data)
                        byte_data = audio_file.read(CHUNK)
                send_queue.put(
                    {
                        "role": "user",
                        "type": "audio",
                        "format": "bytes.wav",
                        "end": True,
                    }
                )

        if os.path.exists(wav_path):
            os.remove(wav_path)
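
    # The branch above frames an utterance on the wire as: one start-flag LMC
    # message, then the WAV file's bytes in CHUNK-sized pieces, then an end-flag
    # message. An illustrative consumer (names hypothetical) reassembles it like:
    #     buffer = bytearray()
    #     while True:
    #         item = q.get()
    #         if isinstance(item, bytes):
    #             buffer.extend(item)
    #         elif item.get("end"):
    #             break  # bytes(buffer) now holds the complete WAV contents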

    def toggle_recording(self, state):
        """Toggle the recording state."""
        global RECORDING, SPACEBAR_PRESSED
        if state and not SPACEBAR_PRESSED:
            SPACEBAR_PRESSED = True
            if not RECORDING:
                RECORDING = True
                threading.Thread(target=self.record_audio).start()
        elif not state and SPACEBAR_PRESSED:
            SPACEBAR_PRESSED = False
            RECORDING = False

    def on_press(self, key):
        """Detect spacebar press and Ctrl+C combination."""
        self.pressed_keys.add(key)  # Add the pressed key to the set

        if keyboard.Key.space in self.pressed_keys:
            self.toggle_recording(True)
        elif {keyboard.Key.ctrl, keyboard.KeyCode.from_char("c")} <= self.pressed_keys:
            logger.info("Ctrl+C pressed. Exiting...")
            kill_process_tree()
            os._exit(0)

        # Windows alternative to the above
        if key == keyboard.Key.ctrl_l:
            self.ctrl_pressed = True

        try:
            if key.vk == 67 and self.ctrl_pressed:
                logger.info("Ctrl+C pressed. Exiting...")
                kill_process_tree()
                os._exit(0)
        # For non-character keys
        except:
            pass

    def on_release(self, key):
        """Detect spacebar release and 'c' key press for camera, and handle key release."""
        self.pressed_keys.discard(
            key
        )  # Remove the released key from the key press tracking set

        if key == keyboard.Key.ctrl_l:
            self.ctrl_pressed = False
        if key == keyboard.Key.space:
            self.toggle_recording(False)
        elif CAMERA_ENABLED and key == keyboard.KeyCode.from_char("c"):
            self.fetch_image_from_camera()

    async def message_sender(self, websocket):
        while True:
            message = await asyncio.get_event_loop().run_in_executor(
                None, send_queue.get
            )
            if isinstance(message, bytes):
                await websocket.send(message)
            else:
                await websocket.send(json.dumps(message))
            send_queue.task_done()
            await asyncio.sleep(0.01)
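
    # send_queue is a thread-safe queue.Queue fed from the recording thread, so its
    # blocking get() is handed to the default executor above rather than awaited
    # directly, which would stall the event loop. run_in_executor(None, fn) runs fn
    # in a worker thread and yields an awaitable for its result.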

    async def websocket_communication(self, WS_URL):
        show_connection_log = True

        async def exec_ws_communication(websocket):
            if CAMERA_ENABLED:
                print(
                    "\nHold the spacebar to start recording. Press 'c' to capture an image from the camera. Press CTRL-C to exit."
                )
            else:
                print("\nHold the spacebar to start recording. Press CTRL-C to exit.")

            asyncio.create_task(self.message_sender(websocket))

            while True:
                await asyncio.sleep(0.01)
                chunk = await websocket.recv()

                logger.debug(f"Got this message from the server: {type(chunk)} {chunk}")
                # print("received chunk from server")

                if type(chunk) == str:
                    chunk = json.loads(chunk)

                if chunk.get("type") == "config":
                    self.tts_service = chunk.get("tts_service")
                    continue

                if self.tts_service == "elevenlabs":
                    message = chunk
                else:
                    message = accumulator.accumulate(chunk)

                if message == None:
                    # Will be None until we have a full message ready
                    continue

                # At this point, we have our message
                if isinstance(message, bytes) or (
                    message["type"] == "audio" and message["format"].startswith("bytes")
                ):
                    # Convert bytes to audio file
                    if self.tts_service == "elevenlabs":
                        audio_bytes = message
                        audio = audio_bytes
                    else:
                        audio_bytes = message["content"]

                        # Create an AudioSegment instance with the raw data
                        audio = AudioSegment(
                            # raw audio data (bytes)
                            data=audio_bytes,
                            # signed 16-bit little-endian format
                            sample_width=2,
                            # 22,050 Hz frame rate
                            frame_rate=22050,
                            # mono sound
                            channels=1,
                        )

                    await self.audiosegments.put(audio)

                # Run the code if that's the client's job
                if os.getenv("CODE_RUNNER") == "client":
                    if message["type"] == "code" and "end" in message:
                        language = message["format"]
                        code = message["content"]
                        result = interpreter.computer.run(language, code)
                        send_queue.put(result)

        if is_win10():
            logger.info("Windows 10 detected")
            # Workaround for Windows 10 not latching to the websocket server.
            # See https://github.com/OpenInterpreter/01/issues/197
            try:
                ws = websockets.connect(WS_URL)
                await exec_ws_communication(ws)
            except Exception as e:
                logger.error(f"Error while attempting to connect: {e}")
        else:
            while True:
                try:
                    async with websockets.connect(WS_URL) as websocket:
                        await exec_ws_communication(websocket)
                except:
                    logger.debug(traceback.format_exc())
                    if show_connection_log:
                        logger.info(f"Connecting to `{WS_URL}`...")
                        show_connection_log = False
                    await asyncio.sleep(2)
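
    # Note: accumulator.accumulate() returns None until a complete LMC message has
    # been assembled from the streamed chunks (a start-flagged header, content
    # chunks, then an end flag). The Accumulator implementation is not part of this
    # diff; that contract is inferred from the usage above.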

    async def start_async(self):
        # Configuration for WebSocket
        WS_URL = f"ws://{self.server_url}"
        # Start the WebSocket communication
        asyncio.create_task(self.websocket_communication(WS_URL))

        # Start watching the kernel if it's your job to do that
        if os.getenv("CODE_RUNNER") == "client":
            # client is not running code!
            asyncio.create_task(put_kernel_messages_into_queue(send_queue))

        asyncio.create_task(self.play_audiosegments())

        # If Raspberry Pi, add the button listener, otherwise use the spacebar
        if current_platform.startswith("raspberry-pi"):
            logger.info("Raspberry Pi detected, using button on GPIO pin 15")
            # Use GPIO pin 15
            pindef = ["gpiochip4", "15"]  # gpiofind PIN15
            print("PINDEF", pindef)

            # HACK: needs passwordless sudo
            process = await asyncio.create_subprocess_exec(
                "sudo", "gpiomon", "-brf", *pindef, stdout=asyncio.subprocess.PIPE
            )
            while True:
                line = await process.stdout.readline()
                if line:
                    line = line.decode().strip()
                    if "FALLING" in line:
                        self.toggle_recording(False)
                    elif "RISING" in line:
                        self.toggle_recording(True)
                else:
                    break
        else:
            # Keyboard listener for spacebar press/release
            listener = keyboard.Listener(
                on_press=self.on_press, on_release=self.on_release
            )
            listener.start()

    def start(self):
        if os.getenv("TEACH_MODE") != "True":
            asyncio.run(self.start_async())
            p.terminate()
@@ -1,482 +1,88 @@
import asyncio
import websockets
import pyaudio
from pynput import keyboard
import json

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RECORDING_RATE = 16000
PLAYBACK_RATE = 24000


class Device:
    def __init__(self):
        self.server_url = "0.0.0.0:10001"
        self.p = pyaudio.PyAudio()
        self.websocket = None
        self.recording = False
        self.input_stream = None
        self.output_stream = None

    async def connect_with_retry(self, max_retries=50, retry_delay=2):
        for attempt in range(max_retries):
            try:
                self.websocket = await websockets.connect(f"ws://{self.server_url}")
                print("Connected to server.")
                return
            except ConnectionRefusedError:
                print(f"Waiting for the server to be ready. Retrying in {retry_delay} seconds...")
                await asyncio.sleep(retry_delay)
        raise Exception("Failed to connect to the server after multiple attempts")

    async def send_audio(self):
        self.input_stream = self.p.open(format=FORMAT, channels=CHANNELS, rate=RECORDING_RATE, input=True, frames_per_buffer=CHUNK)
        while True:
            if self.recording:
                try:
                    # Send start flag
                    await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "start": True}))
                    print("Sending audio start message")

                    while self.recording:
                        data = self.input_stream.read(CHUNK, exception_on_overflow=False)
                        await self.websocket.send(data)

                    # Send stop flag
                    await self.websocket.send(json.dumps({"role": "user", "type": "audio", "format": "bytes.wav", "end": True}))
                    print("Sending audio end message")
                except Exception as e:
                    print(f"Error in send_audio: {e}")
            await asyncio.sleep(0.01)

    async def receive_audio(self):
        self.output_stream = self.p.open(format=FORMAT, channels=CHANNELS, rate=PLAYBACK_RATE, output=True, frames_per_buffer=CHUNK)
        while True:
            try:
                data = await self.websocket.recv()
                if isinstance(data, bytes) and not self.recording:
                    self.output_stream.write(data)
            except Exception as e:
                print(f"Error in receive_audio: {e}")

    def on_press(self, key):
        if key == keyboard.Key.space and not self.recording:
            print("Space pressed, starting recording")
            self.recording = True

    def on_release(self, key):
        if key == keyboard.Key.space:
            print("Space released, stopping recording")
            self.recording = False
        elif key == keyboard.Key.esc:
            print("Esc pressed, stopping the program")
            return False

    async def main(self):
        await self.connect_with_retry()
        print("Hold spacebar to record. Press 'Esc' to quit.")
        listener = keyboard.Listener(on_press=self.on_press, on_release=self.on_release)
        listener.start()
        await asyncio.gather(self.send_audio(), self.receive_audio())

    def start(self):
        asyncio.run(self.main())


if __name__ == "__main__":
    device = Device()
    device.start()
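
The rewritten client interleaves JSON control frames and raw PCM frames on one
socket, so any receiver must branch on frame type. A minimal standalone sketch of
that dispatch, assuming a recent `websockets` release with single-argument handlers
(this is not the project's server, which follows below):

    import asyncio, json, websockets

    async def handler(websocket):
        async for frame in websocket:
            if isinstance(frame, bytes):
                pass  # raw PCM between the start/end flags: feed it to STT here
            else:
                msg = json.loads(frame)
                if msg.get("start"):
                    print("client started speaking")
                elif msg.get("end"):
                    print("client stopped speaking")

    async def main():
        async with websockets.serve(handler, "0.0.0.0", 10001):
            await asyncio.Future()  # run forever

    asyncio.run(main())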
@@ -0,0 +1,124 @@
import asyncio
import traceback
import json
from fastapi import FastAPI, WebSocket, Depends
from fastapi.responses import PlainTextResponse
from uvicorn import Config, Server
from .async_interpreter import AsyncInterpreter
from fastapi.middleware.cors import CORSMiddleware
from typing import List, Dict, Any
import os
import importlib.util


os.environ["STT_RUNNER"] = "server"
os.environ["TTS_RUNNER"] = "server"

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],  # Allow all methods (GET, POST, etc.)
    allow_headers=["*"],  # Allow all headers
)


async def get_debug_flag():
    return app.state.debug


@app.get("/ping")
async def ping():
    return PlainTextResponse("pong")


@app.websocket("/")
async def websocket_endpoint(
    websocket: WebSocket, debug: bool = Depends(get_debug_flag)
):
    await websocket.accept()

    global global_interpreter
    interpreter = global_interpreter

    # Send the tts_service value to the client
    await websocket.send_text(
        json.dumps({"type": "config", "tts_service": interpreter.interpreter.tts})
    )

    try:

        async def receive_input():
            while True:
                if websocket.client_state == "DISCONNECTED":
                    break

                data = await websocket.receive()

                await asyncio.sleep(0)

                if isinstance(data, bytes):
                    await interpreter.input(data)
                elif "bytes" in data:
                    await interpreter.input(data["bytes"])
                    # print("RECEIVED INPUT", data)
                elif "text" in data:
                    # print("RECEIVED INPUT", data)
                    await interpreter.input(data["text"])

        async def send_output():
            while True:
                output = await interpreter.output()

                await asyncio.sleep(0)

                if isinstance(output, bytes):
                    # print(f"Sending {len(output)} bytes of audio data.")
                    await websocket.send_bytes(output)

                elif isinstance(output, dict):
                    # print("sending text")
                    await websocket.send_text(json.dumps(output))

        await asyncio.gather(send_output(), receive_input())
    except Exception as e:
        print(f"WebSocket connection closed with exception: {e}")
        traceback.print_exc()
    finally:
        if not websocket.client_state == "DISCONNECTED":
            await websocket.close()


async def main(server_host, server_port, profile, debug):

    app.state.debug = debug

    # Load the profile module from the provided path
    spec = importlib.util.spec_from_file_location("profile", profile)
    profile_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(profile_module)

    # Get the interpreter from the profile
    interpreter = profile_module.interpreter

    if not hasattr(interpreter, 'tts'):
        print("Setting TTS provider to default: openai")
        interpreter.tts = "openai"

    # Make it async
    interpreter = AsyncInterpreter(interpreter, debug)

    global global_interpreter
    global_interpreter = interpreter

    print(f"Starting server on {server_host}:{server_port}")
    config = Config(app, host=server_host, port=server_port, lifespan="on")
    server = Server(config)
    await server.serve()


if __name__ == "__main__":
    asyncio.run(main())
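
For a quick smoke test of this endpoint, a sketch like the following should work
(the host/port are illustrative, and the payload mirrors the LMC shapes used in
this diff):

    import asyncio, json, websockets

    async def smoke_test():
        async with websockets.connect("ws://localhost:10001/") as ws:
            print("config:", await ws.recv())  # server sends the tts_service config first
            await ws.send(json.dumps({"role": "user", "type": "message", "start": True}))
            await ws.send(json.dumps({"role": "user", "type": "message", "content": "Hi!"}))
            await ws.send(json.dumps({"role": "user", "type": "message", "end": True}))
            while True:
                reply = await ws.recv()  # bytes = audio, str = JSON-encoded LMC message
                print(type(reply).__name__, reply if isinstance(reply, str) else len(reply))

    asyncio.run(smoke_test())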
@@ -1,124 +1,125 @@
import importlib
import traceback
import json
import os
from RealtimeTTS import TextToAudioStream, CoquiEngine, OpenAIEngine, ElevenlabsEngine
from RealtimeSTT import AudioToTextRecorder
import types
import time
import wave
import asyncio
from fastapi.responses import PlainTextResponse


def start_server(server_host, server_port, profile, debug):

    # Load the profile module from the provided path
    spec = importlib.util.spec_from_file_location("profile", profile)
    profile_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(profile_module)

    # Get the interpreter from the profile
    interpreter = profile_module.interpreter

    # STT
    interpreter.stt = AudioToTextRecorder(
        model="tiny.en", spinner=False, use_microphone=False
    )
    interpreter.stt.stop()  # It needs this for some reason

    # TTS
    if not hasattr(interpreter, 'tts'):
        print("Setting TTS provider to default: openai")
        interpreter.tts = "openai"
    if interpreter.tts == "coqui":
        engine = CoquiEngine()
    elif interpreter.tts == "openai":
        engine = OpenAIEngine(voice="onyx")
    elif interpreter.tts == "elevenlabs":
        engine = ElevenlabsEngine(api_key=os.environ["ELEVEN_LABS_API_KEY"])
        engine.set_voice("Michael")
    else:
        raise ValueError(f"Unsupported TTS engine: {interpreter.interpreter.tts}")
    interpreter.tts = TextToAudioStream(engine)

    # Misc Settings
    interpreter.verbose = debug
    interpreter.server.host = server_host
    interpreter.server.port = server_port

    interpreter.audio_chunks = []

    old_input = interpreter.input
    old_output = interpreter.output

    async def new_input(self, chunk):
        await asyncio.sleep(0)
        if isinstance(chunk, bytes):
            self.stt.feed_audio(chunk)
            self.audio_chunks.append(chunk)
        elif isinstance(chunk, dict):
            if "start" in chunk:
                self.stt.start()
                self.audio_chunks = []
                await old_input({"role": "user", "type": "message", "start": True})
            if "end" in chunk:
                self.stt.stop()
                content = self.stt.text()

                print("User: ", content)

                if False:
                    audio_bytes = bytearray(b"".join(self.audio_chunks))
                    with wave.open('audio.wav', 'wb') as wav_file:
                        wav_file.setnchannels(1)
                        wav_file.setsampwidth(2)  # Assuming 16-bit audio
                        wav_file.setframerate(16000)  # Assuming 16kHz sample rate
                        wav_file.writeframes(audio_bytes)
                    print(os.path.abspath('audio.wav'))

                await old_input({"role": "user", "type": "message", "content": content})
                await old_input({"role": "user", "type": "message", "end": True})

    async def new_output(self):
        while True:
            output = await old_output()
            # if output == {"role": "assistant", "type": "message", "start": True}:
            #     return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}

            if isinstance(output, bytes):
                return output

            await asyncio.sleep(0)

            delimiters = ".?!;,\n…)]}"

            if output["type"] == "message" and len(output.get("content", "")) > 0:
                self.tts.feed(output.get("content"))
                if not self.tts.is_playing() and any([c in delimiters for c in output.get("content")]):  # Start playing once the first delimiter is encountered.
                    self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters)
                    return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}

            if output == {"role": "assistant", "type": "message", "end": True}:
                if not self.tts.is_playing():  # We put this here in case it never outputs a delimiter and never triggers play_async^
                    self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=True, sentence_fragment_delimiters=delimiters)
                    return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
                return {"role": "assistant", "type": "audio", "format": "bytes.wav", "end": True}

    def on_tts_chunk(self, chunk):
        self.output_queue.sync_q.put(chunk)

    # Wrap in voice interface
    interpreter.input = types.MethodType(new_input, interpreter)
    interpreter.output = types.MethodType(new_output, interpreter)
    interpreter.on_tts_chunk = types.MethodType(on_tts_chunk, interpreter)

    @interpreter.server.app.get("/ping")
    async def ping():
        return PlainTextResponse("pong")

    # Start server
    interpreter.server.run()
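
The wrapping at the end relies on types.MethodType to rebind free functions as
bound methods on one interpreter instance, which is why new_input/new_output take
self. A tiny standalone illustration of the pattern (class and names hypothetical):

    import types

    class Greeter:
        def greet(self):
            return "hello"

    g = Greeter()
    old_greet = g.greet  # keep the original bound method

    def loud_greet(self):
        return old_greet().upper() + "!"

    # Rebind the free function as a method on this one instance.
    g.greet = types.MethodType(loud_greet, g)
    print(g.greet())  # HELLO!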