pull/314/merge
ben 4 months ago committed by GitHub
commit 8d1d5af1aa

@ -23,11 +23,11 @@ poetry run 01 --profile <profile_name>
### Standard Profiles
`default.py` is the default profile that is used when no profile is specified. The default TTS is OpenAI.
`default.py` is the default profile that is used when no profile is specified. The default TTS service is Elevenlabs.
`fast.py` uses elevenlabs and groq, which are the fastest providers.
`fast.py` uses Cartesia for TTS and Cerebras Llama3.1-8b, which are the fastest providers.
`local.py` uses coqui TTS and runs the --local explorer from Open Interpreter.
`local.py` requires additional setup to be used with LiveKit. Uses faster-whisper for STT, ollama/codestral for LLM (default), and piper for TTS (default).
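Under the hood, the start script loads the selected profile as a plain Python module and reads its `interpreter` object. The sketch below mirrors that importlib mechanism from the start script; the paths and profile name are illustrative.
```python
# Minimal sketch of how a profile file is picked up (mirrors the importlib
# logic in the start script). Paths and the profile name are illustrative.
import importlib.util
import os

profiles_dir = os.path.join("software", "source", "server", "profiles")
profile_path = os.path.join(profiles_dir, "fast.py")  # e.g. from --profile fast

spec = importlib.util.spec_from_file_location("profile", profile_path)
profile_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(profile_module)

# Every profile is expected to expose a configured `interpreter` object.
interpreter = profile_module.interpreter
print(interpreter.tts)
```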
### Custom Profiles
@ -46,38 +46,16 @@ poetry run 01 --profile <profile_name>
### Example Profile
````python
from interpreter import AsyncInterpreter
interpreter = AsyncInterpreter()
from interpreter import Interpreter
interpreter = Interpreter()
# This is an Open Interpreter compatible profile.
# Visit https://01.openinterpreter.com/profile for all options.
# 01 supports OpenAI, ElevenLabs, and Coqui (Local) TTS providers
# 01 supports OpenAI, ElevenLabs, Cartesia, and Coqui (Local) TTS providers
# {OpenAI: "openai", ElevenLabs: "elevenlabs", Coqui: "coqui"}
interpreter.tts = "openai"
# Connect your 01 to a language model
interpreter.llm.model = "gpt-4o"
interpreter.llm.context_window = 100000
interpreter.llm.max_tokens = 4096
# interpreter.llm.api_key = "<your_openai_api_key_here>"
# Tell your 01 where to find and save skills
interpreter.computer.skills.path = "./skills"
# Extra settings
interpreter.computer.import_computer_api = True
interpreter.computer.import_skills = True
interpreter.computer.run("python", "computer") # This will trigger those imports
interpreter.auto_run = True
interpreter.loop = True
interpreter.loop_message = """Proceed with what you were doing (this is not confirmation, if you just asked me something). You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task is done, say exactly 'The task is done.' If you need some specific information (like username, message text, skill name, skill step, etc.) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going. CRITICAL: REMEMBER TO FOLLOW ALL PREVIOUS INSTRUCTIONS. If I'm teaching you something, remember to run the related `computer.skills.new_skill` function."""
interpreter.loop_breakers = [
"The task is done.",
"The task is impossible.",
"Let me know what you'd like to do next.",
"Please provide more information.",
]
interpreter.tts = "elevenlabs"
interpreter.stt = "deepgram"
# Set the identity and personality of your 01
interpreter.system_message = """
@ -89,17 +67,37 @@ You can install new packages.
Be concise. Your messages are being read aloud to the user. DO NOT MAKE PLANS. RUN CODE QUICKLY.
Try to spread complex tasks over multiple code blocks. Don't try to do complex tasks in one go.
Manually summarize text."""
# Add additional instructions for the 01
interpreter.instructions = "Be very concise in your responses."
# Connect your 01 to a language model
interpreter.model = "claude-3-5-sonnet-20240620"
interpreter.provider = "anthropic"
interpreter.max_tokens = 4096
interpreter.temperature = 0
interpreter.api_key = "<your_anthropic_api_key_here>"
# Extra settings
interpreter.tools = ["interpreter", "editor"] # Enabled tool modules
interpreter.auto_run = True # Whether to auto-run tools without confirmation
interpreter.tool_calling = True # Whether to allow tool/function calling
interpreter.allowed_paths = [] # List of allowed paths
interpreter.allowed_commands = [] # List of allowed commands
````
### Hosted LLMs
The default LLM for 01 is GPT-4-Turbo. You can find this in the default profile in `software/source/server/profiles/default.py`.
The default LLM for 01 is Claude 3.5 Sonnet. You can find this in the default profile in `software/source/server/profiles/default.py`.
The fast profile uses Llama3-8b served by Groq. You can find this in the fast profile in `software/source/server/profiles/fast.py`.
The fast profile uses Llama3.1-8b served by Cerebras. You can find this in the fast profile in `software/source/server/profiles/fast.py`.
```python
# Set your profile with a hosted LLM
interpreter.llm.model = "gpt-4o"
interpreter.model = "claude-3-5-sonnet-20240620"
interpreter.provider = "anthropic"
```
### Local LLMs
@ -110,7 +108,7 @@ Using the local profile launches the Local Explorer where you can select your in
```python
# Set your profile with a local LLM
interpreter.llm.model = "ollama/codestral"
interpreter.model = "ollama/codestral"
# You can also use the Local Explorer to interactively select your model
interpreter.local_setup()
@ -118,26 +116,30 @@ interpreter.local_setup()
### Hosted TTS
01 supports OpenAI and Elevenlabs for hosted TTS.
01 supports OpenAI, Elevenlabs, and Cartesia for hosted TTS.
```python
# Set your profile with a hosted TTS service
interpreter.tts = "elevenlabs"
```
### Local TTS
### Local TTS and STT with LiveKit
For local TTS, Coqui is used.
We recommend having Docker installed for the easiest setup. Local TTS and STT rely on the [openedai-speech](https://github.com/matatonic/openedai-speech?tab=readme-ov-file) and [faster-whisper-server](https://github.com/fedirz/faster-whisper-server) repositories, respectively.
#### Local TTS
1. Clone the [openedai-speech](https://github.com/matatonic/openedai-speech?tab=readme-ov-file) repository
2. Follow the Docker Image instructions for your system. By default, run `docker compose -f docker-compose.min.yml up --publish 9001:8000` in the repository root.
3. Set your profile to use the local TTS service:
```python
# Set your profile with a local TTS service
interpreter.tts = "coqui"
interpreter.tts = "local"
```
<Note>
When using the Livekit server, the interpreter.tts setting in your profile
will be ignored. The Livekit server currently only works with Deepgram for
speech recognition and Eleven Labs for text-to-speech. We are working on
introducing all-local functionality for the Livekit server as soon as
possible.
</Note>
#### Local STT
1. Clone the [faster-whisper-server](https://github.com/fedirz/faster-whisper-server) repository
2. Follow the Docker Compose Quick Start instructions for your system.
3. Run `docker run --publish 9002:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu` to publish the server on port 9002 instead of the default 8000 (to avoid clashing with the TTS service).
4. Set your profile to use the local STT service:
```python
interpreter.stt = "local"
```
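Before launching 01 with the local setup, it can help to confirm that both containers are actually listening. The sketch below only checks that the host ports used in the steps above (9001 for TTS, 9002 for STT) accept a TCP connection; adjust the ports if you published the containers elsewhere.
```python
# Sanity-check that the local TTS/STT containers are reachable.
# Ports follow the docker commands above: 9001 = openedai-speech (TTS),
# 9002 = faster-whisper-server (STT).
import socket

def port_open(host: str, port: int, timeout: float = 2.0) -> bool:
    """Return True if a TCP connection to host:port succeeds."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

for name, port in [("openedai-speech (TTS)", 9001), ("faster-whisper-server (STT)", 9002)]:
    status = "reachable" if port_open("localhost", port) else "NOT reachable"
    print(f"{name} on port {port}: {status}")
```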

@ -69,6 +69,18 @@ Replace the placeholders with your actual API keys.
### Starting the Server
**To use the mobile app, run the following command:**
```bash
poetry run 01 --server livekit --qr --expose
```
To customize the profile, append the `--profile` flag followed by the profile name:
```bash
poetry run 01 --server livekit --qr --expose --profile fast
```
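With `--qr`, the start script encodes the LiveKit URL and a participant token as JSON and renders a terminal QR code for the mobile app to scan. A minimal sketch of that step follows; the URL is a placeholder, and `devkey`/`secret` are the LiveKit dev-mode credentials the script uses.
```python
# Sketch of the QR payload the mobile app scans (mirrors the start script).
import json

import segno
from livekit import api

lk_url = "http://192.168.1.10:10101"  # placeholder; the script detects or forwards this

token = (
    api.AccessToken("devkey", "secret")       # dev-mode credentials
    .with_identity("Participant")
    .with_name("You")
    .with_grants(api.VideoGrants(room_join=True, room="my-room"))
    .to_jwt()
)

content = json.dumps({"livekit_server": lk_url, "token": token})
segno.make(content).terminal(compact=True)  # prints the QR code to the terminal
```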
To start the Livekit server, run the following command:
```bash
@ -87,18 +99,10 @@ To expose over the internet via ngrok
poetry run 01 --server livekit --expose
```
To use the mobile app over the web, use both flags:
```bash
poetry run 01 --server livekit --qr --expose
```
<Note>
Currently, our Livekit server only works with Deepgram and Eleven Labs. We are
working to introduce all-local functionality as soon as possible. By setting
your profile (see [Configure Your Profile](/software/configure)), you can
still change your LLM to be a local LLM, but the `interpreter.tts` value will
be ignored for the Livekit server.
The Livekit server now supports local STT and TTS for a fully local pipeline.
Setup instructions are provided in the [configuring your 01](/server/configure#local-tts-and-stt-with-livekit) section.
</Note>
## Livekit vs. Light Server

@ -1,62 +1,68 @@
from yaspin import yaspin
spinner = yaspin()
spinner.start()
import subprocess
import time
import os
import typer
import ngrok
import platform
import threading
import os
import importlib
from source.server.server import start_server
import subprocess
import webview
import socket
import json
import webbrowser
import psutil
import ngrok
import segno
import json
from pathlib import Path
from livekit import api
import time
from dotenv import load_dotenv
import signal
from source.server.livekit.worker import main as worker_main
from source.server.livekit.multimodal import main as multimodal_main
import warnings
import requests
load_dotenv()
from dotenv import load_dotenv
system_type = platform.system()
load_dotenv()
system_type = platform.system()
app = typer.Typer()
ROOM_NAME = "my-room"
def pre_clean_process(port):
"""Find and kill process running on specified port"""
for proc in psutil.process_iter(['pid', 'name', 'connections']):
try:
for conn in proc.connections():
if conn.laddr.port == port:
print(f"Killing process {proc.pid} ({proc.name()}) on port {port}")
proc.terminate()
proc.wait()
return True
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
return False
def cleanup_processes(processes):
for process in processes:
if process.poll() is None: # if process is still running
process.terminate()
process.wait() # wait for process to terminate
@app.command()
def run(
server: str = typer.Option(
None,
"--server",
help="Run server (accepts `livekit` or `light`)",
),
server_host: str = typer.Option(
lk_host: str = typer.Option(
"0.0.0.0",
"--server-host",
help="Specify the server host where the server will deploy",
"--lk-host",
help="Specify the server host where the livekit server will deploy. For other devices on your network to connect to it, keep it on default `0.0.0.0`",
),
server_port: int = typer.Option(
lk_port: int = typer.Option(
10101,
"--server-port",
help="Specify the server port where the server will deploy",
),
expose: bool = typer.Option(False, "--expose", help="Expose server over the internet"),
domain: str = typer.Option(None, "--domain", help="Use `--expose` with a custom ngrok domain"),
client: str = typer.Option(None, "--client", help="Run client of a particular type. Accepts `light-python`, defaults to `light-python`"),
server_url: str = typer.Option(
None,
"--server-url",
help="Specify the server URL that the --client should expect. Defaults to server-host and server-port",
),
qr: bool = typer.Option(
False, "--qr", help="Display QR code containing the server connection information (will be ngrok url if `--expose` is used)"
"--lk-port",
help="Specify the server port where the livekit server will deploy",
),
domain: str = typer.Option(None, "--domain", help="Pass in a custom ngrok domain to expose the livekit server over the internet"),
client: str = typer.Option(None, "--client", help="Run client of a particular type. Accepts `meet` or `mobile`, defaults to `meet`"),
profiles: bool = typer.Option(
False,
"--profiles",
@ -76,19 +82,12 @@ def run(
False,
"--multimodal",
help="Run the multimodal agent",
),
)
):
threads = []
# Handle `01` with no arguments, which should start server + client
if not server and not client:
server = "light"
client = "light-python"
### PROFILES
profiles_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "source", "server", "profiles")
# preprocess ports
ports = [10101, 8000, 3000]
for port in ports:
pre_clean_process(port)
if profiles:
if platform.system() == "Windows":
@ -101,6 +100,8 @@ def run(
subprocess.Popen(['open', profiles_dir])
exit(0)
profiles_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "source", "server", "profiles")
if profile:
if not os.path.isfile(profile):
profile = os.path.join(profiles_dir, profile)
@ -110,192 +111,66 @@ def run(
print(f"Invalid profile path: {profile}")
exit(1)
# Load the profile module from the provided path
spec = importlib.util.spec_from_file_location("profile", profile)
profile_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(profile_module)
# Get the interpreter from the profile
interpreter = profile_module.interpreter
### SERVER
if system_type == "Windows":
server_host = "localhost"
if not server_url:
server_url = f"{server_host}:{server_port}"
if server:
### LIGHT SERVER (required by livekit)
if server == "light":
light_server_port = server_port
light_server_host = server_host
voice = True # The light server will support voice
elif server == "livekit":
# The light server should run at a different port if we want to run a livekit server
spinner.stop()
print(f"Starting light server (required for livekit server) on localhost, on the port before `--server-port` (port {server_port-1}), unless the `AN_OPEN_PORT` env var is set.")
print(f"The livekit server will be started on port {server_port}.")
light_server_port = os.getenv('AN_OPEN_PORT', server_port-1)
light_server_host = "localhost"
voice = False # The light server will NOT support voice. It will just run Open Interpreter. The Livekit server will handle voice
server_thread = threading.Thread(
target=start_server,
args=(
light_server_host,
light_server_port,
interpreter,
voice,
debug
),
)
spinner.stop()
print("Starting server...")
server_thread.start()
threads.append(server_thread)
if server == "livekit":
### LIVEKIT SERVER
OI_CMD = f"interpreter --serve --profile {profile}"
oi_server = subprocess.Popen(OI_CMD, shell=True)
print("Interpreter server started")
def run_command(command):
subprocess.run(command, shell=True, check=True)
# Start the livekit server
print("Starting livekit server...")
if debug:
command = f'livekit-server --dev --bind "{server_host}" --port {server_port}'
else:
command = f'livekit-server --dev --bind "{server_host}" --port {server_port} > /dev/null 2>&1'
livekit_thread = threading.Thread(
target=run_command, args=(command,)
)
time.sleep(7)
livekit_thread.start()
threads.append(livekit_thread)
local_livekit_url = f"ws://{server_host}:{server_port}"
if expose:
### EXPOSE OVER INTERNET
listener = ngrok.forward(f"{server_host}:{server_port}", authtoken_from_env=True, domain=domain)
url = listener.url()
LK_CMD = f"livekit-server --dev --bind {lk_host} --port {lk_port}"
else:
LK_CMD = f"livekit-server --dev --bind {lk_host} --port {lk_port} > /dev/null 2>&1"
### GET LOCAL URL
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80))
ip_address = s.getsockname()[0]
s.close()
url = f"http://{ip_address}:{server_port}"
if server == "livekit":
print("Livekit server will run at:", url)
lk_server = subprocess.Popen(LK_CMD, shell=True)
print("Livekit server started")
time.sleep(2)
lk_url = f"http://{lk_host}:{lk_port}"
participant_token = str(api.AccessToken('devkey', 'secret') \
.with_identity("Participant") \
.with_name("You") \
.with_grants(api.VideoGrants(
room_join=True,
room=ROOM_NAME,))
.to_jwt())
### CLIENT
if client:
module = importlib.import_module(
f".clients.{client}.client", package="source"
)
client_thread = threading.Thread(target=module.run, args=[server_url, debug])
spinner.stop()
print("Starting client...")
client_thread.start()
threads.append(client_thread)
### WAIT FOR THREADS TO FINISH, HANDLE CTRL-C
# Signal handler for termination signals
def signal_handler(sig, frame):
print("Termination signal received. Shutting down...")
for thread in threads:
if thread.is_alive():
# Kill subprocess associated with thread
subprocess.run(f"pkill -P {os.getpid()}", shell=True)
os._exit(0)
# Register signal handler for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
try:
processes = [lk_server, oi_server]
# Verify the server is running
for attempt in range(10):
try:
response = requests.get(url)
status = "OK" if response.status_code == 200 else "Not OK"
if status == "OK":
break
except requests.RequestException:
pass
time.sleep(1)
else:
raise Exception(f"Server at {url} failed to respond after 10 attempts")
if client == 'mobile':
listener = ngrok.forward(f"{lk_host}:{lk_port}", authtoken_from_env=True, domain=domain)
lk_url = listener.url()
print(f"Livekit server forwarded to: {lk_url}")
### DISPLAY QR CODE
if qr:
def display_qr_code():
time.sleep(10)
content = json.dumps({"livekit_server": url})
print("Scan the QR code below with your mobile app to connect to the livekit server.")
content = json.dumps({"livekit_server": lk_url, "token": participant_token})
qr_code = segno.make(content)
qr_code.terminal(compact=True)
else: # meet client
# Get the path to the meet client directory
meet_client_path = Path(__file__).parent / "source" / "clients" / "meet"
qr_thread = threading.Thread(target=display_qr_code)
qr_thread.start()
threads.append(qr_thread)
print("Starting Next.js dev server...")
next_server = subprocess.Popen(["pnpm", "dev"], cwd=meet_client_path,)
print("Next.js dev server started")
### START LIVEKIT WORKER
if server == "livekit":
time.sleep(1)
# These are needed to communicate with the worker's entrypoint
os.environ['INTERPRETER_SERVER_HOST'] = light_server_host
os.environ['INTERPRETER_SERVER_PORT'] = str(light_server_port)
os.environ['01_TTS'] = interpreter.tts
os.environ['01_STT'] = interpreter.stt
time.sleep(2)
meet_url = f'http://localhost:3000/custom?liveKitUrl={lk_url.replace("http", "ws")}&token={participant_token}'
print(f"\nOpening meet interface at: {meet_url}")
webbrowser.open(meet_url)
token = str(api.AccessToken('devkey', 'secret') \
.with_identity("identity") \
.with_name("my name") \
.with_grants(api.VideoGrants(
room_join=True,
room="my-room",
)).to_jwt())
# meet_url = f'http://localhost:3000/custom?liveKitUrl={url.replace("http", "ws")}&token={token}\n\n'
meet_url = f'https://meet.livekit.io/custom?liveKitUrl={url.replace("http", "ws")}&token={token}\n\n'
print("\n")
print("For debugging, you can join a video call with your assistant. Click the link below, then send a chat message that says {CONTEXT_MODE_OFF}, then begin speaking:")
print(meet_url)
processes.append(next_server)
for attempt in range(30):
try:
print("Starting worker...")
if multimodal:
multimodal_main(local_livekit_url)
multimodal_main(lk_url)
else:
worker_main(local_livekit_url)
except KeyboardInterrupt:
print("Exiting.")
raise
except Exception as e:
print(f"Error occurred: {e}")
print("Retrying...")
time.sleep(1)
# Wait for all threads to complete
for thread in threads:
thread.join()
worker_main(lk_url)
print("Worker started")
except KeyboardInterrupt:
# On KeyboardInterrupt, send SIGINT to self
os.kill(os.getpid(), signal.SIGINT)
print("\nReceived interrupt signal, shutting down...")
finally:
print("Cleaning up processes...")
cleanup_processes(processes)

software/poetry.lock (generated): file diff suppressed because one or more lines are too long

@ -13,20 +13,21 @@ readme = "../README.md"
[tool.poetry.dependencies]
python = ">=3.10,<3.12"
livekit = "^0.17.2"
livekit-agents = "^0.10.0"
livekit-agents = "^0.12.0"
livekit-plugins-deepgram = "^0.6.7"
livekit-plugins-openai = "^0.10.1"
livekit-plugins-silero = "^0.7.1"
livekit-plugins-elevenlabs = "^0.7.5"
livekit-plugins-cartesia = "^0.4.2"
segno = "^1.6.1"
open-interpreter = {extras = ["os", "server"], version = "^0.3.12"} # You should add a "browser" extra, so selenium isn't in the main package
open-interpreter = { git = "https://github.com/OpenInterpreter/open-interpreter.git", branch = "development" }
ngrok = "^1.4.0"
realtimetts = {extras = ["all"], version = "^0.4.5"}
realtimestt = "^0.2.41"
pynput = "^1.7.7"
yaspin = "^3.0.2"
pywebview = "^5.2"
livekit-plugins-cartesia = "^0.4.2"
resampy = "^0.4.2"
[build-system]
requires = ["poetry-core"]

@ -0,0 +1,30 @@
# 1. Copy this file and rename it to .env.local
# 2. Update the environment variables below.
# REQUIRED SETTINGS
# #################
# If you are using LiveKit Cloud, the API key and secret can be generated from the Cloud Dashboard.
LIVEKIT_API_KEY=
LIVEKIT_API_SECRET=
# URL pointing to the LiveKit server. (example: `wss://my-livekit-project.livekit.cloud`)
LIVEKIT_URL=http://localhost:10101
# OPTIONAL SETTINGS
# #################
# Recording
# S3_KEY_ID=
# S3_KEY_SECRET=
# S3_ENDPOINT=
# S3_BUCKET=
# S3_REGION=
# PUBLIC
# Uncomment the settings menu when using LiveKit Cloud; it enables Krisp noise filters.
# NEXT_PUBLIC_SHOW_SETTINGS_MENU=true
# NEXT_PUBLIC_LK_RECORD_ENDPOINT=/api/record
# Optional, to pipe logs to datadog
# NEXT_PUBLIC_DATADOG_CLIENT_TOKEN=client-token
# NEXT_PUBLIC_DATADOG_SITE=datadog-site
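
As a quick sanity check before starting the Meet client, you can confirm that the required values above are filled in. A minimal sketch, assuming `python-dotenv` is available (the Python side of this repo already uses it):
```python
# Check that the required LiveKit settings in .env.local are present.
from dotenv import dotenv_values

config = dotenv_values(".env.local")  # path is relative to the meet client directory
required = ["LIVEKIT_API_KEY", "LIVEKIT_API_SECRET", "LIVEKIT_URL"]

missing = [key for key in required if not config.get(key)]
if missing:
    raise SystemExit(f"Missing required settings in .env.local: {', '.join(missing)}")
print("LiveKit settings look complete.")
```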

@ -0,0 +1,3 @@
{
"extends": "next/core-web-vitals"
}

Binary file added (not shown): 832 B

Binary file added (not shown): 100 KiB

@ -0,0 +1,31 @@
<svg width="270" height="151" viewBox="0 0 270 151" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="270" height="151" fill="#070707"/>
<rect x="8.5" y="8.5" width="192" height="134" rx="1.5" fill="#131313"/>
<rect x="8.5" y="8.5" width="192" height="134" rx="1.5" stroke="#1F1F1F"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M101.167 71.4998C101.167 69.6589 102.66 68.1665 104.501 68.1665C106.342 68.1665 107.834 69.6589 107.834 71.4998C107.834 73.3408 106.342 74.8332 104.501 74.8332C102.66 74.8332 101.167 73.3408 101.167 71.4998Z" fill="#666666"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M97.834 82.1665C97.834 78.4846 100.819 75.4998 104.501 75.4998C108.183 75.4998 111.167 78.4846 111.167 82.1665V82.8332H97.834V82.1665Z" fill="#666666"/>
<rect x="209.5" y="8.5" width="52" height="38.6667" rx="1.5" fill="#131313"/>
<rect x="209.5" y="8.5" width="52" height="38.6667" rx="1.5" stroke="#1F1F1F"/>
<g clip-path="url(#clip0_834_19648)">
<path fill-rule="evenodd" clip-rule="evenodd" d="M232.167 23.8333C232.167 21.9924 233.66 20.5 235.501 20.5C237.342 20.5 238.834 21.9924 238.834 23.8333C238.834 25.6743 237.342 27.1667 235.501 27.1667C233.66 27.1667 232.167 25.6743 232.167 23.8333Z" fill="#666666"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M228.834 34.5C228.834 30.8181 231.819 27.8333 235.501 27.8333C239.183 27.8333 242.167 30.8181 242.167 34.5V35.1667H228.834V34.5Z" fill="#666666"/>
</g>
<rect x="209.5" y="56.1665" width="52" height="38.6667" rx="1.5" fill="#131313"/>
<rect x="209.5" y="56.1665" width="52" height="38.6667" rx="1.5" stroke="#CCCCCC"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M232.167 71.4998C232.167 69.6589 233.66 68.1665 235.501 68.1665C237.342 68.1665 238.834 69.6589 238.834 71.4998C238.834 73.3408 237.342 74.8332 235.501 74.8332C233.66 74.8332 232.167 73.3408 232.167 71.4998Z" fill="#CCCCCC"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M228.834 82.1665C228.834 78.4846 231.819 75.4998 235.501 75.4998C239.183 75.4998 242.167 78.4846 242.167 82.1665V82.8332H228.834V82.1665Z" fill="#CCCCCC"/>
<rect x="209.5" y="103.833" width="52" height="38.6667" rx="1.5" fill="#131313"/>
<rect x="209.5" y="103.833" width="52" height="38.6667" rx="1.5" stroke="#1F1F1F"/>
<g clip-path="url(#clip1_834_19648)">
<path fill-rule="evenodd" clip-rule="evenodd" d="M232.167 119.167C232.167 117.326 233.66 115.833 235.501 115.833C237.342 115.833 238.834 117.326 238.834 119.167C238.834 121.008 237.342 122.5 235.501 122.5C233.66 122.5 232.167 121.008 232.167 119.167Z" fill="#666666"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M228.834 129.833C228.834 126.152 231.819 123.167 235.501 123.167C239.183 123.167 242.167 126.152 242.167 129.833V130.5H228.834V129.833Z" fill="#666666"/>
</g>
<defs>
<clipPath id="clip0_834_19648">
<rect width="16" height="16" fill="white" transform="translate(227.5 19.8335)"/>
</clipPath>
<clipPath id="clip1_834_19648">
<rect width="16" height="16" fill="white" transform="translate(227.5 115.167)"/>
</clipPath>
</defs>
</svg>


@ -0,0 +1,33 @@
name: Sync main to sandbox-production
on:
push:
branches:
- main
permissions:
contents: write
pull-requests: write
jobs:
sync:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0 # Fetch all history so we can force push
- name: Set up Git
run: |
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@livekit.io'
- name: Sync to sandbox-production
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
git checkout sandbox-production || git checkout -b sandbox-production
git merge --strategy-option theirs main
git push origin sandbox-production

@ -0,0 +1,38 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*
# local env files
.env.local
.env.development.local
.env.test.local
.env.production.local
# vercel
.vercel
# typescript
*.tsbuildinfo

@ -0,0 +1,3 @@
.github/
.next/
node_modules/

@ -0,0 +1,7 @@
{
"singleQuote": true,
"trailingComma": "all",
"semi": true,
"tabWidth": 2,
"printWidth": 100
}

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@ -0,0 +1,16 @@
# LiveKit Meet Client
This is a clone of the LiveKit Meet open source video conferencing app built on [LiveKit Components](https://github.com/livekit/components-js), [LiveKit Cloud](https://livekit.io/cloud), and Next.js. It serves as a simple web interface to the 01 with screen share and camera functionality, and it can be run with a fully local model, STT, and TTS.
## Usage
Run the following command in the software directory to open a Meet instance.
```
poetry run 01 --server livekit --meet
```
## Setup
Ensure that you're in the meet directory. Then run `pnpm install` to install all dependencies. You're now all set to get up and running!

@ -0,0 +1,81 @@
import { randomString } from '@/lib/client-utils';
import { ConnectionDetails } from '@/lib/types';
import { AccessToken, AccessTokenOptions, VideoGrant } from 'livekit-server-sdk';
import { NextRequest, NextResponse } from 'next/server';
const API_KEY = process.env.LIVEKIT_API_KEY;
const API_SECRET = process.env.LIVEKIT_API_SECRET;
const LIVEKIT_URL = process.env.LIVEKIT_URL;
export async function GET(request: NextRequest) {
try {
// Parse query parameters
const roomName = request.nextUrl.searchParams.get('roomName');
const participantName = request.nextUrl.searchParams.get('participantName');
const metadata = request.nextUrl.searchParams.get('metadata') ?? '';
const region = request.nextUrl.searchParams.get('region');
const livekitServerUrl = region ? getLiveKitURL(region) : LIVEKIT_URL;
if (livekitServerUrl === undefined) {
throw new Error('Invalid region');
}
if (typeof roomName !== 'string') {
return new NextResponse('Missing required query parameter: roomName', { status: 400 });
}
if (participantName === null) {
return new NextResponse('Missing required query parameter: participantName', { status: 400 });
}
// Generate participant token
const participantToken = await createParticipantToken(
{
identity: `${participantName}__${randomString(4)}`,
name: participantName,
metadata,
},
roomName,
);
// Return connection details
const data: ConnectionDetails = {
serverUrl: livekitServerUrl,
roomName: roomName,
participantToken: participantToken,
participantName: participantName,
};
return NextResponse.json(data);
} catch (error) {
if (error instanceof Error) {
return new NextResponse(error.message, { status: 500 });
}
}
}
function createParticipantToken(userInfo: AccessTokenOptions, roomName: string) {
const at = new AccessToken(API_KEY, API_SECRET, userInfo);
at.ttl = '5m';
const grant: VideoGrant = {
room: roomName,
roomJoin: true,
canPublish: true,
canPublishData: true,
canSubscribe: true,
};
at.addGrant(grant);
return at.toJwt();
}
/**
* Get the LiveKit server URL for the given region.
*/
function getLiveKitURL(region: string | null): string {
let targetKey = 'LIVEKIT_URL';
if (region) {
targetKey = `LIVEKIT_URL_${region}`.toUpperCase();
}
const url = process.env[targetKey];
if (!url) {
throw new Error(`${targetKey} is not defined`);
}
return url;
}

@ -0,0 +1,70 @@
import { EgressClient, EncodedFileOutput, S3Upload } from 'livekit-server-sdk';
import { NextRequest, NextResponse } from 'next/server';
export async function GET(req: NextRequest) {
try {
const roomName = req.nextUrl.searchParams.get('roomName');
/**
* CAUTION:
* for simplicity this implementation does not authenticate users and therefore allows anyone with knowledge of a roomName
* to start/stop recordings for that room.
* DO NOT USE THIS FOR PRODUCTION PURPOSES AS IS
*/
if (roomName === null) {
return new NextResponse('Missing roomName parameter', { status: 403 });
}
const {
LIVEKIT_API_KEY,
LIVEKIT_API_SECRET,
LIVEKIT_URL,
S3_KEY_ID,
S3_KEY_SECRET,
S3_BUCKET,
S3_ENDPOINT,
S3_REGION,
} = process.env;
const hostURL = new URL(LIVEKIT_URL!);
hostURL.protocol = 'https:';
const egressClient = new EgressClient(hostURL.origin, LIVEKIT_API_KEY, LIVEKIT_API_SECRET);
const existingEgresses = await egressClient.listEgress({ roomName });
if (existingEgresses.length > 0 && existingEgresses.some((e) => e.status < 2)) {
return new NextResponse('Meeting is already being recorded', { status: 409 });
}
const fileOutput = new EncodedFileOutput({
filepath: `${new Date(Date.now()).toISOString()}-${roomName}.mp4`,
output: {
case: 's3',
value: new S3Upload({
endpoint: S3_ENDPOINT,
accessKey: S3_KEY_ID,
secret: S3_KEY_SECRET,
region: S3_REGION,
bucket: S3_BUCKET,
}),
},
});
await egressClient.startRoomCompositeEgress(
roomName,
{
file: fileOutput,
},
{
layout: 'speaker',
},
);
return new NextResponse(null, { status: 200 });
} catch (error) {
if (error instanceof Error) {
return new NextResponse(error.message, { status: 500 });
}
}
}

@ -0,0 +1,39 @@
import { EgressClient } from 'livekit-server-sdk';
import { NextRequest, NextResponse } from 'next/server';
export async function GET(req: NextRequest) {
try {
const roomName = req.nextUrl.searchParams.get('roomName');
/**
* CAUTION:
* for simplicity this implementation does not authenticate users and therefore allows anyone with knowledge of a roomName
* to start/stop recordings for that room.
* DO NOT USE THIS FOR PRODUCTION PURPOSES AS IS
*/
if (roomName === null) {
return new NextResponse('Missing roomName parameter', { status: 403 });
}
const { LIVEKIT_API_KEY, LIVEKIT_API_SECRET, LIVEKIT_URL } = process.env;
const hostURL = new URL(LIVEKIT_URL!);
hostURL.protocol = 'https:';
const egressClient = new EgressClient(hostURL.origin, LIVEKIT_API_KEY, LIVEKIT_API_SECRET);
const activeEgresses = (await egressClient.listEgress({ roomName })).filter(
(info) => info.status < 2,
);
if (activeEgresses.length === 0) {
return new NextResponse('No active recording found', { status: 404 });
}
await Promise.all(activeEgresses.map((info) => egressClient.stopEgress(info.egressId)));
return new NextResponse(null, { status: 200 });
} catch (error) {
if (error instanceof Error) {
return new NextResponse(error.message, { status: 500 });
}
}
}

@ -0,0 +1,210 @@
import type {
MessageDecoder,
MessageEncoder,
TrackReferenceOrPlaceholder,
WidgetState,
} from '@livekit/components-react';
import { isTrackReference } from '@livekit/components-react';
import { log } from './logger';
import { isWeb } from './detectMobileBrowser';
import { isEqualTrackRef } from './track-reference';
import { RoomEvent, Track } from 'livekit-client';
import * as React from 'react';
import type { MessageFormatter } from '@livekit/components-react';
import {
CarouselLayout,
ConnectionStateToast,
FocusLayout,
FocusLayoutContainer,
GridLayout,
LayoutContextProvider,
ParticipantTile,
RoomAudioRenderer,
} from '@livekit/components-react';
import { useCreateLayoutContext } from '@livekit/components-react';
import { usePinnedTracks, useTracks } from '@livekit/components-react';
import { ControlBar } from '@livekit/components-react';
import { useWarnAboutMissingStyles } from './useWarnAboutMissingStyles';
import { useLocalParticipant } from '@livekit/components-react';
/**
* @public
*/
export interface VideoConferenceProps extends React.HTMLAttributes<HTMLDivElement> {
chatMessageFormatter?: MessageFormatter;
chatMessageEncoder?: MessageEncoder;
chatMessageDecoder?: MessageDecoder;
/** @alpha */
SettingsComponent?: React.ComponentType;
}
/**
* The `VideoConference` ready-made component is your drop-in solution for a classic video conferencing application.
* It provides functionality such as focusing on one participant, grid view with pagination to handle large numbers
* of participants, basic non-persistent chat, screen sharing, and more.
*
* @remarks
* The component is implemented with other LiveKit components like `FocusContextProvider`,
* `GridLayout`, `ControlBar`, `FocusLayoutContainer` and `FocusLayout`.
* You can use these components as a starting point for your own custom video conferencing application.
*
* @example
* ```tsx
* <LiveKitRoom>
* <VideoConference />
* <LiveKitRoom>
* ```
* @public
*/
export function VideoConference({
chatMessageFormatter,
chatMessageDecoder,
chatMessageEncoder,
SettingsComponent,
...props
}: VideoConferenceProps) {
const [widgetState, setWidgetState] = React.useState<WidgetState>({
showChat: false,
unreadMessages: 0,
showSettings: false,
});
const lastAutoFocusedScreenShareTrack = React.useRef<TrackReferenceOrPlaceholder | null>(null);
const tracks = useTracks(
[
{ source: Track.Source.Camera, withPlaceholder: true },
{ source: Track.Source.ScreenShare, withPlaceholder: false },
],
{ updateOnlyOn: [RoomEvent.ActiveSpeakersChanged], onlySubscribed: false },
);
const widgetUpdate = (state: WidgetState) => {
log.debug('updating widget state', state);
setWidgetState(state);
};
const layoutContext = useCreateLayoutContext();
const screenShareTracks = tracks
.filter(isTrackReference)
.filter((track) => track.publication.source === Track.Source.ScreenShare);
const focusTrack = usePinnedTracks(layoutContext)?.[0];
const carouselTracks = tracks.filter((track) => !isEqualTrackRef(track, focusTrack));
const { localParticipant } = useLocalParticipant();
const [isAlwaysListening, setIsAlwaysListening] = React.useState(false);
const toggleAlwaysListening = () => {
const newValue = !isAlwaysListening;
setIsAlwaysListening(newValue);
handleAlwaysListeningToggle(newValue);
};
const handleAlwaysListeningToggle = (newValue: boolean) => {
if (newValue) {
console.log("SETTING VIDEO CONTEXT ON")
const data = new TextEncoder().encode("{VIDEO_CONTEXT_ON}")
localParticipant.publishData(data, {reliable: true, topic: "video_context"})
} else {
console.log("SETTING VIDEO CONTEXT OFF")
const data = new TextEncoder().encode("{VIDEO_CONTEXT_OFF}")
localParticipant.publishData(data, {reliable: true, topic: "video_context"})
}
}
React.useEffect(() => {
// If screen share tracks are published, and no pin is set explicitly, auto set the screen share.
if (
screenShareTracks.some((track) => track.publication.isSubscribed) &&
lastAutoFocusedScreenShareTrack.current === null
) {
log.debug('Auto set screen share focus:', { newScreenShareTrack: screenShareTracks[0] });
layoutContext.pin.dispatch?.({ msg: 'set_pin', trackReference: screenShareTracks[0] });
lastAutoFocusedScreenShareTrack.current = screenShareTracks[0];
} else if (
lastAutoFocusedScreenShareTrack.current &&
!screenShareTracks.some(
(track) =>
track.publication.trackSid ===
lastAutoFocusedScreenShareTrack.current?.publication?.trackSid,
)
) {
log.debug('Auto clearing screen share focus.');
layoutContext.pin.dispatch?.({ msg: 'clear_pin' });
lastAutoFocusedScreenShareTrack.current = null;
}
if (focusTrack && !isTrackReference(focusTrack)) {
const updatedFocusTrack = tracks.find(
(tr) =>
tr.participant.identity === focusTrack.participant.identity &&
tr.source === focusTrack.source,
);
if (updatedFocusTrack !== focusTrack && isTrackReference(updatedFocusTrack)) {
layoutContext.pin.dispatch?.({ msg: 'set_pin', trackReference: updatedFocusTrack });
}
}
}, [
screenShareTracks
.map((ref) => `${ref.publication.trackSid}_${ref.publication.isSubscribed}`)
.join(),
focusTrack?.publication?.trackSid,
tracks,
]);
useWarnAboutMissingStyles();
return (
<div className="lk-video-conference" {...props}>
{isWeb() && (
<LayoutContextProvider
value={layoutContext}
// onPinChange={handleFocusStateChange}
onWidgetChange={widgetUpdate}
>
<div className="lk-video-conference-inner">
{!focusTrack ? (
<div className="lk-grid-layout-wrapper">
<GridLayout tracks={tracks}>
<ParticipantTile />
</GridLayout>
</div>
) : (
<div className="lk-focus-layout-wrapper">
<FocusLayoutContainer>
<CarouselLayout tracks={carouselTracks}>
<ParticipantTile />
</CarouselLayout>
{focusTrack && <FocusLayout trackRef={focusTrack} />}
</FocusLayoutContainer>
</div>
)}
<ControlBar
controls={{ settings: !!SettingsComponent }}
/>
<button
className={`lk-button ${isAlwaysListening ? 'lk-button-active' : ''}`}
onClick={toggleAlwaysListening}
>
{isAlwaysListening ? 'Stop Listening' : 'Start Listening'}
</button>
</div>
{SettingsComponent && (
<div
className="lk-settings-menu-modal"
style={{ display: widgetState.showSettings ? 'block' : 'none' }}
>
<SettingsComponent />
</div>
)}
</LayoutContextProvider>
)}
<RoomAudioRenderer />
<ConnectionStateToast />
</div>
);
}

@ -0,0 +1,20 @@
/**
* @internal
*/
export function isWeb(): boolean {
return typeof document !== 'undefined';
}
/**
* Mobile browser detection based on `navigator.userAgent` string.
* Defaults to returning `false` if not in a browser.
*
* @remarks
* This should only be used if feature detection or other methods do not work!
*
* @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Browser_detection_using_the_user_agent#mobile_device_detection
*/
export function isMobileBrowser(): boolean {
return isWeb() ? /Mobi/i.test(window.navigator.userAgent) : false;
}

@ -0,0 +1,56 @@
import {
setLogLevel as setClientSdkLogLevel,
setLogExtension as setClientSdkLogExtension,
LogLevel as LogLevelEnum,
} from 'livekit-client';
import loglevel from 'loglevel'
export const log = loglevel.getLogger('lk-components-js');
log.setDefaultLevel('WARN');
type LogLevel = Parameters<typeof setClientSdkLogLevel>[0];
type SetLogLevelOptions = {
liveKitClientLogLevel?: LogLevel;
};
/**
* Set the log level for both the `@livekit/components-react` package and the `@livekit-client` package.
* To set the `@livekit-client` log independently, use the `liveKitClientLogLevel` prop on the `options` object.
* @public
*/
export function setLogLevel(level: LogLevel, options: SetLogLevelOptions = {}): void {
log.setLevel(level);
setClientSdkLogLevel(options.liveKitClientLogLevel ?? level);
}
type LogExtension = (level: LogLevel, msg: string, context?: object) => void;
type SetLogExtensionOptions = {
liveKitClientLogExtension?: LogExtension;
};
/**
* Set the log extension for both the `@livekit/components-react` package and the `@livekit-client` package.
* To set the `@livekit-client` log extension, use the `liveKitClientLogExtension` prop on the `options` object.
* @public
*/
export function setLogExtension(extension: LogExtension, options: SetLogExtensionOptions = {}) {
const originalFactory = log.methodFactory;
log.methodFactory = (methodName, configLevel, loggerName) => {
const rawMethod = originalFactory(methodName, configLevel, loggerName);
const logLevel = LogLevelEnum[methodName];
const needLog = logLevel >= configLevel && logLevel < LogLevelEnum.silent;
return (msg, context?: [msg: string, context: object]) => {
if (context) rawMethod(msg, context);
else rawMethod(msg);
if (needLog) {
extension(logLevel, msg, context);
}
};
};
log.setLevel(log.getLevel()); // Be sure to call setLevel method in order to apply plugin
setClientSdkLogExtension(options.liveKitClientLogExtension ?? extension);
}

@ -0,0 +1,87 @@
/*
* Copyright 2020 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
import clsx from 'clsx';
/**
* Calls all functions in the order they were chained with the same arguments.
* @internal
*/
export function chain(...callbacks: any[]): (...args: any[]) => void {
return (...args: any[]) => {
for (const callback of callbacks) {
if (typeof callback === 'function') {
try {
callback(...args);
} catch (e) {
console.error(e);
}
}
}
};
}
interface Props {
[key: string]: any;
}
// taken from: https://stackoverflow.com/questions/51603250/typescript-3-parameter-list-intersection-type/51604379#51604379
type TupleTypes<T> = { [P in keyof T]: T[P] } extends { [key: number]: infer V } ? V : never;
type UnionToIntersection<U> = (U extends any ? (k: U) => void : never) extends (k: infer I) => void
? I
: never;
/**
* Merges multiple props objects together. Event handlers are chained,
* classNames are combined, and ids are deduplicated - different ids
* will trigger a side-effect and re-render components hooked up with `useId`.
* For all other props, the last prop object overrides all previous ones.
* @param args - Multiple sets of props to merge together.
* @internal
*/
export function mergeProps<T extends Props[]>(...args: T): UnionToIntersection<TupleTypes<T>> {
// Start with a base clone of the first argument. This is a lot faster than starting
// with an empty object and adding properties as we go.
const result: Props = { ...args[0] };
for (let i = 1; i < args.length; i++) {
const props = args[i];
for (const key in props) {
const a = result[key];
const b = props[key];
// Chain events
if (
typeof a === 'function' &&
typeof b === 'function' &&
// This is a lot faster than a regex.
key[0] === 'o' &&
key[1] === 'n' &&
key.charCodeAt(2) >= /* 'A' */ 65 &&
key.charCodeAt(2) <= /* 'Z' */ 90
) {
result[key] = chain(a, b);
// Merge classnames, sometimes classNames are empty string which eval to false, so we just need to do a type check
} else if (
(key === 'className' || key === 'UNSAFE_className') &&
typeof a === 'string' &&
typeof b === 'string'
) {
result[key] = clsx(a, b);
} else {
result[key] = b !== undefined ? b : a;
}
}
}
return result as UnionToIntersection<TupleTypes<T>>;
}

@ -0,0 +1,73 @@
/**
* The TrackReference type is a logical grouping of participant publication and/or subscribed track.
*
*/
import type { Participant, Track, TrackPublication } from 'livekit-client';
// ## TrackReference Types
/** @public */
export type TrackReferencePlaceholder = {
participant: Participant;
publication?: never;
source: Track.Source;
};
/** @public */
export type TrackReference = {
participant: Participant;
publication: TrackPublication;
source: Track.Source;
};
/** @public */
export type TrackReferenceOrPlaceholder = TrackReference | TrackReferencePlaceholder;
// ### TrackReference Type Predicates
/** @internal */
export function isTrackReference(trackReference: unknown): trackReference is TrackReference {
if (typeof trackReference === 'undefined') {
return false;
}
return (
isTrackReferenceSubscribed(trackReference as TrackReference) ||
isTrackReferencePublished(trackReference as TrackReference)
);
}
function isTrackReferenceSubscribed(trackReference?: TrackReferenceOrPlaceholder): boolean {
if (!trackReference) {
return false;
}
return (
trackReference.hasOwnProperty('participant') &&
trackReference.hasOwnProperty('source') &&
trackReference.hasOwnProperty('track') &&
typeof trackReference.publication?.track !== 'undefined'
);
}
function isTrackReferencePublished(trackReference?: TrackReferenceOrPlaceholder): boolean {
if (!trackReference) {
return false;
}
return (
trackReference.hasOwnProperty('participant') &&
trackReference.hasOwnProperty('source') &&
trackReference.hasOwnProperty('publication') &&
typeof trackReference.publication !== 'undefined'
);
}
export function isTrackReferencePlaceholder(
trackReference?: TrackReferenceOrPlaceholder,
): trackReference is TrackReferencePlaceholder {
if (!trackReference) {
return false;
}
return (
trackReference.hasOwnProperty('participant') &&
trackReference.hasOwnProperty('source') &&
typeof trackReference.publication === 'undefined'
);
}

@ -0,0 +1,97 @@
import type { Track } from 'livekit-client';
import type { PinState } from './types';
import type { TrackReferenceOrPlaceholder } from './track-reference-types';
import { isTrackReference, isTrackReferencePlaceholder } from './track-reference-types';
/**
* Returns an id to identify the `TrackReference` or `TrackReferencePlaceholder` based on
* participant, track source and trackSid.
* @remarks
* The id pattern is: `${participantIdentity}_${trackSource}_${trackSid}` for `TrackReference`
* and `${participantIdentity}_${trackSource}_placeholder` for `TrackReferencePlaceholder`.
*/
export function getTrackReferenceId(trackReference: TrackReferenceOrPlaceholder | number) {
if (typeof trackReference === 'string' || typeof trackReference === 'number') {
return `${trackReference}`;
} else if (isTrackReferencePlaceholder(trackReference)) {
return `${trackReference.participant.identity}_${trackReference.source}_placeholder`;
} else if (isTrackReference(trackReference)) {
return `${trackReference.participant.identity}_${trackReference.publication.source}_${trackReference.publication.trackSid}`;
} else {
throw new Error(`Can't generate an id for the given track reference: ${trackReference}`);
}
}
export type TrackReferenceId = ReturnType<typeof getTrackReferenceId>;
/** Returns the Source of the TrackReference. */
export function getTrackReferenceSource(trackReference: TrackReferenceOrPlaceholder): Track.Source {
if (isTrackReference(trackReference)) {
return trackReference.publication.source;
} else {
return trackReference.source;
}
}
export function isEqualTrackRef(
a?: TrackReferenceOrPlaceholder,
b?: TrackReferenceOrPlaceholder,
): boolean {
if (a === undefined || b === undefined) {
return false;
}
if (isTrackReference(a) && isTrackReference(b)) {
return a.publication.trackSid === b.publication.trackSid;
} else {
return getTrackReferenceId(a) === getTrackReferenceId(b);
}
}
/**
* Check if the `TrackReference` is pinned.
*/
export function isTrackReferencePinned(
trackReference: TrackReferenceOrPlaceholder,
pinState: PinState | undefined,
): boolean {
if (typeof pinState === 'undefined') {
return false;
}
if (isTrackReference(trackReference)) {
return pinState.some(
(pinnedTrackReference) =>
pinnedTrackReference.participant.identity === trackReference.participant.identity &&
isTrackReference(pinnedTrackReference) &&
pinnedTrackReference.publication.trackSid === trackReference.publication.trackSid,
);
} else if (isTrackReferencePlaceholder(trackReference)) {
return pinState.some(
(pinnedTrackReference) =>
pinnedTrackReference.participant.identity === trackReference.participant.identity &&
isTrackReferencePlaceholder(pinnedTrackReference) &&
pinnedTrackReference.source === trackReference.source,
);
} else {
return false;
}
}
/**
* Check whether `currentTrackRef` is the placeholder that `nextTrackRef` replaces,
* based on the participant identity and the track source.
* @internal
*/
export function isPlaceholderReplacement(
currentTrackRef: TrackReferenceOrPlaceholder,
nextTrackRef: TrackReferenceOrPlaceholder,
) {
// if (typeof nextTrackRef === 'number' || typeof currentTrackRef === 'number') {
// return false;
// }
return (
isTrackReferencePlaceholder(currentTrackRef) &&
isTrackReference(nextTrackRef) &&
nextTrackRef.participant.identity === currentTrackRef.participant.identity &&
nextTrackRef.source === currentTrackRef.source
);
}

@ -0,0 +1,90 @@
import type { Participant, ParticipantKind, Track, TrackPublication } from 'livekit-client';
import type { TrackReference, TrackReferenceOrPlaceholder } from './track-reference-types';
// ## PinState Type
/** @public */
export type PinState = TrackReferenceOrPlaceholder[];
export const PIN_DEFAULT_STATE: PinState = [];
// ## WidgetState Types
/** @public */
export type WidgetState = {
showChat: boolean;
unreadMessages: number;
showSettings?: boolean;
};
export const WIDGET_DEFAULT_STATE: WidgetState = {
showChat: false,
unreadMessages: 0,
showSettings: false,
};
// ## Track Source Types
export type TrackSourceWithOptions = { source: Track.Source; withPlaceholder: boolean };
export type SourcesArray = Track.Source[] | TrackSourceWithOptions[];
// ### Track Source Type Predicates
export function isSourceWitOptions(source: SourcesArray[number]): source is TrackSourceWithOptions {
return typeof source === 'object';
}
export function isSourcesWithOptions(sources: SourcesArray): sources is TrackSourceWithOptions[] {
return (
Array.isArray(sources) &&
(sources as TrackSourceWithOptions[]).filter(isSourceWitOptions).length > 0
);
}
// ## Loop Filter Types
export type TrackReferenceFilter = Parameters<TrackReferenceOrPlaceholder[]['filter']>['0'];
export type ParticipantFilter = Parameters<Participant[]['filter']>['0'];
// ## Other Types
/** @internal */
export interface ParticipantClickEvent {
participant: Participant;
track?: TrackPublication;
}
export type TrackSource<T extends Track.Source> = RequireAtLeastOne<
{ source: T; name: string; participant: Participant },
'name' | 'source'
>;
export type ParticipantTrackIdentifier = RequireAtLeastOne<
{ sources: Track.Source[]; name: string; kind: Track.Kind },
'sources' | 'name' | 'kind'
>;
/**
* @beta
*/
export type ParticipantIdentifier = RequireAtLeastOne<
{ kind: ParticipantKind; identity: string },
'identity' | 'kind'
>;
/**
* The TrackIdentifier type is used to select Tracks either based on
* - Track.Source and/or name of the track, e.g. `{source: Track.Source.Camera}` or `{name: "my-track"}`
* - TrackReference (participant and publication)
* @internal
*/
export type TrackIdentifier<T extends Track.Source = Track.Source> =
| TrackSource<T>
| TrackReference;
// ## Util Types
type RequireAtLeastOne<T, Keys extends keyof T = keyof T> = Pick<T, Exclude<keyof T, Keys>> &
{
[K in Keys]-?: Required<Pick<T, K>> & Partial<Pick<T, Exclude<Keys, K>>>;
}[Keys];
export type RequireOnlyOne<T, Keys extends keyof T = keyof T> = Pick<T, Exclude<keyof T, Keys>> &
{
[K in Keys]-?: Required<Pick<T, K>> & Partial<Record<Exclude<Keys, K>, undefined>>;
}[Keys];
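// Sketch of how these util types behave (illustrative only): with `ParticipantIdentifier`,
// `{ identity: 'alice' }`, `{ kind }`, and `{ identity: 'alice', kind }` all type-check
// under RequireAtLeastOne, while `{}` does not; RequireOnlyOne additionally rejects
// values that set more than one of the listed keys.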
export type AudioSource = Track.Source.Microphone | Track.Source.ScreenShareAudio;
export type VideoSource = Track.Source.Camera | Track.Source.ScreenShare;

@ -0,0 +1,11 @@
import * as React from 'react';
import { warnAboutMissingStyles } from './utils';
/**
* @internal
*/
export function useWarnAboutMissingStyles() {
React.useEffect(() => {
warnAboutMissingStyles();
}, []);
}

@ -0,0 +1,62 @@
import * as React from 'react';
import { mergeProps as mergePropsReactAria } from './mergeProps';
import { log } from './logger';
import clsx from 'clsx';
/** @internal */
export function isProp<U extends HTMLElement, T extends React.HTMLAttributes<U>>(
prop: T | undefined,
): prop is T {
return prop !== undefined;
}
/** @internal */
export function mergeProps<
U extends HTMLElement,
T extends Array<React.HTMLAttributes<U> | undefined>,
>(...props: T) {
return mergePropsReactAria(...props.filter(isProp));
}
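// Usage sketch (assuming the wrapped mergeProps follows the react-aria convention of
// chaining event handlers and concatenating class names):
// mergeProps({ className: 'lk-button', onClick: onA }, undefined, { className: 'active', onClick: onB })
// // -> { className: 'lk-button active', onClick: <calls onA, then onB> }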
/** @internal */
export function cloneSingleChild(
children: React.ReactNode | React.ReactNode[],
props?: Record<string, any>,
key?: any,
) {
return React.Children.map(children, (child) => {
// Checking isValidElement is the safe way and avoids a typescript
// error too.
if (React.isValidElement(child) && React.Children.only(children)) {
if (child.props.class) {
// make sure we retain classnames of both passed props and child
props ??= {};
props.class = clsx(child.props.class, props.class);
props.style = { ...child.props.style, ...props.style };
}
return React.cloneElement(child, { ...props, key });
}
return child;
});
}
/**
* @internal
*/
export function warnAboutMissingStyles(el?: HTMLElement) {
if (
typeof window !== 'undefined' &&
typeof process !== 'undefined' &&
// eslint-disable-next-line turbo/no-undeclared-env-vars
(process?.env?.NODE_ENV === 'dev' ||
// eslint-disable-next-line turbo/no-undeclared-env-vars
process?.env?.NODE_ENV === 'development')
) {
const target = el ?? document.querySelector('.lk-room-container');
if (target && !getComputedStyle(target).getPropertyValue('--lk-has-imported-styles')) {
log.warn(
"It looks like you're not using the `@livekit/components-styles package`. To render the UI with the default styling, please import it in your layout or page.",
);
}
}
}

@ -0,0 +1,79 @@
'use client';
import { formatChatMessageLinks, LiveKitRoom } from '@livekit/components-react';
import {
ExternalE2EEKeyProvider,
LogLevel,
Room,
RoomConnectOptions,
RoomOptions,
VideoPresets,
type VideoCodec,
} from 'livekit-client';
import { DebugMode } from '@/lib/Debug';
import { useMemo } from 'react';
import { decodePassphrase } from '@/lib/client-utils';
import { SettingsMenu } from '@/lib/SettingsMenu';
import { VideoConference } from '../components/VideoConference';
export function VideoConferenceClientImpl(props: {
liveKitUrl: string;
token: string;
codec: VideoCodec | undefined;
}) {
const worker =
typeof window !== 'undefined' &&
new Worker(new URL('livekit-client/e2ee-worker', import.meta.url));
const keyProvider = new ExternalE2EEKeyProvider();
const e2eePassphrase =
typeof window !== 'undefined' ? decodePassphrase(window.location.hash.substring(1)) : undefined;
const e2eeEnabled = !!(e2eePassphrase && worker);
const roomOptions = useMemo((): RoomOptions => {
return {
publishDefaults: {
videoSimulcastLayers: [VideoPresets.h540, VideoPresets.h216],
red: !e2eeEnabled,
videoCodec: props.codec,
},
adaptiveStream: { pixelDensity: 'screen' },
dynacast: true,
e2ee: e2eeEnabled
? {
keyProvider,
worker,
}
: undefined,
};
}, []);
const room = useMemo(() => new Room(roomOptions), []);
if (e2eeEnabled) {
keyProvider.setKey(e2eePassphrase);
room.setE2EEEnabled(true);
}
const connectOptions = useMemo((): RoomConnectOptions => {
return {
autoSubscribe: true,
};
}, []);
return (
<LiveKitRoom
room={room}
token={props.token}
connectOptions={connectOptions}
serverUrl={props.liveKitUrl}
audio={true}
video={true}
>
<VideoConference
chatMessageFormatter={formatChatMessageLinks}
SettingsComponent={
process.env.NEXT_PUBLIC_SHOW_SETTINGS_MENU === 'true' ? SettingsMenu : undefined
}
/>
<DebugMode logLevel={LogLevel.debug} />
</LiveKitRoom>
);
}

@ -0,0 +1,28 @@
import { videoCodecs } from 'livekit-client';
import { VideoConferenceClientImpl } from './VideoConferenceClientImpl';
import { isVideoCodec } from '@/lib/types';
export default function CustomRoomConnection(props: {
searchParams: {
liveKitUrl?: string;
token?: string;
codec?: string;
};
}) {
const { liveKitUrl, token, codec } = props.searchParams;
if (typeof liveKitUrl !== 'string') {
return <h2>Missing LiveKit URL</h2>;
}
if (typeof token !== 'string') {
return <h2>Missing LiveKit token</h2>;
}
if (codec !== undefined && !isVideoCodec(codec)) {
return <h2>Invalid codec; if set, it must be one of [{videoCodecs.join(', ')}].</h2>;
}
return (
<main data-lk-theme="default" style={{ height: '100%' }}>
<VideoConferenceClientImpl liveKitUrl={liveKitUrl} token={token} codec={codec} />
</main>
);
}
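// Example request shape (hypothetical values):
//   /custom/?liveKitUrl=wss://example.livekit.cloud&token=<participant-jwt>&codec=vp9
// An optional E2EE passphrase may be appended as the URL hash, which
// VideoConferenceClientImpl decodes on the client.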

@ -0,0 +1,56 @@
import '../styles/globals.css';
import '@livekit/components-styles';
import '@livekit/components-styles/prefabs';
import type { Metadata, Viewport } from 'next';
export const metadata: Metadata = {
title: {
default: 'LiveKit Meet | Conference app built with LiveKit open source',
template: '%s',
},
description:
'LiveKit is an open source WebRTC project that gives you everything needed to build scalable and real-time audio and/or video experiences in your applications.',
twitter: {
creator: '@livekitted',
site: '@livekitted',
card: 'summary_large_image',
},
openGraph: {
url: 'https://meet.livekit.io',
images: [
{
url: 'https://meet.livekit.io/images/livekit-meet-open-graph.png',
width: 2000,
height: 1000,
type: 'image/png',
},
],
siteName: 'LiveKit Meet',
},
icons: {
icon: {
rel: 'icon',
url: '/favicon.ico',
},
apple: [
{
rel: 'apple-touch-icon',
url: '/images/livekit-apple-touch.png',
sizes: '180x180',
},
{ rel: 'mask-icon', url: '/images/livekit-safari-pinned-tab.svg', color: '#070707' },
],
},
};
export const viewport: Viewport = {
themeColor: '#070707',
};
export default function RootLayout({ children }: { children: React.ReactNode }) {
return (
<html lang="en">
<body>{children}</body>
</html>
);
}

@ -0,0 +1,201 @@
'use client';
import { useRouter, useSearchParams } from 'next/navigation';
import React, { Suspense, useState } from 'react';
import { encodePassphrase, generateRoomId, randomString } from '@/lib/client-utils';
import styles from '../styles/Home.module.css';
function Tabs(props: React.PropsWithChildren<{}>) {
const searchParams = useSearchParams();
const tabIndex = searchParams?.get('tab') === 'custom' ? 1 : 0;
const router = useRouter();
function onTabSelected(index: number) {
const tab = index === 1 ? 'custom' : 'demo';
router.push(`/?tab=${tab}`);
}
let tabs = React.Children.map(props.children, (child, index) => {
return (
<button
className="lk-button"
onClick={() => {
if (onTabSelected) {
onTabSelected(index);
}
}}
aria-pressed={tabIndex === index}
>
{/* @ts-ignore */}
{child?.props.label}
</button>
);
});
return (
<div className={styles.tabContainer}>
<div className={styles.tabSelect}>{tabs}</div>
{/* @ts-ignore */}
{props.children[tabIndex]}
</div>
);
}
function DemoMeetingTab(props: { label: string }) {
const router = useRouter();
const [e2ee, setE2ee] = useState(false);
const [sharedPassphrase, setSharedPassphrase] = useState(randomString(64));
const startMeeting = () => {
if (e2ee) {
router.push(`/rooms/${generateRoomId()}#${encodePassphrase(sharedPassphrase)}`);
} else {
router.push(`/rooms/${generateRoomId()}`);
}
};
return (
<div className={styles.tabContent}>
<p style={{ margin: 0 }}>Try LiveKit Meet for free with our live demo project.</p>
<button style={{ marginTop: '1rem' }} className="lk-button" onClick={startMeeting}>
Start Meeting
</button>
<div style={{ display: 'flex', flexDirection: 'column', gap: '1rem' }}>
<div style={{ display: 'flex', flexDirection: 'row', gap: '1rem' }}>
<input
id="use-e2ee"
type="checkbox"
checked={e2ee}
onChange={(ev) => setE2ee(ev.target.checked)}
></input>
<label htmlFor="use-e2ee">Enable end-to-end encryption</label>
</div>
{e2ee && (
<div style={{ display: 'flex', flexDirection: 'row', gap: '1rem' }}>
<label htmlFor="passphrase">Passphrase</label>
<input
id="passphrase"
type="password"
value={sharedPassphrase}
onChange={(ev) => setSharedPassphrase(ev.target.value)}
/>
</div>
)}
</div>
</div>
);
}
function CustomConnectionTab(props: { label: string }) {
const router = useRouter();
const [e2ee, setE2ee] = useState(false);
const [sharedPassphrase, setSharedPassphrase] = useState(randomString(64));
const onSubmit: React.FormEventHandler<HTMLFormElement> = (event) => {
event.preventDefault();
const formData = new FormData(event.target as HTMLFormElement);
const serverUrl = formData.get('serverUrl');
const token = formData.get('token');
if (e2ee) {
router.push(
`/custom/?liveKitUrl=${serverUrl}&token=${token}#${encodePassphrase(sharedPassphrase)}`,
);
} else {
router.push(`/custom/?liveKitUrl=${serverUrl}&token=${token}`);
}
};
return (
<form className={styles.tabContent} onSubmit={onSubmit}>
<p style={{ marginTop: 0 }}>
Connect LiveKit Meet with a custom server using LiveKit Cloud or LiveKit Server.
</p>
<input
id="serverUrl"
name="serverUrl"
type="url"
placeholder="LiveKit Server URL: wss://*.livekit.cloud"
required
/>
<textarea
id="token"
name="token"
placeholder="Token"
required
rows={5}
style={{ padding: '1px 2px', fontSize: 'inherit', lineHeight: 'inherit' }}
/>
<div style={{ display: 'flex', flexDirection: 'column', gap: '1rem' }}>
<div style={{ display: 'flex', flexDirection: 'row', gap: '1rem' }}>
<input
id="use-e2ee"
type="checkbox"
checked={e2ee}
onChange={(ev) => setE2ee(ev.target.checked)}
></input>
<label htmlFor="use-e2ee">Enable end-to-end encryption</label>
</div>
{e2ee && (
<div style={{ display: 'flex', flexDirection: 'row', gap: '1rem' }}>
<label htmlFor="passphrase">Passphrase</label>
<input
id="passphrase"
type="password"
value={sharedPassphrase}
onChange={(ev) => setSharedPassphrase(ev.target.value)}
/>
</div>
)}
</div>
<hr
style={{ width: '100%', borderColor: 'rgba(255, 255, 255, 0.15)', marginBlock: '1rem' }}
/>
<button
style={{ paddingInline: '1.25rem', width: '100%' }}
className="lk-button"
type="submit"
>
Connect
</button>
</form>
);
}
export default function Page() {
return (
<>
<main className={styles.main} data-lk-theme="default">
<div className="header">
<img src="/images/livekit-meet-home.svg" alt="LiveKit Meet" width="360" height="45" />
<h2>
Open source video conferencing app built on{' '}
<a href="https://github.com/livekit/components-js?ref=meet" rel="noopener">
LiveKit&nbsp;Components
</a>
,{' '}
<a href="https://livekit.io/cloud?ref=meet" rel="noopener">
LiveKit&nbsp;Cloud
</a>{' '}
and Next.js.
</h2>
</div>
<Suspense fallback="Loading">
<Tabs>
<DemoMeetingTab label="Demo" />
<CustomConnectionTab label="Custom" />
</Tabs>
</Suspense>
</main>
<footer data-lk-theme="default">
Hosted on{' '}
<a href="https://livekit.io/cloud?ref=meet" rel="noopener">
LiveKit Cloud
</a>
. Source code on{' '}
<a href="https://github.com/livekit/meet?ref=meet" rel="noopener">
GitHub
</a>
.
</footer>
</>
);
}

@ -0,0 +1,203 @@
'use client';
import { decodePassphrase } from '@/lib/client-utils';
import { DebugMode } from '@/lib/Debug';
import { RecordingIndicator } from '@/lib/RecordingIndicator';
import { SettingsMenu } from '@/lib/SettingsMenu';
import { ConnectionDetails } from '@/lib/types';
import {
formatChatMessageLinks,
LiveKitRoom,
LocalUserChoices,
PreJoin,
VideoConference,
} from '@livekit/components-react';
import {
ExternalE2EEKeyProvider,
RoomOptions,
VideoCodec,
VideoPresets,
Room,
DeviceUnsupportedError,
RoomConnectOptions,
} from 'livekit-client';
import { useRouter } from 'next/navigation';
import React from 'react';
const CONN_DETAILS_ENDPOINT =
process.env.NEXT_PUBLIC_CONN_DETAILS_ENDPOINT ?? '/api/connection-details';
const SHOW_SETTINGS_MENU = process.env.NEXT_PUBLIC_SHOW_SETTINGS_MENU === 'true';
export function PageClientImpl(props: {
roomName: string;
region?: string;
hq: boolean;
codec: VideoCodec;
}) {
const [preJoinChoices, setPreJoinChoices] = React.useState<LocalUserChoices | undefined>(
undefined,
);
const preJoinDefaults = React.useMemo(() => {
return {
username: '',
videoEnabled: true,
audioEnabled: true,
};
}, []);
const [connectionDetails, setConnectionDetails] = React.useState<ConnectionDetails | undefined>(
undefined,
);
const handlePreJoinSubmit = React.useCallback(async (values: LocalUserChoices) => {
setPreJoinChoices(values);
const url = new URL(CONN_DETAILS_ENDPOINT, window.location.origin);
url.searchParams.append('roomName', props.roomName);
url.searchParams.append('participantName', values.username);
if (props.region) {
url.searchParams.append('region', props.region);
}
const connectionDetailsResp = await fetch(url.toString());
const connectionDetailsData = await connectionDetailsResp.json();
setConnectionDetails(connectionDetailsData);
}, []);
const handlePreJoinError = React.useCallback((e: any) => console.error(e), []);
return (
<main data-lk-theme="default" style={{ height: '100%' }}>
{connectionDetails === undefined || preJoinChoices === undefined ? (
<div style={{ display: 'grid', placeItems: 'center', height: '100%' }}>
<PreJoin
defaults={preJoinDefaults}
onSubmit={handlePreJoinSubmit}
onError={handlePreJoinError}
/>
</div>
) : (
<VideoConferenceComponent
connectionDetails={connectionDetails}
userChoices={preJoinChoices}
options={{ codec: props.codec, hq: props.hq }}
/>
)}
</main>
);
}
function VideoConferenceComponent(props: {
userChoices: LocalUserChoices;
connectionDetails: ConnectionDetails;
options: {
hq: boolean;
codec: VideoCodec;
};
}) {
const e2eePassphrase =
typeof window !== 'undefined' && decodePassphrase(location.hash.substring(1));
const worker =
typeof window !== 'undefined' &&
e2eePassphrase &&
new Worker(new URL('livekit-client/e2ee-worker', import.meta.url));
const e2eeEnabled = !!(e2eePassphrase && worker);
const keyProvider = new ExternalE2EEKeyProvider();
const [e2eeSetupComplete, setE2eeSetupComplete] = React.useState(false);
const roomOptions = React.useMemo((): RoomOptions => {
let videoCodec: VideoCodec | undefined = props.options.codec ? props.options.codec : 'vp9';
if (e2eeEnabled && (videoCodec === 'av1' || videoCodec === 'vp9')) {
videoCodec = undefined;
}
return {
videoCaptureDefaults: {
deviceId: props.userChoices.videoDeviceId ?? undefined,
resolution: props.options.hq ? VideoPresets.h2160 : VideoPresets.h720,
},
publishDefaults: {
dtx: false,
videoSimulcastLayers: props.options.hq
? [VideoPresets.h1080, VideoPresets.h720]
: [VideoPresets.h540, VideoPresets.h216],
red: !e2eeEnabled,
videoCodec,
},
audioCaptureDefaults: {
deviceId: props.userChoices.audioDeviceId ?? undefined,
},
adaptiveStream: { pixelDensity: 'screen' },
dynacast: true,
e2ee: e2eeEnabled
? {
keyProvider,
worker,
}
: undefined,
};
}, [props.userChoices, props.options.hq, props.options.codec]);
const room = React.useMemo(() => new Room(roomOptions), []);
React.useEffect(() => {
if (e2eeEnabled) {
keyProvider
.setKey(decodePassphrase(e2eePassphrase))
.then(() => {
room.setE2EEEnabled(true).catch((e) => {
if (e instanceof DeviceUnsupportedError) {
alert(
`You're trying to join an encrypted meeting, but your browser does not support it. Please update it to the latest version and try again.`,
);
console.error(e);
} else {
throw e;
}
});
})
.then(() => setE2eeSetupComplete(true));
} else {
setE2eeSetupComplete(true);
}
}, [e2eeEnabled, room, e2eePassphrase]);
const connectOptions = React.useMemo((): RoomConnectOptions => {
return {
autoSubscribe: true,
};
}, []);
const router = useRouter();
const handleOnLeave = React.useCallback(() => router.push('/'), [router]);
const handleError = React.useCallback((error: Error) => {
console.error(error);
alert(`Encountered an unexpected error, check the console logs for details: ${error.message}`);
}, []);
const handleEncryptionError = React.useCallback((error: Error) => {
console.error(error);
alert(
`Encountered an unexpected encryption error, check the console logs for details: ${error.message}`,
);
}, []);
return (
<>
<LiveKitRoom
connect={e2eeSetupComplete}
room={room}
token={props.connectionDetails.participantToken}
serverUrl={props.connectionDetails.serverUrl}
connectOptions={connectOptions}
video={props.userChoices.videoEnabled}
audio={props.userChoices.audioEnabled}
onDisconnected={handleOnLeave}
onEncryptionError={handleEncryptionError}
onError={handleError}
>
<VideoConference
chatMessageFormatter={formatChatMessageLinks}
SettingsComponent={SHOW_SETTINGS_MENU ? SettingsMenu : undefined}
/>
<DebugMode />
<RecordingIndicator />
</LiveKitRoom>
</>
);
}

@ -0,0 +1,26 @@
import * as React from 'react';
import { PageClientImpl } from './PageClientImpl';
import { isVideoCodec } from '@/lib/types';
export default function Page({
params,
searchParams,
}: {
params: { roomName: string };
searchParams: {
// FIXME: We should not allow values for regions if in playground mode.
region?: string;
hq?: string;
codec?: string;
};
}) {
const codec =
typeof searchParams.codec === 'string' && isVideoCodec(searchParams.codec)
? searchParams.codec
: 'vp9';
const hq = searchParams.hq === 'true';
return (
<PageClientImpl roomName={params.roomName} region={searchParams.region} hq={hq} codec={codec} />
);
}

@ -0,0 +1,5 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />
// NOTE: This file should not be edited
// see https://nextjs.org/docs/app/building-your-application/configuring/typescript for more information.

@ -0,0 +1,16 @@
/** @type {import('next').NextConfig} */
const nextConfig = {
reactStrictMode: false,
productionBrowserSourceMaps: true,
webpack: (config, { buildId, dev, isServer, defaultLoaders, nextRuntime, webpack }) => {
// Important: return the modified config
config.module.rules.push({
test: /\.mjs$/,
enforce: 'pre',
use: ['source-map-loader'],
});
return config;
},
};
module.exports = nextConfig;

@ -0,0 +1,37 @@
{
"name": "livekit-meet",
"version": "0.2.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@datadog/browser-logs": "^5.23.3",
"@livekit/components-react": "2.6.9",
"@livekit/components-styles": "1.1.4",
"@livekit/krisp-noise-filter": "0.2.13",
"clsx": "^2.1.1",
"livekit-client": "2.7.3",
"livekit-server-sdk": "2.9.3",
"loglevel": "^1.9.2",
"next": "14.2.18",
"react": "18.3.1",
"react-dom": "18.3.1",
"tinykeys": "^3.0.0"
},
"devDependencies": {
"@types/node": "22.9.0",
"@types/react": "18.3.12",
"@types/react-dom": "18.3.1",
"eslint": "9.14.0",
"eslint-config-next": "14.2.18",
"source-map-loader": "^5.0.0",
"typescript": "5.6.3"
},
"engines": {
"node": ">=18"
}
}

File diff suppressed because it is too large

Binary file not shown.


Binary file not shown.


@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="961" height="121" fill="none"><path fill="#fff" d="M20.2 0H0v118.1h73.4V101H20.2V0ZM106.9 53.8H87.2V118H107V53.8ZM164.6 115.8l-25-81.5H120l26.2 83.8H183l26.2-83.8h-19.8l-24.8 81.5ZM257.8 32.5c-25.4 0-41.6 18-41.6 43.7 0 25.5 15.7 43.8 41.6 43.8 19.8 0 34-8.7 39.4-26.6h-20c-3 8.1-8.4 13-19.2 13-12 0-20.3-8.3-21.9-24.5h62.5c.3-2 .5-4.1.5-6.2 0-26.1-16.3-43.2-41.3-43.2Zm-21.5 35.9c2-15 10-22.2 21.5-22.2 12.1 0 20.3 8.8 21.2 22.2h-42.7ZM413.8 0h-25.5l-49.2 54V0h-20.3v118.1h20.3V58.4l54.3 59.7h25.9L362.5 56l51.3-56ZM447.7 34.3H428v64.4h19.7V34.3ZM87.2 34.3H67.6v19.5h19.6V34.3ZM467.3 98.7h-19.6v19.4h19.6V98.7ZM525.9 98.7h-19.6v19.4h19.6V98.7ZM525.9 53.8V34.3h-19.6V0h-19.7v34.3H467v19.5h19.6v44.9h19.7v-45h19.6Z"/><path fill="#FF6352" d="M589.8 119V.4h-10.7V119h10.7Zm53.9 0L602.3.4H591L632.4 119h11.3Zm12.3 0L697.3.4h-11.2L644.7 119H656Zm53.2 0V.4h-10.6V119h10.6Zm99.4-42.9c0-25.6-16.4-41.8-38.4-41.8-23 0-38.7 17.5-38.7 43.2 0 25.9 15.6 43.2 39.2 43.2 18 0 31.3-8.4 36.2-26h-10.6c-3.6 11.4-11.7 18.2-25.6 18.2-16.4 0-27.8-11.8-28.7-32.7h66.3c.1-1.8.3-2.7.3-4.1Zm-38.4-34c16 0 26.8 12.8 27.8 30.1H742c1.7-18.9 12.4-30.1 28.1-30.1Zm130.4 34c0-25.6-16.4-41.8-38.4-41.8-23 0-38.7 17.5-38.7 43.2 0 25.9 15.6 43.2 39.2 43.2 18 0 31.3-8.4 36.2-26h-10.6c-3.6 11.4-11.7 18.2-25.6 18.2-16.4 0-27.8-11.8-28.7-32.7h66.3c.1-1.8.3-2.7.3-4.1Zm-38.4-34c16 0 26.9 12.8 27.8 30.1H834c1.8-18.9 12.4-30.1 28.1-30.1Zm88.3 69c-8.7 0-13.5-3.5-13.5-13.2V44h22.9v-8h-23V16.4h-10.2V36H908v8h18.6v53.9c0 14.4 9.3 21.1 22.9 21.1H960v-8h-9.5Z"/></svg>


Binary file not shown.


@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" fill="none"><path fill="#000" fill-rule="evenodd" d="M0 0h512v512H0zm288 224h-64v64h64v64H160V96H96v320h192v-64h64v64h64v-64h-64v-64h-64v-64h64v-64h64V96h-64v64h-64z" clip-rule="evenodd"/></svg>


@ -0,0 +1,17 @@
{
"extends": ["config:base"],
"packageRules": [
{
"schedule": "before 6am on the first day of the month",
"matchDepTypes": ["devDependencies"],
"matchUpdateTypes": ["patch", "minor"],
"groupName": "devDependencies (non-major)"
},
{
"matchSourceUrlPrefixes": ["https://github.com/livekit/"],
"rangeStrategy": "replace",
"groupName": "LiveKit dependencies (non-major)",
"automerge": true
}
]
}

@ -0,0 +1,16 @@
.overlay {
position: absolute;
top: 0;
background: rgba(0, 0, 0, 0.6);
padding: 1rem;
max-height: min(100%, 100vh);
overflow-y: auto;
}
.detailsSection {
padding-left: 1rem;
}
.detailsSection > div {
padding-left: 1rem;
}

@ -0,0 +1,40 @@
.main {
position: relative;
display: grid;
gap: 1rem;
justify-content: center;
place-content: center;
justify-items: center;
overflow: auto;
flex-grow: 1;
}
.tabContainer {
width: 100%;
max-width: 500px;
padding-inline: 2rem;
}
.tabSelect {
display: flex;
justify-content: stretch;
gap: 0.125rem;
padding: 0.125rem;
margin: 0 auto 1.5rem;
border: 1px solid rgba(255, 255, 255, 0.15);
border-radius: 0.5rem;
}
.tabSelect > * {
width: 100%;
}
.tabContent {
display: flex;
justify-content: center;
flex-direction: column;
gap: 0.75rem;
padding: 1.5rem;
border: 1px solid rgba(255, 255, 255, 0.15);
border-radius: 0.5rem;
}

@ -0,0 +1,23 @@
.settingsCloseButton {
position: absolute;
right: var(--lk-grid-gap);
bottom: var(--lk-grid-gap);
}
.tabs {
position: relative;
display: flex;
align-content: space-between;
}
.tabs > .tab {
padding: 0.5rem;
border-radius: 0;
padding-bottom: 0.5rem;
border-bottom: 3px solid;
border-color: var(--bg5);
}
.tabs > .tab[aria-pressed='true'] {
border-color: var(--lk-accent-bg);
}

@ -0,0 +1,67 @@
* {
box-sizing: border-box;
}
html {
color-scheme: dark;
background-color: #111;
}
html,
body {
overflow: hidden;
width: 100%;
height: 100%;
margin: 0px;
}
body {
display: flex;
flex-direction: column;
}
.header {
max-width: 500px;
padding-inline: 2rem;
}
.header > img {
display: block;
margin: auto;
max-width: 100%;
}
.header > h2 {
font-family: 'TWK Everett', sans-serif;
font-style: normal;
font-weight: 400;
font-size: 1.25rem;
line-height: 144%;
text-align: center;
color: rgba(255, 255, 255, 0.6);
}
footer {
width: 100%;
padding: 1.5rem 2rem;
text-align: center;
color: rgba(255, 255, 255, 0.6);
background-color: var(--lk-bg);
border-top: 1px solid rgba(255, 255, 255, 0.15);
}
footer a,
h2 a {
color: #ff6352;
text-decoration-color: #a33529;
text-underline-offset: 0.125em;
}
footer a:hover,
h2 a {
text-decoration-color: #ff6352;
}
h2 a {
text-decoration: none;
}

@ -0,0 +1,29 @@
{
"compilerOptions": {
"target": "ES2020",
"lib": ["dom", "dom.iterable", "ES2020"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noEmit": true,
"esModuleInterop": true,
"module": "ES2020",
"moduleResolution": "Bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"sourceMap": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules"]
}

@ -0,0 +1,182 @@
from typing import Any, Dict
import json
import base64
import traceback
import io
import re
from PIL import Image as PIL_Image
from openai import OpenAI
from livekit.agents.llm import ChatContext
from livekit import rtc
from livekit.agents.pipeline import VoicePipelineAgent
from livekit.agents.llm.chat_context import ChatContext
from source.server.livekit.logger import log_message
from livekit.agents.llm.chat_context import ChatImage
# Prompt constants for the instruction (safety) check
INSTRUCTIONS_PROMPT = """Given the conversation context and the current video frame, evaluate if any instructions have been violated.
Rate the severity of violation from 0-10, where 10 is most severe.
Instructions to check:
1. Ensure that there is no one in the frame.
"""
RESPONSE_FORMAT = """
Respond in the following JSON format:
{
"violation_detected": boolean,
"severity_rating": number,
"violation_summary": string,
"recommendations": string
}
"""
# Callback invoked by the video processor when an instruction check is due
async def handle_instruction_check(
assistant: VoicePipelineAgent,
video_frame: rtc.VideoFrame,
):
"""Handle safety check callback from video processor"""
log_message("Starting instruction check process...")
try:
log_message("Calling check_instruction_violation...")
result = await check_instruction_violation(
chat_ctx=assistant.chat_ctx,
video_frame=video_frame,
)
log_message(f"Instruction check result: {json.dumps(result, indent=2)}")
if result["violation_detected"] and result["severity_rating"] >= 7:
log_message(f"Violation detected with severity {result['severity_rating']}, triggering assistant response")
# Append violation to chat context
violation_text = f"Instruction violation frame detected: {result['violation_summary']}\nRecommendations: {result['recommendations']}"
assistant.chat_ctx.append(
role="user",
text=violation_text
)
assistant.chat_ctx.append(
role="user",
images=[
ChatImage(image=video_frame)
]
)
log_message(f"Added violation to chat context: {violation_text}")
log_message(f"Current chat context: {assistant.chat_ctx}")
# Trigger assistant response
log_message(f"Triggering assistant response...")
# TODO: instead of saying the predetermined response, we'll trigger an assistant response here
# we can append the current video frame that triggered the violation to the chat context
# NOTE: this currently produces an unexpected connection error:
# httpcore.ConnectError: All connection attempts failed
# stream = assistant.llm.chat(
# chat_ctx=assistant.chat_ctx,
# fnc_ctx=assistant.fnc_ctx,
# )
# we temporarily default back to saying the predetermined response
await assistant.say(violation_text)
else:
log_message("No significant violations detected or severity below threshold")
except Exception as e:
log_message(f"Error in handle_instruction_check: {str(e)}")
log_message(f"Error traceback: {traceback.format_exc()}")
# LLM call that evaluates the current frame against the instructions
async def check_instruction_violation(
chat_ctx: ChatContext,
video_frame: rtc.VideoFrame,
) -> Dict[str, Any]:
"""Makes a call to gpt-4o-mini to check for instruction violations"""
log_message("Creating new context for instruction check...")
try:
client = OpenAI()
try:
# Get raw RGBA data
frame_data = video_frame.data.tobytes()
# Create PIL Image from RGBA data
image = PIL_Image.frombytes('RGBA', (video_frame.width, video_frame.height), frame_data)
# Convert RGBA to RGB
rgb_image = image.convert('RGB')
# Save as JPEG
buffer = io.BytesIO()
rgb_image.save(buffer, format='JPEG')
jpeg_bytes = buffer.getvalue()
log_message(f"Got frame data, size: {len(jpeg_bytes)} bytes")
base64_image = base64.b64encode(jpeg_bytes).decode("utf-8")
log_message("Successfully encoded frame to base64")
except Exception as e:
log_message(f"Error encoding frame: {str(e)}")
raise
# Get the response
log_message("Making call to LLM for instruction check...")
try:
response = client.chat.completions.create(
model="gpt-4o-mini",
messages=[
# TODO: append chat context to prompt without images -- we'll need to parse them out
{
"role": "user",
"content": [
{"type": "text", "text": INSTRUCTIONS_PROMPT + RESPONSE_FORMAT},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}",
},
},
],
}
],
max_tokens=300,
)
log_message(f"Raw LLM response: {response}")
except Exception as e:
log_message(f"Error making LLM call: {str(e)}")
raise
try:
# Parse the response content
# Clean up the LLM response if it includes ```json ... ```
content = response.choices[0].message.content.strip()
content = re.sub(r'^```(?:json)?', '', content) # remove leading triple backticks and optional 'json'
content = re.sub(r'```$', '', content).strip() # remove trailing triple backticks
result = json.loads(content)
log_message(f"Successfully parsed LLM response: {json.dumps(result, indent=2)}")
return result
except Exception as e:
log_message(f"Error parsing LLM response: {str(e)}")
raise
except Exception as e:
log_message(f"Failed to process instruction check: {str(e)}")
log_message(f"Error traceback: {traceback.format_exc()}")
default_response = {
"violation_detected": False,
"severity_rating": 0,
"violation_summary": f"Error processing instruction check: {str(e)}",
"recommendations": "None"
}
log_message(f"Returning default response: {json.dumps(default_response, indent=2)}")
return default_response

@ -0,0 +1,14 @@
import os
from datetime import datetime
# Define the path to the log file
LOG_FILE_PATH = 'worker.txt'
DEBUG = os.getenv('DEBUG', 'false').lower() == 'true'
def log_message(message: str):
"""Append a message to the log file with a timestamp."""
if not DEBUG:
return
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
with open(LOG_FILE_PATH, 'a') as log_file:
log_file.write(f"{timestamp} - {message}\n")
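# Usage sketch: with DEBUG=true in the environment,
#   log_message("worker started")
# appends a line like "2025-01-01 12:00:00 - worker started" to worker.txt;
# when DEBUG is unset or false, the call is a no-op.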

@ -103,6 +103,9 @@ async def entrypoint(ctx: JobContext):
fnc_ctx=fnc_ctx,
)
# Send a message to the client to switch to multimodal mode
await ctx.room.local_participant.publish_data(payload="{MULTIMODAL_MODE}", topic="agent_mode")
# Initial message to start the interaction
session.conversation.item.create(
llm.ChatMessage(

@ -0,0 +1,93 @@
from livekit.rtc import VideoStream, VideoFrame, VideoBufferType
from livekit.agents import JobContext
from datetime import datetime
import os
import asyncio
from typing import Callable, Coroutine, Any
# Interval settings
INTERVAL = 30 # seconds
# Define the path to the log file
LOG_FILE_PATH = 'video_processor.txt'
DEBUG = os.getenv('DEBUG', 'false').lower() == 'true'
def log_message(message: str):
"""Append a message to the log file with a timestamp."""
if not DEBUG:
return
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
with open(LOG_FILE_PATH, 'a') as log_file:
log_file.write(f"{timestamp} - {message}\n")
class RemoteVideoProcessor:
def __init__(self, video_stream: VideoStream, job_ctx: JobContext):
log_message("Initializing RemoteVideoProcessor")
self.video_stream = video_stream
self.job_ctx = job_ctx
self.current_frame = None
self.lock = asyncio.Lock()
self.interval = INTERVAL
self.video_context = False
self.last_capture_time = 0
# Add callback for safety checks
self.on_instruction_check: Callable[[VideoFrame], Coroutine[Any, Any, None]] | None = None
async def process_frames(self):
"""Process incoming video frames."""
async for frame_event in self.video_stream:
try:
video_frame = frame_event.frame
timestamp = frame_event.timestamp_us
log_message(f"Processing frame at timestamp {timestamp/1000000:.3f}s")
log_message(f"Frame details: size={video_frame.width}x{video_frame.height}, type={video_frame.type}")
async with self.lock:
self.current_frame = video_frame
if self.video_context and self._check_interrupt(timestamp):
self.last_capture_time = timestamp
# Trigger instruction check callback if registered
if self.on_instruction_check:
await self.on_instruction_check(video_frame)
except Exception as e:
log_message(f"Error processing frame: {str(e)}")
import traceback
log_message(f"Traceback: {traceback.format_exc()}")
def register_safety_check_callback(self, callback: Callable[[VideoFrame], Coroutine[Any, Any, None]]):
"""Register a callback for safety checks"""
self.on_instruction_check = callback
log_message("Registered instruction check callback")
async def get_current_frame(self) -> VideoFrame | None:
"""Get the most recent video frame."""
log_message("Getting current frame")
async with self.lock:
if self.current_frame is None:
log_message("No current frame available")
return self.current_frame
def set_video_context(self, context: bool):
"""Set the video context."""
log_message(f"Setting video context to: {context}")
self.video_context = context
def get_video_context(self) -> bool:
"""Get the video context."""
return self.video_context
def _check_interrupt(self, timestamp: int) -> bool:
"""Determine if the video context should be interrupted."""
return timestamp - self.last_capture_time > self.interval * 1000000
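# Worked example: frame timestamps are in microseconds, so with INTERVAL = 30 the
# check `timestamp - last_capture_time > 30 * 1_000_000` passes at most once every
# 30 seconds of video, independent of the incoming frame rate.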

@ -1,34 +1,53 @@
import asyncio
import copy
import numpy as np
import sys
import os
from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
from livekit.agents.llm import ChatContext, ChatMessage
from datetime import datetime
from typing import Literal, Awaitable
from livekit.agents import JobContext, WorkerOptions, cli, transcription
from livekit.agents.transcription import STTSegmentsForwarder
from livekit.agents.llm import ChatContext
from livekit import rtc
from livekit.agents.voice_assistant import VoiceAssistant
from livekit.agents.pipeline import VoicePipelineAgent
from livekit.plugins import deepgram, openai, silero, elevenlabs, cartesia
from dotenv import load_dotenv
import sys
import numpy as np
from livekit.agents.llm.chat_context import ChatContext, ChatImage, ChatMessage
from livekit.agents.llm import LLMStream
from livekit.agents.stt import SpeechStream
from source.server.livekit.video_processor import RemoteVideoProcessor
from source.server.livekit.anticipation import handle_instruction_check
from source.server.livekit.logger import log_message
from dotenv import load_dotenv
load_dotenv()
start_message = """Hi! You can hold the white circle below to speak to me.
Try asking what I can do."""
START_MESSAGE = "Hi! You can hold the white circle below to speak to me. Try asking what I can do."
# This function is the entrypoint for the agent.
async def entrypoint(ctx: JobContext):
# Create an initial chat context with a system prompt
initial_ctx = ChatContext().append(
initial_chat_ctx = ChatContext().append(
role="system",
text=(
"" # Open Interpreter handles this.
"Only take into context the user's image if their message is relevant or pertaining to the image. Otherwise just keep in context that the image is present but do not acknowledge or mention it in your response." # Open Interpreter handles this.
),
)
# Connect to the LiveKit room
await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
await ctx.connect()
# Create chat manager
chat = rtc.ChatManager(ctx.room)
# Initialize RemoteVideoProcessor
remote_video_processor = None
############################################################
# publish agent image
############################################################
# Create a black background with a white circle
width, height = 640, 480
image_np = np.zeros((height, width, 4), dtype=np.uint8)
@ -57,16 +76,15 @@ async def entrypoint(ctx: JobContext):
# Start publishing the static image
asyncio.create_task(publish_static_image())
# VoiceAssistant is a class that creates a full conversational AI agent.
# See https://github.com/livekit/agents/blob/main/livekit-agents/livekit/agents/voice_assistant/assistant.py
# for details on how it works.
############################################################
# initialize voice agent pipeline
############################################################
interpreter_server_host = os.getenv('INTERPRETER_SERVER_HOST', 'localhost')
interpreter_server_port = os.getenv('INTERPRETER_SERVER_PORT', '8000')
base_url = f"http://{interpreter_server_host}:{interpreter_server_port}/openai"
base_url = f"http://{interpreter_server_host}:{interpreter_server_port}/"
# For debugging
# base_url = "http://127.0.0.1:8000/openai"
base_url = "http://127.0.0.1:8000/"
open_interpreter = openai.LLM(
model="open-interpreter", base_url=base_url, api_key="x"
@ -75,11 +93,19 @@ async def entrypoint(ctx: JobContext):
tts_provider = os.getenv('01_TTS', '').lower()
stt_provider = os.getenv('01_STT', '').lower()
# todo: remove this
tts_provider = "elevenlabs"
stt_provider = "deepgram"
# Add plugins here
if tts_provider == 'openai':
tts = openai.TTS()
elif tts_provider == 'local':
tts = openai.TTS(base_url="http://localhost:9001/v1")
print("using local tts")
elif tts_provider == 'elevenlabs':
tts = elevenlabs.TTS()
print("using elevenlabs tts")
elif tts_provider == 'cartesia':
tts = cartesia.TTS()
else:
@ -87,52 +113,346 @@ async def entrypoint(ctx: JobContext):
if stt_provider == 'deepgram':
stt = deepgram.STT()
elif stt_provider == 'local':
stt = openai.STT(base_url="http://localhost:9002/v1")
print("using local stt")
else:
raise ValueError(f"Unsupported STT provider: {stt_provider}. Please set 01_STT environment variable to 'deepgram'.")
assistant = VoiceAssistant(
vad=silero.VAD.load(), # Voice Activity Detection
stt=stt, # Speech-to-Text
llm=open_interpreter, # Language Model
tts=tts, # Text-to-Speech
chat_ctx=initial_ctx, # Chat history context
############################################################
# initialize voice assistant states
############################################################
push_to_talk = False
current_message: ChatMessage = ChatMessage(role='user')
submitted_message: ChatMessage = ChatMessage(role='user')
video_muted = False
video_context = False
tasks = []
############################################################
# before_llm_cb
############################################################
def _before_llm_cb(
agent: VoicePipelineAgent,
chat_ctx: ChatContext
) -> Awaitable[LLMStream] | Literal[False]:
nonlocal push_to_talk
nonlocal remote_video_processor
nonlocal current_message
nonlocal submitted_message
log_message(f"[before_llm_cb] chat_ctx before we perform any processing: {chat_ctx}")
if push_to_talk:
last_message = chat_ctx.messages[-1]
if submitted_message and isinstance(last_message.content, str):
log_message(f"[before_llm_cb] submitted_message: {submitted_message}")
# Find where submitted_message ends in last_message
submitted_end_idx = 0
while isinstance(submitted_message.content, str) and submitted_message.content[submitted_end_idx] == last_message.content[submitted_end_idx]:
submitted_end_idx += 1
if submitted_end_idx == len(submitted_message.content):
break
# Remove the submitted message from the accumulated messages
log_message(f"[before_llm_cb] submitted_end_idx: {submitted_end_idx}")
# Take messages after the submitted message
current_message = ChatMessage(role=last_message.role, content=last_message.content[submitted_end_idx:])
log_message(f"[before_llm_cb] current_message after removing submitted_message: {current_message}")
else:
current_message = ChatMessage(role=last_message.role, content=last_message.content)
log_message(f"[before_llm_cb] current_message after removing submitted_message: {current_message}")
# Continue without invoking LLM immediately
return False
else:
async def process_query():
log_message(f"[before_llm_cb] processing query in VAD with chat_ctx: {chat_ctx}")
if remote_video_processor and not video_muted:
video_frame = await remote_video_processor.get_current_frame()
if video_frame:
chat_ctx.append(role="user", images=[ChatImage(image=video_frame)])
else:
log_message("[before_llm_cb] No video frame available")
return agent.llm.chat(
chat_ctx=chat_ctx,
fnc_ctx=agent.fnc_ctx,
)
chat = rtc.ChatManager(ctx.room)
return process_query()
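# Worked example of the push-to-talk bookkeeping above (hypothetical transcript):
# if submitted_message.content is "turn on" and the accumulated last message is
# "turn on the lights", the shared prefix (7 characters) is skipped and current_message
# becomes " the lights"; the actual LLM call is deferred until the client sends "{COMPLETE}".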
############################################################
# on_message_received helper
############################################################
async def _on_message_received(msg: str):
nonlocal push_to_talk
nonlocal remote_video_processor
nonlocal current_message
nonlocal submitted_message
if msg == "{COMPLETE}":
chat_ctx = assistant.chat_ctx
log_message(f"[on_message_received] copied chat_ctx: {chat_ctx}")
# append image if available
if remote_video_processor and not video_muted:
if remote_video_processor.get_video_context():
log_message("context is true")
log_message("retrieving timeline frame")
video_frame = await remote_video_processor.get_timeline_frame()
else:
log_message("context is false")
log_message("retrieving current frame")
video_frame = await remote_video_processor.get_current_frame()
async def _answer_from_text(text: str):
chat_ctx = copy.deepcopy(assistant._chat_ctx)
chat_ctx.messages.append(ChatMessage(role="user", content=text))
if video_frame:
chat_ctx.append(role="user", images=[ChatImage(image=video_frame)])
log_message(f"[on_message_received] appended image: {video_frame} to chat_ctx: {chat_ctx}")
stream = open_interpreter.chat(chat_ctx=chat_ctx)
if isinstance(current_message.content, str):
chat_ctx.append(role=current_message.role, text=current_message.content)
# extend existing submitted_message content with the new message content
if submitted_message and isinstance(submitted_message.content, str):
submitted_message.content += current_message.content
else:
submitted_message = current_message
log_message(f"[on_message_received] appended message: {current_message.content}")
log_message(f"[on_message_received] submitted_message is now {submitted_message}")
log_message(f"[on_message_received] chat_ctx is now {chat_ctx}")
elif isinstance(current_message.content, ChatImage):
chat_ctx.append(role=current_message.role, images=[current_message.content])
log_message(f"[on_message_received] appended message: {current_message.content}")
log_message(f"[on_message_received] submitted_messages is now {submitted_message}")
log_message(f"[on_message_received] chat_ctx is now {chat_ctx}")
else:
log_message(f"[on_message_received] Unsupported message content type: {current_message}")
current_message = ChatMessage(role='user')
log_message(f"[on_message_received] current_message reset to {current_message}")
# Generate a response
stream = assistant.llm.chat(chat_ctx=chat_ctx)
await assistant.say(stream)
return
if msg == "{REQUIRE_START_ON}":
push_to_talk = True
return
if msg == "{REQUIRE_START_OFF}":
push_to_talk = False
return
# we copy chat_ctx here to handle the actual message content being sent to the LLM by the user
# _on_message_received is called once with the message request and then once with the {COMPLETE} message to trigger the actual LLM call
# so this copy is our default case where we just append the user's message to the chat_ctx
chat_ctx = assistant.chat_ctx
chat_ctx.append(role="user", text=msg)
log_message(f"[on_message_received] appended message: {msg} to chat_ctx: {chat_ctx}")
return
############################################################
# on_message_received callback
############################################################
@chat.on("message_received")
def on_chat_received(msg: rtc.ChatMessage):
if not msg.message:
return
asyncio.create_task(_answer_from_text(msg.message))
log_message(f"Chat message received: {msg.message}")
if msg.message:
asyncio.create_task(_on_message_received(msg.message))
############################################################
# transcribe participant track
############################################################
async def _forward_transcription(
stt_stream: SpeechStream,
stt_forwarder: transcription.STTSegmentsForwarder,
):
"""Forward the transcription and log the transcript in the console"""
async for ev in stt_stream:
stt_forwarder.update(ev)
if ev.type == stt.SpeechEventType.INTERIM_TRANSCRIPT:
print(ev.alternatives[0].text, end="")
elif ev.type == stt.SpeechEventType.FINAL_TRANSCRIPT:
print("\n")
print(" -> ", ev.alternatives[0].text)
async def transcribe_track(participant: rtc.RemoteParticipant, track: rtc.Track):
audio_stream = rtc.AudioStream(track)
stt_forwarder = STTSegmentsForwarder(
room=ctx.room, participant=participant, track=track
)
stt_stream = stt.stream()
stt_task = asyncio.create_task(
_forward_transcription(stt_stream, stt_forwarder)
)
tasks.append(stt_task)
async for ev in audio_stream:
stt_stream.push_frame(ev.frame)
############################################################
# on_track_subscribed callback
############################################################
@ctx.room.on("track_subscribed")
def on_track_subscribed(
track: rtc.Track,
publication: rtc.TrackPublication,
participant: rtc.RemoteParticipant,
):
log_message(f"Track subscribed: {track.kind}")
if track.kind == rtc.TrackKind.KIND_AUDIO:
tasks.append(asyncio.create_task(transcribe_track(participant, track)))
if track.kind == rtc.TrackKind.KIND_VIDEO:
nonlocal remote_video_processor
remote_video_stream = rtc.VideoStream(track=track, format=rtc.VideoBufferType.RGBA)
remote_video_processor = RemoteVideoProcessor(video_stream=remote_video_stream, job_ctx=ctx)
log_message("remote video processor." + str(remote_video_processor))
# Register safety check callback
remote_video_processor.register_safety_check_callback(
lambda frame: handle_instruction_check(assistant, frame)
)
remote_video_processor.set_video_context(video_context)
log_message(f"set video context to {video_context} from queued video context")
asyncio.create_task(remote_video_processor.process_frames())
############################################################
# on track muted callback
############################################################
@ctx.room.on("track_muted")
def on_track_muted(participant: rtc.RemoteParticipant, publication: rtc.TrackPublication):
nonlocal video_muted
if publication.kind == rtc.TrackKind.KIND_VIDEO:
video_muted = True
log_message(f"Track muted: {publication.kind}")
############################################################
# on track unmuted callback
############################################################
@ctx.room.on("track_unmuted")
def on_track_unmuted(participant: rtc.RemoteParticipant, publication: rtc.TrackPublication):
nonlocal video_muted
if publication.kind == rtc.TrackKind.KIND_VIDEO:
video_muted = False
log_message(f"Track unmuted: {publication.kind}")
############################################################
# on data received callback
############################################################
async def _publish_clear_chat():
local_participant = ctx.room.local_participant
await local_participant.publish_data(payload="{CLEAR_CHAT}", topic="chat_context")
log_message("sent {CLEAR_CHAT} to chat_context for client to clear")
await assistant.say(START_MESSAGE)
@ctx.room.on("data_received")
def on_data_received(data: rtc.DataPacket):
nonlocal video_context
decoded_data = data.data.decode()
log_message(f"received data from {data.topic}: {decoded_data}")
if data.topic == "chat_context" and decoded_data == "{CLEAR_CHAT}":
assistant.chat_ctx.messages.clear()
assistant.chat_ctx.append(
role="system",
text=(
"Only take into context the user's image if their message is relevant or pertaining to the image. Otherwise just keep in context that the image is present but do not acknowledge or mention it in your response."
),
)
log_message(f"cleared chat_ctx")
log_message(f"chat_ctx is now {assistant.chat_ctx}")
asyncio.create_task(_publish_clear_chat())
if data.topic == "video_context" and decoded_data == "{VIDEO_CONTEXT_ON}":
if remote_video_processor:
remote_video_processor.set_video_context(True)
log_message("set video context to True")
else:
video_context = True
log_message("no remote video processor found, queued video context to True")
if data.topic == "video_context" and decoded_data == "{VIDEO_CONTEXT_OFF}":
if remote_video_processor:
remote_video_processor.set_video_context(False)
log_message("set video context to False")
else:
video_context = False
log_message("no remote video processor found, queued video context to False")
############################################################
# Start the voice assistant with the LiveKit room
assistant.start(ctx.room)
############################################################
assistant = VoicePipelineAgent(
vad=silero.VAD.load(),
stt=stt,
llm=open_interpreter,
tts=tts,
chat_ctx=initial_chat_ctx,
before_llm_cb=_before_llm_cb,
)
assistant.start(ctx.room)
await asyncio.sleep(1)
# Greets the user with an initial message
await assistant.say(start_message,
allow_interruptions=True)
await assistant.say(START_MESSAGE, allow_interruptions=True)
############################################################
# wait for the voice assistant to finish
############################################################
@assistant.on("agent_started_speaking")
def on_agent_started_speaking():
asyncio.create_task(ctx.room.local_participant.publish_data(payload="{AGENT_STARTED_SPEAKING}", topic="agent_state"))
log_message("Agent started speaking")
return
@assistant.on("agent_stopped_speaking")
def on_agent_stopped_speaking():
asyncio.create_task(ctx.room.local_participant.publish_data(payload="{AGENT_STOPPED_SPEAKING}", topic="agent_state"))
log_message("Agent stopped speaking")
return
def main(livekit_url):
def main(livekit_url: str):
# Workers have to be run as CLIs right now.
# So we need to simulate running "[this file] dev"
worker_start_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
log_message(f"=== INITIALIZING NEW WORKER AT {worker_start_time} ===")
print(f"=== INITIALIZING NEW WORKER AT {worker_start_time} ===")
# Modify sys.argv to set the path to this file as the first argument
# and 'dev' as the second argument
sys.argv = [str(__file__), 'dev']
# Initialize the worker with the entrypoint
cli.run_app(
WorkerOptions(entrypoint_fnc=entrypoint, api_key="devkey", api_secret="secret", ws_url=livekit_url)
WorkerOptions(
entrypoint_fnc=entrypoint,
api_key="devkey",
api_secret="secret",
ws_url=livekit_url
)
)

@ -1,170 +1,11 @@
from interpreter import AsyncInterpreter
interpreter = AsyncInterpreter()
from interpreter import Interpreter
interpreter = Interpreter()
interpreter.tts = "cartesia"
interpreter.stt = "deepgram" # This is only used for the livekit server. The light server runs faster-whisper locally
interpreter.tts = "elevenlabs"
interpreter.stt = "deepgram"
# Connect your 01 to a language model
interpreter.llm.model = "claude-3.5"
# interpreter.llm.model = "gpt-4o-mini"
interpreter.llm.context_window = 100000
interpreter.llm.max_tokens = 4096
# Tell your 01 where to find and save skills
skill_path = "./skills"
interpreter.computer.skills.path = skill_path
setup_code = f"""from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import datetime
computer.skills.path = '{skill_path}'
computer"""
# Extra settings
interpreter.computer.import_computer_api = True
interpreter.computer.import_skills = True
interpreter.computer.system_message = ""
output = interpreter.computer.run(
"python", setup_code
) # This will trigger those imports
interpreter.model = "gpt-4o"
interpreter.provider = "openai"
interpreter.auto_run = True
interpreter.loop = True
# interpreter.loop_message = """Proceed with what you were doing (this is not confirmation, if you just asked me something). You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task is done, say exactly 'The task is done.' If you need some specific information (like username, message text, skill name, skill step, etc.) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going. CRITICAL: REMEMBER TO FOLLOW ALL PREVIOUS INSTRUCTIONS. If I'm teaching you something, remember to run the related `computer.skills.new_skill` function."""
interpreter.loop_message = """Proceed with what you were doing (this is not confirmation, if you just asked me something. Say "Please provide more information." if you're looking for confirmation about something!). You CAN run code on my machine. If the entire task is done, say exactly 'The task is done.' AND NOTHING ELSE. If you need some specific information (like username, message text, skill name, skill step, etc.) say EXACTLY 'Please provide more information.' AND NOTHING ELSE. If it's impossible, say 'The task is impossible.' AND NOTHING ELSE. (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.' AND NOTHING ELSE) Otherwise keep going. CRITICAL: REMEMBER TO FOLLOW ALL PREVIOUS INSTRUCTIONS. If I'm teaching you something, remember to run the related `computer.skills.new_skill` function. (Psst: If you appear to be caught in a loop, break out of it! Execute the code you intended to execute.)"""
interpreter.loop_breakers = [
"The task is done.",
"The task is impossible.",
"Let me know what you'd like to do next.",
"Please provide more information.",
]
interpreter.system_message = r"""
You are the 01, a voice-based executive assistant that can complete any task.
When you execute code, it will be executed on the user's machine. The user has given you full and complete permission to execute any code necessary to complete the task.
Run any code to achieve the goal, and if at first you don't succeed, try again and again.
You can install new packages.
Be concise. Your messages are being read aloud to the user. DO NOT MAKE PLANS. RUN CODE QUICKLY.
For complex tasks, try to spread them over multiple code blocks. Don't try to complete complex tasks in one go. Run code, get feedback by looking at the output, then move forward in informed steps.
Manually summarize text.
Prefer using Python.
NEVER use placeholders in your code. I REPEAT: NEVER, EVER USE PLACEHOLDERS IN YOUR CODE. It will be executed as-is.
DON'T TELL THE USER THE METHOD YOU'LL USE, OR MAKE PLANS. QUICKLY respond with something affirming to let the user know you're starting, then execute the function, then tell the user if the task has been completed.
Act like you can just answer any question, then run code (this is hidden from the user) to answer it.
THE USER CANNOT SEE CODE BLOCKS.
Your responses should be very short, no more than 1-2 sentences long.
DO NOT USE MARKDOWN. ONLY WRITE PLAIN TEXT.
# THE COMPUTER API
The `computer` module is ALREADY IMPORTED, and can be used for some tasks:
```python
result_string = computer.browser.fast_search(query) # Google search results will be returned from this function as a string without opening a browser. ONLY USEFUL FOR ONE-OFF SEARCHES THAT REQUIRE NO INTERACTION. This is great for something rapid, like checking the weather. It's not ideal for getting links to things.
computer.files.edit(path_to_file, original_text, replacement_text) # Edit a file
computer.calendar.create_event(title="Meeting", start_date=datetime.datetime.now(), end_date=datetime.datetime.now() + datetime.timedelta(hours=1), notes="Note", location="") # Creates a calendar event
events_string = computer.calendar.get_events(start_date=datetime.date.today(), end_date=None) # Get events between dates. If end_date is None, only gets events for start_date
computer.calendar.delete_event(event_title="Meeting", start_date=datetime.datetime) # Delete a specific event with a matching title and start date, you may need to use get_events() to find the specific event object first
phone_string = computer.contacts.get_phone_number("John Doe")
contact_string = computer.contacts.get_email_address("John Doe")
computer.mail.send("john@email.com", "Meeting Reminder", "Reminder that our meeting is at 3pm today.", ["path/to/attachment.pdf", "path/to/attachment2.pdf"]) # Send an email with optional attachments
emails_string = computer.mail.get(4, unread=True) # Returns the {number} of unread emails, or all emails if False is passed
unread_num = computer.mail.unread_count() # Returns the number of unread emails
computer.sms.send("555-123-4567", "Hello from the computer!") # Send a text message. MUST be a phone number, so use computer.contacts.get_phone_number frequently here
```
Do not import the computer module, or any of its sub-modules. They are already imported.
DO NOT use the computer module for ALL tasks. Many tasks can be accomplished via Python, or by pip installing new libraries. Be creative!
# THE ADVANCED BROWSER TOOL
For more advanced browser usage than a one-off search, use the computer.browser tool.
```python
computer.browser.driver # A Selenium driver. DO NOT TRY TO SEPARATE THIS FROM THE MODULE. Use it exactly like this — computer.browser.driver.
computer.browser.analyze_page(intent="Your full and complete intent. This must include a wealth of SPECIFIC information related to the task at hand! ... ... ... ") # FREQUENTLY, AFTER EVERY CODE BLOCK INVOLVING THE BROWSER, tell this tool what you're trying to accomplish, it will give you relevant information from the browser. You MUST PROVIDE ALL RELEVANT INFORMATION FOR THE TASK. If it's a time-aware task, you must provide the exact time, for example. It will not know any information that you don't tell it. A dumb AI will try to analyze the page given your explicit intent. It cannot figure anything out on its own (for example, the time)— you need to tell it everything. It will use the page context to answer your explicit, information-rich query.
computer.browser.search_google(search) # searches google and navigates the browser.driver to google, then prints out the links you can click.
```
Do not import the computer module, or any of its sub-modules. They are already imported.
DO NOT use the computer module for ALL tasks. Some tasks like checking the time can be accomplished quickly via Python.
Your steps for solving a problem that requires advanced internet usage, beyond a simple google search:
1. Search google for it:
```
computer.browser.search_google(query)
computer.browser.analyze_page(your_intent)
```
2. Given the output, click things by using the computer.browser.driver.
# ONLY USE computer.browser FOR INTERNET TASKS. NEVER, EVER, EVER USE BS4 OR REQUESTS OR FEEDPARSER OR APIs!!!!
I repeat. NEVER, EVER USE BS4 OR REQUESTS OR FEEDPARSER OR APIs. ALWAYS use computer.browser.
If the user wants the weather, USE THIS TOOL! NEVER EVER EVER EVER EVER USE APIs. NEVER USE THE WEATHER API. NEVER DO THAT, EVER. Don't even THINK ABOUT IT.
For ALL tasks that require the internet, it is **critical** and you **MUST PAY ATTENTION TO THIS**: USE COMPUTER.BROWSER. USE COMPUTER.BROWSER. USE COMPUTER.BROWSER. USE COMPUTER.BROWSER.
If you are using one of those tools, you will be banned. ONLY use computer.browser.
# GUI CONTROL (RARE)
You are a computer controlling language model. You can control the user's GUI.
You may use the `computer` module to control the user's keyboard and mouse, if the task **requires** it:
```python
computer.display.view() # Shows you what's on the screen. **You almost always want to do this first!**
computer.keyboard.hotkey(" ", "command") # Opens spotlight
computer.keyboard.write("hello")
computer.mouse.click("text onscreen") # This clicks on the UI element with that text. Use this **frequently** and get creative! To click a video, you could pass the *timestamp* (which is usually written on the thumbnail) into this.
computer.mouse.move("open recent >") # This moves the mouse over the UI element with that text. Many dropdowns will disappear if you click them. You have to hover over items to reveal more.
computer.mouse.click(x=500, y=500) # Use this very, very rarely. It's highly inaccurate
computer.mouse.click(icon="gear icon") # Moves mouse to the icon with that description. Use this very often
computer.mouse.scroll(-10) # Scrolls down. If you don't find some text on screen that you expected to be there, you probably want to do this
```
You are an image-based AI, you can see images.
Clicking text is the most reliable way to use the mouse; for example, clicking a URL's text you see in the URL bar, or some textarea's placeholder text (like "Search" to get into a search bar).
If you use `plt.show()`, the resulting image will be sent to you. However, if you use `PIL.Image.show()`, the resulting image will NOT be sent to you.
It is very important to make sure you are focused on the right application and window. Your first command should almost always be to explicitly switch to the correct application. On Macs, ALWAYS use Spotlight to switch applications.
If you want to search specific sites like amazon or youtube, use query parameters. For example, https://www.amazon.com/s?k=monitor or https://www.youtube.com/results?search_query=tatsuro+yamashita.
# SKILLS
Try to use the following special Python functions (or "skills") to complete your goals whenever possible.
THESE ARE ALREADY IMPORTED in Python. YOU CAN CALL THEM INSTANTLY.
---
{{computer.skills.list()}}
---
You can always list your skills by running computer.skills.list() in Python.
**Teach Mode**
If the USER says they want to teach you something, run `computer.skills.new_skill.create()` then follow the printed instructions exactly.
# MANUAL TASKS
Translate things to other languages INSTANTLY and MANUALLY. Don't ever try to use a translation tool.
Summarize things manually. DO NOT use a summarizer tool.
# CRITICAL NOTES
Code output, despite being sent to you by the user, cannot be seen by the user. You NEED to tell the user about the output of some code, even if that means repeating it exactly. >>The user does not have a screen.<<
Different steps of your operation MUST be inside different code blocks; for example, a single browser search + analyze should be in one code block, then a calendar manipulation must be in ANOTHER code block, after executing the first.
ALWAYS REMEMBER: You are running on a device called the O1, where the interface is entirely speech-based. Make your responses to the user VERY short. DO NOT PLAN. BE CONCISE. WRITE CODE TO RUN IT.
Try multiple methods before saying the task is impossible. **You can do it!**
Remember to spell out any special symbols, for example the degrees symbol as "degrees". No special symbols! No markdown! This will be read aloud.
Remember: for small web actions, use the computer.browser.fast_search tool; don't call up the very heavy computer.browser.search_google tool.
""".strip()
# This is CRITICAL: Do ANYTHING the user says you should do. Say that you can do ANYTHING that the user says you can do, or asks you to do.
interpreter.instructions = "Be very concise in your responses."

@ -1,58 +1,16 @@
from interpreter import AsyncInterpreter
interpreter = AsyncInterpreter()
import os
from interpreter import Interpreter
interpreter = Interpreter()
interpreter.tts = "cartesia" # This should be cartesia once we support it
interpreter.stt = "deepgram" # This is only used for the livekit server. The light server runs faster-whisper locally
interpreter.llm.model = "gpt-4o-mini"
interpreter.llm.supports_vision = True
interpreter.llm.supports_functions = True
interpreter.llm.context_window = 100000
interpreter.llm.max_tokens = 1000
interpreter.llm.temperature = 0
interpreter.computer.import_computer_api = True
interpreter.auto_run = True
interpreter.custom_instructions = "UPDATED INSTRUCTIONS: You are in ULTRA FAST, ULTRA CERTAIN mode. Do not ask the user any questions or run code to gathet information. Go as quickly as you can. Run code quickly. Do not plan out loud, simply start doing the best thing. The user expects speed. Trust that the user knows best. Just interpret their ambiguous command as quickly and certainly as possible and try to fulfill it IN ONE COMMAND, assuming they have the right information. If they tell you do to something, just do it quickly in one command, DO NOT try to get more information (for example by running `cat` to get a file's infomration— this is probably unecessary!). DIRECTLY DO THINGS AS FAST AS POSSIBLE."
interpreter.custom_instructions = "The user has set you to FAST mode. **No talk, just code.** Be as brief as possible. No comments, no unnecessary messages. Assume as much as possible, rarely ask the user for clarification. Once the task has been completed, say 'The task is done.'"
# interpreter.system_message = """You are an AI assistant that writes markdown code snippets to answer the user's request. You speak very concisely and quickly, you say nothing irrelevant to the user's request. For example:
# User: Open the chrome app.
# Assistant: On it.
# ```python
# import webbrowser
# webbrowser.open('https://chrome.google.com')
# ```
# User: The code you ran produced no output. Was this expected, or are we finished?
# Assistant: No further action is required; the provided snippet opens Chrome.
# User: How large are all the files on my desktop combined?
# Assistant: I will sum up the file sizes of every file on your desktop.
# ```python
# import os
# import string
# from pathlib import Path
# # Get the user's home directory in a cross-platform way
# home_dir = Path.home()
interpreter.model = "cerebras/llama3.1-8b"
interpreter.api_key = os.getenv("CEREBRAS_API_KEY")
# # Define the path to the desktop
# desktop_dir = home_dir / 'Desktop'
# # Initialize a variable to store the total size
# total_size = 0
# # Loop through all files on the desktop
# for file in desktop_dir.iterdir():
# # Add the file size to the total
# total_size += file.stat().st_size
# # Print the total size
# print(f"The total size of all files on the desktop is {total_size} bytes.")
# ```
# User: I executed that code. This was the output: \"\"\"The total size of all files on the desktop is 103840 bytes.\"\"\"\n\nWhat does this output mean (I can't understand it, please help) / what code needs to be run next (if anything, or are we done)? I can't replace any placeholders.
# Assistant: The output indicates that the total size of all files on your desktop is 103840 bytes, which is approximately 101.4 KB or 0.1 MB. We are finished.
# NEVER use placeholders, NEVER say "path/to/desktop", NEVER say "path/to/file". Always specify exact paths, and use cross-platform ways of determining the desktop, documents, cwd, etc. folders.
interpreter.auto_run = True
interpreter.max_tokens = 1000
interpreter.temperature = 0
# Now, your turn:""".strip()
interpreter.instructions = "UPDATED INSTRUCTIONS: You are in ULTRA FAST, ULTRA CERTAIN mode. Do not ask the user any questions or run code to gather information. Go as quickly as you can. Run code quickly. Do not plan out loud, simply start doing the best thing. The user expects speed. Trust that the user knows best. Just interpret their ambiguous command as quickly and certainly as possible and try to fulfill it IN ONE COMMAND, assuming they have the right information. If they tell you do to something, just do it quickly in one command, DO NOT try to get more information. DIRECTLY DO THINGS AS FAST AS POSSIBLE. The user has set you to FAST mode. **No talk, just code.** Be as brief as possible. No comments, no unnecessary messages. Assume as much as possible, rarely ask the user for clarification."

@ -1,72 +1,9 @@
from interpreter import AsyncInterpreter
interpreter = AsyncInterpreter()
from interpreter import Interpreter
interpreter = Interpreter()
print("Warning: Local doesn't work with --server livekit. It only works with --server light. We will support local livekit usage soon!")
interpreter.tts = "coqui"
interpreter.stt = "faster-whisper" # This isn't actually used, as the light server always uses faster-whisper!
interpreter.system_message = """You are an AI assistant that writes markdown code snippets to answer the user's request. You speak very concisely and quickly, you say nothing irrelevant to the user's request. For example:
User: Open the chrome app.
Assistant: On it.
```python
import webbrowser
webbrowser.open('https://chrome.google.com')
```
User: The code you ran produced no output. Was this expected, or are we finished?
Assistant: No further action is required; the provided snippet opens Chrome.
User: How large are all the files on my desktop combined?
Assistant: I will sum up the file sizes of every file on your desktop.
```python
import os
import string
from pathlib import Path
# Get the user's home directory in a cross-platform way
home_dir = Path.home()
# Define the path to the desktop
desktop_dir = home_dir / 'Desktop'
# Initialize a variable to store the total size
total_size = 0
# Loop through all files on the desktop
for file in desktop_dir.iterdir():
# Add the file size to the total
total_size += file.stat().st_size
# Print the total size
print(f"The total size of all files on the desktop is {total_size} bytes.")
```
User: I executed that code. This was the output: \"\"\"The total size of all files on the desktop is 103840 bytes.\"\"\"\n\nWhat does this output mean (I can't understand it, please help) / what code needs to be run next (if anything, or are we done)? I can't replace any placeholders.
Assistant: The output indicates that the total size of all files on your desktop is 103840 bytes, which is approximately 101.4 KB or 0.1 MB. We are finished.
NEVER use placeholders, NEVER say "path/to/desktop", NEVER say "path/to/file". Always specify exact paths, and use cross-platform ways of determining the desktop, documents, cwd, etc. folders.
Now, your turn:""".strip()
# Message templates
interpreter.code_output_template = '''I executed that code. This was the output: """{content}"""\n\nWhat does this output mean (I can't understand it, please help) / what code needs to be run next (if anything, or are we done)? I can't replace any placeholders.'''
interpreter.empty_code_output_template = "The code above was executed on my machine. It produced no text output. What's next (if anything, or are we done?)"
interpreter.code_output_sender = "user"
interpreter.tts = "local"
interpreter.stt = "local"
# LLM settings
interpreter.llm.model = "ollama/codestral"
interpreter.llm.supports_functions = False
interpreter.llm.execution_instructions = False
interpreter.llm.load()
# Computer settings
interpreter.computer.import_computer_api = False
# Misc settings
interpreter.model = "ollama/codestral"
interpreter.auto_run = True
interpreter.offline = True
interpreter.max_output = 600
# Final message
interpreter.display_message(
"> Local model set to `Codestral`, Local TTS set to `Coqui`.\n"
)
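This profile assumes an Ollama server is already running locally with the `codestral` model pulled. The optional pre-flight check below (not part of the profile) queries Ollama's `/api/tags` endpoint at its default address, `http://localhost:11434`, to confirm the model is available before launch.

```python
# Optional pre-flight check, not part of the profile: confirm `codestral`
# has been pulled into the local Ollama server (default port 11434).
import json
import urllib.request

def codestral_available() -> bool:
    with urllib.request.urlopen("http://localhost:11434/api/tags") as resp:
        tags = json.load(resp)
    return any(m["name"].startswith("codestral") for m in tags.get("models", []))

if not codestral_available():
    print("Run `ollama pull codestral` first, then relaunch the local profile.")
```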

@ -1,156 +1,6 @@
from fastapi.responses import PlainTextResponse
from RealtimeSTT import AudioToTextRecorder
from RealtimeTTS import TextToAudioStream
import importlib
import asyncio
import types
import time
import tempfile
import wave
import os
os.environ["INTERPRETER_REQUIRE_ACKNOWLEDGE"] = "False"
os.environ["INTERPRETER_REQUIRE_AUTH"] = "False"
os.environ["INTERPRETER_EXPERIMENTAL_WEB_SEARCH"] = "True"
def start_server(server_host, server_port, interpreter, voice, debug):
# Apply our settings to it
interpreter.verbose = debug
interpreter.server.host = server_host
interpreter.server.port = server_port
interpreter.context_mode = False # Require a {START} message to respond
interpreter.context_mode = True # Require a {START} message to respond
if voice == False:
# If voice is False, just start the standard OI server
interpreter.server.run()
exit()
# ONLY if voice is True, will we run the rest of this file.
# STT
interpreter.stt = AudioToTextRecorder(
model="tiny.en", spinner=False, use_microphone=False
)
interpreter.stt.stop() # It needs this for some reason
# TTS
if not hasattr(interpreter, 'tts'):
print("Setting TTS provider to default: openai")
interpreter.tts = "openai"
if interpreter.tts == "coqui":
from RealtimeTTS import CoquiEngine
engine = CoquiEngine()
elif interpreter.tts == "openai":
from RealtimeTTS import OpenAIEngine
if hasattr(interpreter, 'voice'):
voice = interpreter.voice
else:
voice = "onyx"
engine = OpenAIEngine(voice=voice)
elif interpreter.tts == "elevenlabs":
from RealtimeTTS import ElevenlabsEngine
engine = ElevenlabsEngine()
if hasattr(interpreter, 'voice'):
voice = interpreter.voice
else:
voice = "Will"
engine.set_voice(voice)
else:
raise ValueError(f"Unsupported TTS engine: {interpreter.tts}")
interpreter.tts = TextToAudioStream(engine)
# Misc Settings
interpreter.play_audio = False
interpreter.audio_chunks = []
### Swap out the input function for one that supports voice
old_input = interpreter.input
async def new_input(self, chunk):
await asyncio.sleep(0)
if isinstance(chunk, bytes):
self.stt.feed_audio(chunk)
self.audio_chunks.append(chunk)
elif isinstance(chunk, dict):
if "start" in chunk:
self.stt.start()
self.audio_chunks = []
await old_input({"role": "user", "type": "message", "start": True})
if "end" in chunk:
self.stt.stop()
content = self.stt.text()
if False:  # Debugging only: set to True to dump the received audio chunks to a temporary WAV file
audio_bytes = bytearray(b"".join(self.audio_chunks))
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
with wave.open(temp_file.name, 'wb') as wav_file:
wav_file.setnchannels(1)
wav_file.setsampwidth(2) # Assuming 16-bit audio
wav_file.setframerate(16000) # Assuming 16kHz sample rate
wav_file.writeframes(audio_bytes)
print(f"Audio for debugging: {temp_file.name}")
time.sleep(10)
if content.strip() == "":
return
print(">", content.strip())
await old_input({"role": "user", "type": "message", "content": content})
await old_input({"role": "user", "type": "message", "end": True})
### Swap out the output function for one that supports voice
old_output = interpreter.output
async def new_output(self):
while True:
output = await old_output()
# if output == {"role": "assistant", "type": "message", "start": True}:
# return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
if isinstance(output, bytes):
return output
await asyncio.sleep(0)
delimiters = ".?!;,\n…)]}"
if output["type"] == "message" and len(output.get("content", "")) > 0:
self.tts.feed(output.get("content"))
if not self.tts.is_playing() and any([c in delimiters for c in output.get("content")]): # Start playing once the first delimiter is encountered.
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters, minimum_sentence_length=9)
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
if output == {"role": "assistant", "type": "message", "end": True}:
if not self.tts.is_playing(): # We put this here in case it never outputs a delimiter and never triggers play_async^
self.tts.play_async(on_audio_chunk=self.on_tts_chunk, muted=not self.play_audio, sentence_fragment_delimiters=delimiters, minimum_sentence_length=9)
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "start": True}
return {"role": "assistant", "type": "audio", "format": "bytes.wav", "end": True}
def on_tts_chunk(self, chunk):
self.output_queue.sync_q.put(chunk)
# Set methods on interpreter object
interpreter.input = types.MethodType(new_input, interpreter)
interpreter.output = types.MethodType(new_output, interpreter)
interpreter.on_tts_chunk = types.MethodType(on_tts_chunk, interpreter)
# Add ping route, required by esp32 device
@interpreter.server.app.get("/ping")
async def ping():
return PlainTextResponse("pong")
# Start server
interpreter.server.display = True
interpreter.print = True
interpreter.server.run()
interpreter.server()
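The `/ping` route above exists so the ESP32 client can confirm the server is reachable before it starts streaming audio. A quick check from another machine might look like the sketch below; the host and port are placeholders for whatever values `start_server` was given.

```python
# Illustrative health check; host and port are placeholders for the values
# passed to start_server.
import urllib.request

server_host = "127.0.0.1"  # placeholder
server_port = 10101        # placeholder

with urllib.request.urlopen(f"http://{server_host}:{server_port}/ping") as resp:
    assert resp.read().decode() == "pong", "unexpected /ping response"
```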