From 72f41ad7606aafbdca48f06553d100e350c7c7f3 Mon Sep 17 00:00:00 2001 From: Ben Xu Date: Mon, 24 Jun 2024 09:17:20 -0400 Subject: [PATCH 1/7] add debug flag --- software/source/clients/base_device.py | 8 ++++ software/source/clients/linux/device.py | 3 +- software/source/clients/mac/device.py | 3 +- software/source/clients/windows/device.py | 3 +- software/source/server/async_interpreter.py | 46 +++++++++++++++++---- software/source/server/async_server.py | 19 ++++++--- software/start.py | 10 ++++- 7 files changed, 75 insertions(+), 17 deletions(-) diff --git a/software/source/clients/base_device.py b/software/source/clients/base_device.py index 88eac6b..34b96a1 100644 --- a/software/source/clients/base_device.py +++ b/software/source/clients/base_device.py @@ -91,6 +91,8 @@ class Device: self.server_url = "" self.ctrl_pressed = False self.tts_service = "" + self.debug = False + self.playback_latency = None def fetch_image_from_camera(self, camera_index=CAMERA_DEVICE_INDEX): """Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list.""" @@ -164,6 +166,10 @@ class Device: while True: try: audio = await self.audiosegments.get() + if self.debug and self.playback_latency and isinstance(audio, bytes): + elapsed_time = time.time() - self.playback_latency + print(f"Time from request to playback: {elapsed_time} seconds") + self.playback_latency = None if self.tts_service == "elevenlabs": mpv_process.stdin.write(audio) # type: ignore @@ -219,6 +225,8 @@ class Device: stream.stop_stream() stream.close() print("Recording stopped.") + if self.debug: + self.playback_latency = time.time() duration = wav_file.getnframes() / RATE if duration < 0.3: diff --git a/software/source/clients/linux/device.py b/software/source/clients/linux/device.py index 0fa0fed..36182fb 100644 --- a/software/source/clients/linux/device.py +++ b/software/source/clients/linux/device.py @@ -3,8 +3,9 @@ from ..base_device import Device device = Device() -def main(server_url): +def main(server_url, debug): device.server_url = server_url + device.debug = debug device.start() diff --git a/software/source/clients/mac/device.py b/software/source/clients/mac/device.py index 0fa0fed..36182fb 100644 --- a/software/source/clients/mac/device.py +++ b/software/source/clients/mac/device.py @@ -3,8 +3,9 @@ from ..base_device import Device device = Device() -def main(server_url): +def main(server_url, debug): device.server_url = server_url + device.debug = debug device.start() diff --git a/software/source/clients/windows/device.py b/software/source/clients/windows/device.py index 0fa0fed..36182fb 100644 --- a/software/source/clients/windows/device.py +++ b/software/source/clients/windows/device.py @@ -3,8 +3,9 @@ from ..base_device import Device device = Device() -def main(server_url): +def main(server_url, debug): device.server_url = server_url + device.debug = debug device.start() diff --git a/software/source/server/async_interpreter.py b/software/source/server/async_interpreter.py index 03a4249..7b3f6ae 100644 --- a/software/source/server/async_interpreter.py +++ b/software/source/server/async_interpreter.py @@ -21,7 +21,13 @@ import os class AsyncInterpreter: - def __init__(self, interpreter): + def __init__(self, interpreter, debug): + self.stt_latency = None + self.tts_latency = None + self.interpreter_latency = None + self.tffytfp = None + self.debug = debug + self.interpreter = interpreter self.audio_chunks = [] @@ -126,6 +132,8 @@ class AsyncInterpreter: # 
Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer # content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ") # print("yielding ", content) + if self.time_from_first_yield_to_first_put is None: + self.time_from_first_yield_to_first_put = time.time() yield content @@ -157,6 +165,10 @@ class AsyncInterpreter: ) # Send a completion signal + if self.debug: + end_interpreter = time.time() + self.interpreter_latency = end_interpreter - start_interpreter + print("INTERPRETER LATENCY", self.interpreter_latency) # self.add_to_output_queue_sync({"role": "server","type": "completion", "content": "DONE"}) async def run(self): @@ -171,13 +183,20 @@ class AsyncInterpreter: while not self._input_queue.empty(): input_queue.append(self._input_queue.get()) - message = self.stt.text() - - if self.audio_chunks: - audio_bytes = bytearray(b"".join(self.audio_chunks)) - wav_file_path = bytes_to_wav(audio_bytes, "audio/raw") - print("wav_file_path ", wav_file_path) - self.audio_chunks = [] + if self.debug: + start_stt = time.time() + message = self.stt.text() + end_stt = time.time() + self.stt_latency = end_stt - start_stt + print("STT LATENCY", self.stt_latency) + + if self.audio_chunks: + audio_bytes = bytearray(b"".join(self.audio_chunks)) + wav_file_path = bytes_to_wav(audio_bytes, "audio/raw") + print("wav_file_path ", wav_file_path) + self.audio_chunks = [] + else: + message = self.stt.text() print(message) @@ -204,11 +223,22 @@ class AsyncInterpreter: "end": True, } ) + if self.debug: + end_tts = time.time() + self.tts_latency = end_tts - self.tts.stream_start_time + print("TTS LATENCY", self.tts_latency) self.tts.stop() + break async def _on_tts_chunk_async(self, chunk): # print("adding chunk to queue") + if self.debug and self.tffytfp is not None and self.tffytfp != 0: + print( + "time from first yield to first put is ", + time.time() - self.tffytfp, + ) + self.tffytfp = 0 await self._add_to_queue(self._output_queue, chunk) def on_tts_chunk(self, chunk): diff --git a/software/source/server/async_server.py b/software/source/server/async_server.py index 53f38ed..8bb91a3 100644 --- a/software/source/server/async_server.py +++ b/software/source/server/async_server.py @@ -12,7 +12,7 @@ from .profiles.default import interpreter as base_interpreter import asyncio import traceback import json -from fastapi import FastAPI, WebSocket +from fastapi import FastAPI, WebSocket, Depends from fastapi.responses import PlainTextResponse from uvicorn import Config, Server from .async_interpreter import AsyncInterpreter @@ -23,8 +23,6 @@ import os os.environ["STT_RUNNER"] = "server" os.environ["TTS_RUNNER"] = "server" -# interpreter.tts set in the profiles directory!!!! -interpreter = AsyncInterpreter(base_interpreter) app = FastAPI() @@ -37,15 +35,24 @@ app.add_middleware( ) +async def get_debug_flag(): + return app.state.debug + + @app.get("/ping") async def ping(): return PlainTextResponse("pong") @app.websocket("/") -async def websocket_endpoint(websocket: WebSocket): +async def websocket_endpoint( + websocket: WebSocket, debug: bool = Depends(get_debug_flag) +): await websocket.accept() + # interpreter.tts set in the profiles directory!!!! 
+ interpreter = AsyncInterpreter(base_interpreter, debug) + # Send the tts_service value to the client await websocket.send_text( json.dumps({"type": "config", "tts_service": interpreter.interpreter.tts}) @@ -91,7 +98,9 @@ async def websocket_endpoint(websocket: WebSocket): await websocket.close() -async def main(server_host, server_port): +async def main(server_host, server_port, debug): + app.state.debug = debug + print(f"Starting server on {server_host}:{server_port}") config = Config(app, host=server_host, port=server_port, lifespan="on") server = Server(config) diff --git a/software/start.py b/software/start.py index f93d872..0808b0f 100644 --- a/software/start.py +++ b/software/start.py @@ -41,6 +41,11 @@ def run( qr: bool = typer.Option( False, "--qr", help="Display QR code to scan to connect to the server" ), + debug: bool = typer.Option( + False, + "--debug", + help="Print latency measurements and save microphone recordings locally for manual playback.", + ), ): _run( server=server, @@ -52,6 +57,7 @@ def run( server_url=server_url, client_type=client_type, qr=qr, + debug=debug, ) @@ -65,6 +71,7 @@ def _run( server_url: str = None, client_type: str = "auto", qr: bool = False, + debug: bool = False, ): system_type = platform.system() @@ -93,6 +100,7 @@ def _run( main( server_host, server_port, + debug, ), ), ) @@ -125,7 +133,7 @@ def _run( f".clients.{client_type}.device", package="source" ) - client_thread = threading.Thread(target=module.main, args=[server_url]) + client_thread = threading.Thread(target=module.main, args=[server_url, debug]) client_thread.start() try: From 1c4be961c278e75f1363aebdf4d249056e4ded2a Mon Sep 17 00:00:00 2001 From: Ben Xu Date: Mon, 24 Jun 2024 12:58:16 -0400 Subject: [PATCH 2/7] fix self.tffytfp in async interpreter --- software/source/server/async_interpreter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/software/source/server/async_interpreter.py b/software/source/server/async_interpreter.py index 7b3f6ae..0cd2ef7 100644 --- a/software/source/server/async_interpreter.py +++ b/software/source/server/async_interpreter.py @@ -132,8 +132,8 @@ class AsyncInterpreter: # Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer # content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ") # print("yielding ", content) - if self.time_from_first_yield_to_first_put is None: - self.time_from_first_yield_to_first_put = time.time() + if self.tffytfp is None: + self.tffytfp = time.time() yield content From 0e68bb7125d063b170c2e9cdcc927481157771e6 Mon Sep 17 00:00:00 2001 From: Ben Xu Date: Mon, 24 Jun 2024 13:00:27 -0400 Subject: [PATCH 3/7] add docs fixes for esp32 and async interpreter --- README.md | 2 +- software/source/server/async_interpreter.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ebd8488..c14c382 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ If you want to run local speech-to-text using Whisper, you must install Rust. Fo To customize the behavior of the system, edit the [system message, model, skills library path,](https://docs.openinterpreter.com/settings/all-settings) etc. in the `profiles` directory under the `server` directory. This file sets up an interpreter, and is powered by Open Interpreter. 
-To specify the text-to-speech service for the 01 `base_device.py`, set `interpreter.tts` to either "openai" for OpenAI, "elevenlabs" for ElevenLabs, or "coqui" for Coqui (local) in a profile. For the 01 Light, set `SPEAKER_SAMPLE_RATE` to 24000 for Coqui (local) or 22050 for OpenAI TTS. We currently don't support ElevenLabs TTS on the 01 Light. +To specify the text-to-speech service for the 01 `base_device.py`, set `interpreter.tts` to either "openai" for OpenAI, "elevenlabs" for ElevenLabs, or "coqui" for Coqui (local) in a profile. For the 01 Light, set `SPEAKER_SAMPLE_RATE` in `client.ino` under the `esp32` client directory to 24000 for Coqui (local) or 22050 for OpenAI TTS. We currently don't support ElevenLabs TTS on the 01 Light. ## Ubuntu Dependencies diff --git a/software/source/server/async_interpreter.py b/software/source/server/async_interpreter.py index 0cd2ef7..d0bac0a 100644 --- a/software/source/server/async_interpreter.py +++ b/software/source/server/async_interpreter.py @@ -25,6 +25,7 @@ class AsyncInterpreter: self.stt_latency = None self.tts_latency = None self.interpreter_latency = None + # time from first put to first yield self.tffytfp = None self.debug = debug From 2607b9ce8be990a45ae0fd6cc006e482f06569e2 Mon Sep 17 00:00:00 2001 From: Henry Fontanier Date: Mon, 24 Jun 2024 20:37:40 +0200 Subject: [PATCH 4/7] Update introduction.mdx The `cd` commands assumes the user is already inside of the `01` directory (the repo's root), which is not likely to be the case as the previous command clones the repository into the current working directory. --- docs/getting-started/introduction.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting-started/introduction.mdx b/docs/getting-started/introduction.mdx index 1b14e3a..14d7372 100644 --- a/docs/getting-started/introduction.mdx +++ b/docs/getting-started/introduction.mdx @@ -34,7 +34,7 @@ For windows, please refer to the [setup guide](/getting-started/setup#windows). 
```bash # Clone the repo and navigate into the software directory git clone https://github.com/OpenInterpreter/01.git -cd software +cd 01/software # Install dependencies and run 01 poetry install From fda23e95b22a679f6b1197c3d1f78d9caece8521 Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Wed, 10 Jul 2024 10:56:54 -0700 Subject: [PATCH 5/7] Implemented `profiles` --- software/source/server/async_server.py | 34 ++++++++----------- software/source/server/tunnel.py | 9 +++-- software/start.py | 47 ++++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 24 deletions(-) diff --git a/software/source/server/async_server.py b/software/source/server/async_server.py index 8bb91a3..849f72d 100644 --- a/software/source/server/async_server.py +++ b/software/source/server/async_server.py @@ -1,14 +1,3 @@ -# import from the profiles directory the interpreter to be served - -# add other profiles to the directory to define other interpreter instances and import them here -# {.profiles.fast: optimizes for STT/TTS latency with the fastest models } -# {.profiles.local: uses local models and local STT/TTS } -# {.profiles.default: uses default interpreter settings with optimized TTS latency } - -# from .profiles.fast import interpreter as base_interpreter -# from .profiles.local import interpreter as base_interpreter -from .profiles.default import interpreter as base_interpreter - import asyncio import traceback import json @@ -19,6 +8,7 @@ from .async_interpreter import AsyncInterpreter from fastapi.middleware.cors import CORSMiddleware from typing import List, Dict, Any import os +import importlib.util os.environ["STT_RUNNER"] = "server" os.environ["TTS_RUNNER"] = "server" @@ -50,14 +40,6 @@ async def websocket_endpoint( ): await websocket.accept() - # interpreter.tts set in the profiles directory!!!! 
- interpreter = AsyncInterpreter(base_interpreter, debug) - - # Send the tts_service value to the client - await websocket.send_text( - json.dumps({"type": "config", "tts_service": interpreter.interpreter.tts}) - ) - try: async def receive_input(): @@ -98,9 +80,21 @@ async def websocket_endpoint( await websocket.close() -async def main(server_host, server_port, debug): +async def main(server_host, server_port, profile, debug): + app.state.debug = debug + # Load the profile module from the provided path + spec = importlib.util.spec_from_file_location("profile", profile) + profile_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(profile_module) + + # Get the interpreter from the profile + interpreter = profile_module.interpreter + + # Make it async + interpreter = AsyncInterpreter(interpreter, debug) + print(f"Starting server on {server_host}:{server_port}") config = Config(app, host=server_host, port=server_port, lifespan="on") server = Server(config) diff --git a/software/source/server/tunnel.py b/software/source/server/tunnel.py index f25a0b3..a40c0f3 100644 --- a/software/source/server/tunnel.py +++ b/software/source/server/tunnel.py @@ -6,7 +6,7 @@ from ..utils.print_markdown import print_markdown def create_tunnel( - tunnel_method="ngrok", server_host="localhost", server_port=10001, qr=False + tunnel_method="ngrok", server_host="localhost", server_port=10001, qr=False, domain=None ): print_markdown("Exposing server to the internet...") @@ -99,8 +99,13 @@ def create_tunnel( # If ngrok is installed, start it on the specified port # process = subprocess.Popen(f'ngrok http {server_port} --log=stdout', shell=True, stdout=subprocess.PIPE) + + if domain: + domain = f"--domain={domain}" + else: + domain = "" process = subprocess.Popen( - f"ngrok http {server_port} --scheme http,https --log=stdout", + f"ngrok http {server_port} --scheme http,https {domain} --log=stdout", shell=True, stdout=subprocess.PIPE, ) diff --git a/software/start.py b/software/start.py index 0808b0f..28c5675 100644 --- a/software/start.py +++ b/software/start.py @@ -6,6 +6,7 @@ import os import importlib from source.server.tunnel import create_tunnel from source.server.async_server import main +import subprocess import signal @@ -41,11 +42,25 @@ def run( qr: bool = typer.Option( False, "--qr", help="Display QR code to scan to connect to the server" ), + domain: str = typer.Option( + None, "--domain", help="Connect ngrok to a custom domain" + ), + profiles: bool = typer.Option( + False, + "--profiles", + help="Opens the folder where this script is contained", + ), + profile: str = typer.Option( + "default.py", # default + "--profile", + help="Specify the path to the profile, or the name of the file if it's in the `profiles` directory (run `--profiles` to open the profiles directory)", + ), debug: bool = typer.Option( False, "--debug", help="Print latency measurements and save microphone recordings locally for manual playback.", ), + ): _run( server=server, @@ -58,6 +73,9 @@ def run( client_type=client_type, qr=qr, debug=debug, + domain=domain, + profiles=profiles, + profile=profile, ) @@ -72,8 +90,33 @@ def _run( client_type: str = "auto", qr: bool = False, debug: bool = False, + domain = None, + profiles = None, + profile = None, ): + profiles_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "source", "server", "profiles") + + if profiles: + if platform.system() == "Windows": + subprocess.Popen(['explorer', profiles_dir]) + elif platform.system() == "Darwin": + 
subprocess.Popen(['open', profiles_dir]) + elif platform.system() == "Linux": + subprocess.Popen(['xdg-open', profiles_dir]) + else: + subprocess.Popen(['open', profiles_dir]) + exit(0) + + if profile: + if not os.path.isfile(profile): + profile = os.path.join(profiles_dir, profile) + if not os.path.isfile(profile): + profile += ".py" + if not os.path.isfile(profile): + print(f"Invalid profile path: {profile}") + exit(1) + system_type = platform.system() if system_type == "Windows": server_host = "localhost" @@ -91,7 +134,6 @@ def _run( signal.signal(signal.SIGINT, handle_exit) if server: - # print(f"Starting server with mobile = {mobile}") loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) server_thread = threading.Thread( @@ -100,6 +142,7 @@ def _run( main( server_host, server_port, + profile, debug, ), ), @@ -108,7 +151,7 @@ def _run( if expose: tunnel_thread = threading.Thread( - target=create_tunnel, args=[tunnel_service, server_host, server_port, qr] + target=create_tunnel, args=[tunnel_service, server_host, server_port, qr, domain] ) tunnel_thread.start() From d13c0cf3a4303b3838f494397afd7d2d296de57a Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Wed, 10 Jul 2024 11:08:37 -0700 Subject: [PATCH 6/7] Open Interpreter compatible `--profiles` --- software/source/server/async_server.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/software/source/server/async_server.py b/software/source/server/async_server.py index 849f72d..0772301 100644 --- a/software/source/server/async_server.py +++ b/software/source/server/async_server.py @@ -92,6 +92,10 @@ async def main(server_host, server_port, profile, debug): # Get the interpreter from the profile interpreter = profile_module.interpreter + if not hasattr(interpreter, 'tts'): + print("Setting TTS provider to default: openai") + interpreter.tts = "openai" + # Make it async interpreter = AsyncInterpreter(interpreter, debug) From d8d7658f8a9b3c969efe77628cabde6b6b3271b0 Mon Sep 17 00:00:00 2001 From: killian <63927363+KillianLucas@users.noreply.github.com> Date: Wed, 10 Jul 2024 11:14:27 -0700 Subject: [PATCH 7/7] Restored sending TTS service to client --- software/source/server/async_server.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/software/source/server/async_server.py b/software/source/server/async_server.py index 0772301..3897f56 100644 --- a/software/source/server/async_server.py +++ b/software/source/server/async_server.py @@ -40,6 +40,11 @@ async def websocket_endpoint( ): await websocket.accept() + # Send the tts_service value to the client + await websocket.send_text( + json.dumps({"type": "config", "tts_service": interpreter.interpreter.tts}) + ) + try: async def receive_input():
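
Taken together, patches 5-7 replace the hard-coded `from .profiles.default import interpreter` with a `--profile` path: `async_server.main()` loads that file via `importlib.util`, takes its module-level `interpreter` object, falls back to `interpreter.tts = "openai"` when the profile sets no TTS service, and wraps the result in `AsyncInterpreter(interpreter, debug)`. The sketch below is a minimal profile that loader would accept. The `from interpreter import interpreter` import is Open Interpreter's standard entry point, and the `system_message` / `llm.model` lines are illustrative assumptions based on the settings page linked from the README, not anything these patches require; the loader only needs the top-level `interpreter` name and, optionally, `tts`.

```python
# custom.py - hypothetical profile for software/source/server/profiles/.
# The --profile loader only requires a module-level object named `interpreter`.
from interpreter import interpreter  # Open Interpreter's default instance

# Illustrative settings (see the settings docs linked in the README);
# adjust attribute names to the installed Open Interpreter version.
interpreter.system_message = "You are the 01, a concise voice assistant."
interpreter.llm.model = "gpt-4o"

# 01-specific: TTS service read by base_device.py ("openai", "elevenlabs", or "coqui").
# If a profile omits it, async_server.main() now defaults it to "openai" (patch 6).
interpreter.tts = "openai"
```

Dropped into the profiles directory (which the new `--profiles` flag opens), the file would be selected with something like `poetry run 01 --profile custom.py`; a bare name or an absolute path also resolves, per the lookup added in start.py. It can be combined with `--debug` for the latency prints and with `--domain` to pin the ngrok tunnel to a custom domain. The `01` entry-point name is assumed from the project's existing docs rather than defined in these patches.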