Docs changes, minor fixes

10 months ago · 6fb71dde41
parent 575754ed7f
commit 6fb71dde41
8 changed files with 10859 additions and 17 deletions
--- a/docs/safety/file-safety.mdx
+++ b/docs/safety/file-safety.mdx
--- a/docs/safety/introduction.mdx
+++ b/docs/safety/introduction.mdx
--- a/docs/safety/llm-safety.mdx
+++ b/docs/safety/llm-safety.mdx
--- a/software/main.py
+++ b/software/main.py
@@ -127,19 +127,21 @@ def run(

        if server == "light":
            light_server_port = server_port
+            light_server_host = server_host
            voice = True # The light server will support voice
        elif server == "livekit":
            # The light server should run at a different port if we want to run a livekit server
            spinner.stop()
-            print(f"Starting light server (required for livekit server) on the port before `--server-port` (port {server_port-1}), unless the `AN_OPEN_PORT` env var is set.")
+            print(f"Starting light server (required for livekit server) on localhost, on the port before `--server-port` (port {server_port-1}), unless the `AN_OPEN_PORT` env var is set.")
            print(f"The livekit server will be started on port {server_port}.")
            light_server_port = os.getenv('AN_OPEN_PORT', server_port-1)
+            light_server_host = "localhost"
            voice = False # The light server will NOT support voice. It will just run Open Interpreter. The Livekit server will handle voice

        server_thread = threading.Thread(
            target=start_server,
            args=(
-                server_host,
+                light_server_host,
                light_server_port,
                profile,
                voice,
--- a/software/poetry.lock
+++ b/software/poetry.lock
--- a/software/pyproject.toml
+++ b/software/pyproject.toml
@@ -19,7 +19,7 @@ livekit-plugins-openai = "^0.8.1"
 livekit-plugins-silero = "^0.6.4"
 livekit-plugins-elevenlabs = "^0.7.3"
 segno = "^1.6.1"
-open-interpreter = {extras = ["os", "server"], version = "^0.3.9"}
+open-interpreter = {extras = ["os", "server"], version = "^0.3.10"}
 ngrok = "^1.4.0"
 realtimetts = {extras = ["all"], version = "^0.4.5"}
 realtimestt = "^0.2.41"
--- a/software/source/server/livekit/worker.py
+++ b/software/source/server/livekit/worker.py
@@ -16,8 +16,7 @@ async def entrypoint(ctx: JobContext):
    initial_ctx = ChatContext().append(
        role="system",
        text=(
-            "You are a voice assistant created by LiveKit. Your interface with users will be voice. "
-            "You should use short and concise responses, and avoiding usage of unpronounceable punctuation."
+            "" # Open Interpreter handles this.
        ),
    )

@@ -66,7 +65,9 @@ async def entrypoint(ctx: JobContext):
    await asyncio.sleep(1)

    # Greets the user with an initial message
-    await assistant.say("Hey, how can I help you today?", allow_interruptions=True)
+    await assistant.say("""Hi! You can hold the white circle below to speak to me.
+
+Try asking what I can do.""", allow_interruptions=True)


 if __name__ == "__main__":
--- a/software/source/server/server.py
+++ b/software/source/server/server.py
@@ -2,12 +2,12 @@ from fastapi.responses import PlainTextResponse
 from RealtimeSTT import AudioToTextRecorder
 from RealtimeTTS import TextToAudioStream
 import importlib
-import warnings
 import asyncio
 import types
+import time
+import tempfile
 import wave
 import os
-import sys

 os.environ["INTERPRETER_REQUIRE_ACKNOWLEDGE"] = "False"
 os.environ["INTERPRETER_REQUIRE_AUTH"] = "False"
@@ -90,19 +90,22 @@ def start_server(server_host, server_port, profile, voice, debug):
                self.stt.stop()
                content = self.stt.text()

-                if content.strip() == "":
-                    return
-
-                print(">", content.strip())
-
                if False:
                    audio_bytes = bytearray(b"".join(self.audio_chunks))
-                    with wave.open('audio.wav', 'wb') as wav_file:
+                    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
+                        with wave.open(temp_file.name, 'wb') as wav_file:
                            wav_file.setnchannels(1)
                            wav_file.setsampwidth(2)  # Assuming 16-bit audio
                            wav_file.setframerate(16000)  # Assuming 16kHz sample rate
                            wav_file.writeframes(audio_bytes)
-                    print(os.path.abspath('audio.wav'))
+                        print(f"Audio for debugging: {temp_file.name}")
+                        time.sleep(10)
+                        
+
+                if content.strip() == "":
+                    return
+
+                print(">", content.strip())

                await old_input({"role": "user", "type": "message", "content": content})
                await old_input({"role": "user", "type": "message", "end": True})