fix api keys

12 months ago · 5e9f94024c
parent 72f7d140d4
commit 5e9f94024c
4 changed files with 48 additions and 36 deletions
--- a/software/source/clients/base_device.py
+++ b/software/source/clients/base_device.py
@ -152,6 +152,15 @@ class Device:

    async def play_audiosegments(self):
        """Plays them sequentially."""
+
+        mpv_command = ["mpv", "--no-cache", "--no-terminal", "--", "fd://0"]
+        mpv_process = subprocess.Popen(
+            mpv_command,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+
        while True:
            try:
                audio = await self.audiosegments.get()
@ -161,6 +170,10 @@ class Device:
                    print(f"Time from request to playback: {elapsed_time} seconds")
                    self.playback_latency = None

+                if audio is not None:
+                    mpv_process.stdin.write(audio)  # type: ignore
+                    mpv_process.stdin.flush()  # type: ignore
+                """
                args = ["ffplay", "-autoexit", "-", "-nodisp"]
                proc = subprocess.Popen(
                    args=args,
@ -171,7 +184,8 @@ class Device:
                out, err = proc.communicate(input=audio)
                proc.poll()

-                # play(audio)
+                play(audio)
+                """
                # self.audiosegments.remove(audio)
                # await asyncio.sleep(0.1)
            except asyncio.exceptions.CancelledError:
@ -391,7 +405,7 @@ class Device:
                        # signed 16-bit little-endian format
                        sample_width=2,
                        # 24,000 Hz frame rate
-                        frame_rate=24000,
+                        frame_rate=16000,
                        # mono sound
                        channels=1,
                    )
@ -400,6 +414,8 @@ class Device:
                    # print("audio segment was created")
                    await self.audiosegments.put(audio_bytes)

+                    # await self.audiosegments.put(audio)
+
                # Run the code if that's the client's job
                if os.getenv("CODE_RUNNER") == "client":
                    if message["type"] == "code" and "end" in message:
@ -434,7 +450,8 @@ class Device:
    async def start_async(self):
        print("start async was called!!!!!")
        # Configuration for WebSocket
-        WS_URL = f"ws://{self.server_url}/ws"
+        WS_URL = f"ws://{self.server_url}"
+
        # Start the WebSocket communication
        asyncio.create_task(self.websocket_communication(WS_URL))

--- a/software/source/server/async_interpreter.py
+++ b/software/source/server/async_interpreter.py
@ -24,6 +24,7 @@ from RealtimeSTT import AudioToTextRecorder
 import time
 import asyncio
 import json
+import os


 class AsyncInterpreter:
@ -47,9 +48,7 @@ class AsyncInterpreter:
        elif self.interpreter.tts == "gtts":
            engine = GTTSEngine()
        elif self.interpreter.tts == "elevenlabs":
-            engine = ElevenlabsEngine(
-                api_key="sk_077cb1cabdf67e62b85f8782e66e5d8e11f78b450c7ce171"
-            )
+            engine = ElevenlabsEngine(api_key=os.environ["ELEVEN_LABS_API_KEY"])
        elif self.interpreter.tts == "system":
            engine = SystemEngine()
        else:
@ -96,12 +95,12 @@ class AsyncInterpreter:
                pass

            if "start" in chunk:
-                print("input received")
+                # print("Starting STT")
                self.stt.start()
                self._last_lmc_start_flag = time.time()
                # self.interpreter.computer.terminal.stop() # Stop any code execution... maybe we should make interpreter.stop()?
            elif "end" in chunk:
-                print("running oi on input now")
+                # print("Running OI on input")
                asyncio.create_task(self.run())
            else:
                await self._add_to_queue(self._input_queue, chunk)
@ -139,6 +138,7 @@ class AsyncInterpreter:
        print("STT LATENCY", self.stt_latency)

        # print(message)
+        end_interpreter = 0

        # print(message)
        def generate(message):
@ -165,7 +165,7 @@ class AsyncInterpreter:

                        # Experimental: The AI voice sounds better with replacements like these, but it should happen at the TTS layer
                        # content = content.replace(". ", ". ... ").replace(", ", ", ... ").replace("!", "! ... ").replace("?", "? ... ")
-                        print("yielding this", content)
+                        # print("yielding this", content)
                        yield content

                # Handle code blocks
@ -214,6 +214,7 @@ class AsyncInterpreter:
        while True:
            if self.tts.is_playing():
                start_tts = time.time()
+
                break
            await asyncio.sleep(0.1)
        while True:
@ -231,6 +232,7 @@ class AsyncInterpreter:
                end_tts = time.time()
                self.tts_latency = end_tts - start_tts
                print("TTS LATENCY", self.tts_latency)
+                self.tts.stop()
                break

    async def _on_tts_chunk_async(self, chunk):
--- a/software/source/server/async_server.py
+++ b/software/source/server/async_server.py
@ -2,6 +2,7 @@ import asyncio
 import traceback
 import json
 from fastapi import FastAPI, WebSocket, Header
+from fastapi.responses import PlainTextResponse
 from uvicorn import Config, Server
 from interpreter import interpreter as base_interpreter
 from .async_interpreter import AsyncInterpreter
@ -18,38 +19,25 @@ base_interpreter.system_message = (
    "You are a helpful assistant that can answer questions and help with tasks."
 )
 base_interpreter.computer.import_computer_api = False
-base_interpreter.llm.model = "groq/mixtral-8x7b-32768"
-base_interpreter.llm.api_key = (
-    "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"
-)
+base_interpreter.llm.model = "groq/llama3-8b-8192"
+base_interpreter.llm.api_key = os.environ["GROQ_API_KEY"]
+print(base_interpreter.llm.api_key)
 base_interpreter.llm.supports_functions = False
+base_interpreter.auto_run = True

 os.environ["STT_RUNNER"] = "server"
 os.environ["TTS_RUNNER"] = "server"

 # Parse command line arguments for port number
+"""
 parser = argparse.ArgumentParser(description="FastAPI server.")
 parser.add_argument("--port", type=int, default=8000, help="Port to run on.")
 args = parser.parse_args()
-
+"""
 base_interpreter.tts = "elevenlabs"


-async def main():
-    """
-    sentry_sdk.init(
-        dsn="https://a1465f62a31c7dfb23e1616da86341e9@o4506046614667264.ingest.us.sentry.io/4507374662385664",
-        enable_tracing=True,
-        # Set traces_sample_rate to 1.0 to capture 100%
-        # of transactions for performance monitoring.
-        traces_sample_rate=1.0,
-        # Set profiles_sample_rate to 1.0 to profile 100%
-        # of sampled transactions.
-        # We recommend adjusting this value in production.
-        profiles_sample_rate=1.0,
-    )
-    """
-
+async def main(server_host, server_port):
    interpreter = AsyncInterpreter(base_interpreter)

    app = FastAPI()
@ -62,6 +50,10 @@ async def main():
        allow_headers=["*"],  # Allow all headers
    )

+    @app.get("/ping")
+    async def ping():
+        return PlainTextResponse("pong")
+
    @app.post("/load_chat")
    async def load_chat(messages: List[Dict[str, Any]]):
        interpreter.interpreter.messages = messages
@ -69,7 +61,7 @@ async def main():
        print("🪼🪼🪼🪼🪼🪼 Messages loaded: ", interpreter.active_chat_messages)
        return {"status": "success"}

-    @app.websocket("/ws")
+    @app.websocket("/")
    async def websocket_endpoint(websocket: WebSocket):
        await websocket.accept()
        try:
@ -85,7 +77,7 @@ async def main():
                        await interpreter.input(data)
                    elif "bytes" in data:
                        await interpreter.input(data["bytes"])
-                        # print("SERVER FEEDING AUDIO")
+                        # print("RECEIVED INPUT", data)
                    elif "text" in data:
                        # print("RECEIVED INPUT", data)
                        await interpreter.input(data["text"])
@ -111,7 +103,8 @@ async def main():
            if not websocket.client_state == "DISCONNECTED":
                await websocket.close()

-    config = Config(app, host="0.0.0.0", port=8000, lifespan="on")
+    print(f"Starting server on {server_host}:{server_port}")
+    config = Config(app, host=server_host, port=server_port, lifespan="on")
    server = Server(config)
    await server.serve()

--- a/software/start.py
+++ b/software/start.py
@ -22,7 +22,7 @@ def run(
        help="Specify the server host where the server will deploy",
    ),
    server_port: int = typer.Option(
-        8000,
+        10001,
        "--server-port",
        help="Specify the server port where the server will deploy",
    ),
@ -152,8 +152,8 @@ def _run(
            target=loop.run_until_complete,
            args=(
                main(
-                    # server_host,
-                    # server_port,
+                    server_host,
+                    server_port,
                    # llm_service,
                    # model,
                    # llm_supports_vision,
@ -196,7 +196,7 @@ def _run(
        module = importlib.import_module(
            f".clients.{client_type}.device", package="source"
        )
-        server_url = "0.0.0.0:8000"
+        # server_url = "0.0.0.0:8000"
        client_thread = threading.Thread(target=module.main, args=[server_url])
        print("client thread started")
        client_thread.start()