Merge branch 'main' into Raspberry-Pi-button-compatibility-(Thanks-Thea!!!)

2 years ago · cc1761971c
parent e446e2765b 996e139826
commit cc1761971c
6 changed files with 44 additions and 84 deletions
--- a/OS/01/device.py
+++ b/OS/01/device.py
@ -6,12 +6,15 @@ from starlette.websockets import WebSocket
 from queue import Queue
 from pynput import keyboard
 import json
 import traceback
 import websockets
 import queue
 import pydub
 import ast
 from pydub import AudioSegment
 from pydub.playback import play
 import io
 import time
 import wave
 import tempfile
 from datetime import datetime
@ -19,7 +22,6 @@ from interpreter import interpreter # Just for code execution. Maybe we should l
 from utils.kernel import put_kernel_messages_into_queue
 from utils.get_system_info import get_system_info
 from stt import stt_wav
 import asyncio
 from utils.logs import setup_logging
 from utils.logs import logger
@ -33,6 +35,10 @@ RATE = 44100  # Sample rate
 RECORDING = False  # Flag to control recording state
 SPACEBAR_PRESSED = False  # Flag to track spacebar press state
 # Configuration for WebSocket
 WS_URL = os.getenv('SERVER_URL')
 if not WS_URL:
    raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
 # Specify OS
 current_platform = get_system_info()
@ -125,6 +131,7 @@ def on_release(key):
        logger.info("Exiting...")
        os._exit(0)
 import asyncio
 send_queue = queue.Queue()
@ -137,12 +144,8 @@ async def message_sender(websocket):
 async def websocket_communication(WS_URL):
    while True:
        try:
-            logger.info(f"Connecting to `{WS_URL}` ...")
+            async with websockets.connect(WS_URL) as websocket:
            headers = {"ngrok-skip-browser-warning": str(80), "User-Agent": "project01"} if os.getenv('NGROK_AUTHTOKEN') else {}
            async with websockets.connect(WS_URL, extra_headers=headers) as websocket:
                logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
                asyncio.create_task(message_sender(websocket))
                initial_message = {"role": None, "type": None, "format": None, "content": None} 
@ -189,19 +192,14 @@ async def websocket_communication(WS_URL):
                            send_queue.put(result)
-        except Exception as e:
+        except:
-            logger.exception(f"An error occurred during websocket communication. {e}")
+            # traceback.print_exc()
            logger.info(f"Connecting to `{WS_URL}`...")
            await asyncio.sleep(2)
 if __name__ == "__main__":
    # Configuration for WebSocket
    async def main():
        WS_URL = os.getenv('SERVER_CONNECTION_URL')
        if not WS_URL:
            raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
        # Start the WebSocket communication
        asyncio.create_task(websocket_communication(WS_URL))
--- a/OS/01/requirements.txt
+++ b/OS/01/requirements.txt
@ -1,16 +1,12 @@
 git+https://github.com/KillianLucas/open-interpreter.git
-asyncio==3.4.3
+asyncio
-PyAudio==0.2.14
+PyAudio
-pynput==1.7.6
+pynput
-fastapi==0.109.2
+fastapi
-uvicorn==0.27.1
+uvicorn
-websockets==12.0
+websockets
-playsound==1.3.0
+playsound
-python-dotenv==1.0.1
+python-dotenv
-ffmpeg-python==0.2.0
+ffmpeg-python
-textual==0.50.1
+textual
-pydub==0.25.1
+pydub
 ngrok==1.0.0
 wheel
--- a/OS/01/server.py
+++ b/OS/01/server.py
@ -1,12 +1,17 @@
 from starlette.websockets import WebSocketDisconnect
 import ast
 import json
 import time
 import queue
 import os
 import traceback
 from queue import Queue
 from threading import Thread
 import threading
 import uvicorn
 import re
 from fastapi import FastAPI
-from fastapi.responses import PlainTextResponse
+from threading import Thread
 from starlette.websockets import WebSocket
 from stt import stt_bytes
 from tts import tts
@ -16,8 +21,6 @@ import urllib.parse
 from utils.kernel import put_kernel_messages_into_queue
 from i import configure_interpreter
 from interpreter import interpreter
 import ngrok
 import signal
 from utils.logs import setup_logging
 from utils.logs import logger
@ -87,10 +90,6 @@ if os.getenv('CODE_RUNNER') == "device":
 # Configure interpreter
 interpreter = configure_interpreter(interpreter)
@app.get("/ping")
 async def ping():
    return PlainTextResponse("pong")
@app.websocket("/")
 async def websocket_endpoint(websocket: WebSocket):
    await websocket.accept()
@ -234,7 +233,6 @@ from uvicorn import Config, Server
 if __name__ == "__main__":
    async def main():
        # Start listening
        asyncio.create_task(listener())
@ -246,19 +244,6 @@ if __name__ == "__main__":
        if not server_url:
            raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
        parsed_url = urllib.parse.urlparse(server_url)
        # Set up Ngrok
        ngrok_auth_token = os.getenv('NGROK_AUTHTOKEN')
        if ngrok_auth_token is not None:
            logger.info("Setting up Ngrok")
            ngrok_listener = await ngrok.forward(f"{parsed_url.hostname}:{parsed_url.port}", authtoken=ngrok_auth_token)
            ngrok_parsed_url = urllib.parse.urlparse(ngrok_listener.url())
            # Setup SERVER_URL environment variable for device to use
            connection_url = f"wss://{ngrok_parsed_url.hostname}/"
            logger.info(f"Ngrok established at {ngrok_parsed_url.geturl()}")
            logger.info(f"\033[1mSERVER_CONNECTION_URL should be set to \"{connection_url}\"\033[0m")
        logger.info("Starting `server.py`...")
        config = Config(app, host=parsed_url.hostname, port=parsed_url.port, lifespan='on')
--- a/OS/01/start.sh
+++ b/OS/01/start.sh
@ -3,16 +3,10 @@
 ### SETTINGS
 # If ALL_LOCAL is False, we'll use OpenAI's services
-# else we use whisper.cpp and piper local models
+# If ALL_LOCAL is True, set WHISPER_MODEL_PATH to the path of the local Whisper model
 export ALL_LOCAL=False
-export WHISPER_MODEL_NAME="ggml-tiny.en.bin"
+# export WHISPER_MODEL_PATH=...
-
+# export OPENAI_API_KEY=sk-...
 # Uncomment and set the OpenAI API key for OpenInterpreter to work
 # export OPENAI_API_KEY="sk-..."
 # Expose through Ngrok
 # Uncomment the following line and set it to your Ngrok auth token (https://dashboard.ngrok.com/get-started/your-authtoken)
 # export NGROK_AUTHTOKEN="AUTH_TOKEN"
 # For TTS, we use the en_US-lessac-medium voice model by default
 # Please change the voice URL and voice name if you wish to use another voice
@ -22,7 +16,6 @@ export PIPER_VOICE_NAME="en_US-lessac-medium.onnx"
 # If SERVER_START, this is where we'll serve the server.
 # If DEVICE_START, this is where the device expects the server to be.
 export SERVER_URL=ws://localhost:8000/
 export SERVER_CONNECTION_URL=$SERVER_URL # Comment if setting up through Ngrok
 export SERVER_START=True
 export DEVICE_START=True
@ -38,13 +31,9 @@ export SERVER_EXPOSE_PUBLICALLY=False
 # export LOG_LEVEL="DEBUG"
 export LOG_LEVEL="INFO"
 ### SETUP
 # if using local models, install the models / executables
 WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"
 WHISPER_RUST_PATH="`pwd`/local_stt/whisper-rust"
 if [[ "$ALL_LOCAL" == "True" ]]; then
    curl -OL "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" --output-dir ${WHISPER_RUST_PATH}
@ -97,14 +86,6 @@ fi
 start_device() {
    echo "Starting device..."
    if [[ -n $NGROK_AUTHTOKEN ]]; then
        echo "Waiting for Ngrok to setup"
        sleep 7
        read -p "Enter the Ngrok URL: " ngrok_url
        export SERVER_CONNECTION_URL=$ngrok_url
        echo "SERVER_CONNECTION_URL set to $SERVER_CONNECTION_URL"
    fi
    python device.py &
    DEVICE_PID=$!
    echo "Device started as process $DEVICE_PID"
@ -132,18 +113,18 @@ stop_processes() {
 # Trap SIGINT and SIGTERM to stop processes when the script is terminated
 trap stop_processes SIGINT SIGTERM
 # SERVER
 # Start server if SERVER_START is True
 if [[ "$SERVER_START" == "True" ]]; then
    start_server
 fi
 # DEVICE
 # Start device if DEVICE_START is True
 if [[ "$DEVICE_START" == "True" ]]; then
    start_device
 fi
 # SERVER
 # Start server if SERVER_START is True
 if [[ "$SERVER_START" == "True" ]]; then
    start_server
 fi
 # Wait for device and server processes to exit
 wait $DEVICE_PID
 wait $SERVER_PID
--- a/OS/01/stt.py
+++ b/OS/01/stt.py
@ -53,14 +53,13 @@ def run_command(command):
    return result.stdout, result.stderr
 def get_transcription_file(wav_file_path: str):
-    whisper_rust_path = os.path.join(os.path.dirname(__file__), 'local_stt', 'whisper-rust')
+    model_path = os.getenv("WHISPER_MODEL_PATH")
-    model_name = os.getenv('WHISPER_MODEL_NAME')
+    if not model_path:
-    if not model_name:
+        raise EnvironmentError("WHISPER_MODEL_PATH environment variable is not set.")
        raise EnvironmentError("WHISPER_MODEL_NAME environment variable is not set.")
    output, error = run_command([
-        os.path.join(whisper_rust_path, 'whisper-rust'),
+        os.path.join(os.path.dirname(__file__), 'local_stt', 'whisper-rust', 'whisper-rust'),
-        '--model-path', os.path.join(whisper_rust_path, model_name),
+        '--model-path', model_path,
        '--file-path', wav_file_path
    ])
--- a/README.md
+++ b/README.md
@ -23,6 +23,8 @@ python -m pip install -r requirements.txt
 ```
 NB: Depending on your local Python version, you may run into [this issue↗](https://github.com/TaylorSMarks/playsound/issues/150) when installing `playsound`. Workarounds are provided in the issue.
 If you want to run local speech-to-text with Whisper, download a GGML Whisper model from [Hugging Face](https://huggingface.co/ggerganov/whisper.cpp). Then, in `OS/01/start.sh`, set `ALL_LOCAL=True` (the script compares against the exact string `True`) and set `WHISPER_MODEL_PATH` to the path of the downloaded model.
 ## Usage
 ```bash
@ -30,7 +32,6 @@ cd OS/01
 bash start.sh
 ```
 If you want to run local text-to-speech and speech-to-text, set `ALL_LOCAL` in the `start.sh` script to True. This will use the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [Piper](https://github.com/rhasspy/piper) models.
 <br>
 ## Background