Merge branch 'main' into Raspberry-Pi-button-compatibility-(Thanks-Thea!!!)

pull/26/head
Ty Fiero 11 months ago committed by GitHub
commit cc1761971c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -6,12 +6,15 @@ from starlette.websockets import WebSocket
from queue import Queue from queue import Queue
from pynput import keyboard from pynput import keyboard
import json import json
import traceback
import websockets import websockets
import queue import queue
import pydub
import ast import ast
from pydub import AudioSegment from pydub import AudioSegment
from pydub.playback import play from pydub.playback import play
import io import io
import time
import wave import wave
import tempfile import tempfile
from datetime import datetime from datetime import datetime
@ -19,7 +22,6 @@ from interpreter import interpreter # Just for code execution. Maybe we should l
from utils.kernel import put_kernel_messages_into_queue from utils.kernel import put_kernel_messages_into_queue
from utils.get_system_info import get_system_info from utils.get_system_info import get_system_info
from stt import stt_wav from stt import stt_wav
import asyncio
from utils.logs import setup_logging from utils.logs import setup_logging
from utils.logs import logger from utils.logs import logger
@ -33,6 +35,10 @@ RATE = 44100 # Sample rate
RECORDING = False # Flag to control recording state RECORDING = False # Flag to control recording state
SPACEBAR_PRESSED = False # Flag to track spacebar press state SPACEBAR_PRESSED = False # Flag to track spacebar press state
# Configuration for WebSocket
WS_URL = os.getenv('SERVER_URL')
if not WS_URL:
raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
# Specify OS # Specify OS
current_platform = get_system_info() current_platform = get_system_info()
@ -125,6 +131,7 @@ def on_release(key):
logger.info("Exiting...") logger.info("Exiting...")
os._exit(0) os._exit(0)
import asyncio
send_queue = queue.Queue() send_queue = queue.Queue()
@ -137,12 +144,8 @@ async def message_sender(websocket):
async def websocket_communication(WS_URL): async def websocket_communication(WS_URL):
while True: while True:
try: try:
logger.info(f"Connecting to `{WS_URL}` ...") async with websockets.connect(WS_URL) as websocket:
headers = {"ngrok-skip-browser-warning": str(80), "User-Agent": "project01"} if os.getenv('NGROK_AUTHTOKEN') else {}
async with websockets.connect(WS_URL, extra_headers=headers) as websocket:
logger.info("Press the spacebar to start/stop recording. Press ESC to exit.") logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
asyncio.create_task(message_sender(websocket)) asyncio.create_task(message_sender(websocket))
initial_message = {"role": None, "type": None, "format": None, "content": None} initial_message = {"role": None, "type": None, "format": None, "content": None}
@ -189,19 +192,14 @@ async def websocket_communication(WS_URL):
send_queue.put(result) send_queue.put(result)
except Exception as e: except:
logger.exception(f"An error occurred during websocket communication. {e}") # traceback.print_exc()
logger.info(f"Connecting to `{WS_URL}`...") logger.info(f"Connecting to `{WS_URL}`...")
await asyncio.sleep(2) await asyncio.sleep(2)
if __name__ == "__main__": if __name__ == "__main__":
# Configuration for WebSocket
async def main(): async def main():
WS_URL = os.getenv('SERVER_CONNECTION_URL')
if not WS_URL:
raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
# Start the WebSocket communication # Start the WebSocket communication
asyncio.create_task(websocket_communication(WS_URL)) asyncio.create_task(websocket_communication(WS_URL))

@ -1,16 +1,12 @@
git+https://github.com/KillianLucas/open-interpreter.git git+https://github.com/KillianLucas/open-interpreter.git
asyncio==3.4.3 asyncio
PyAudio==0.2.14 PyAudio
pynput==1.7.6 pynput
fastapi==0.109.2 fastapi
uvicorn==0.27.1 uvicorn
websockets==12.0 websockets
playsound==1.3.0 playsound
python-dotenv==1.0.1 python-dotenv
ffmpeg-python==0.2.0 ffmpeg-python
textual==0.50.1 textual
pydub==0.25.1 pydub
ngrok==1.0.0
wheel

@ -1,12 +1,17 @@
from starlette.websockets import WebSocketDisconnect from starlette.websockets import WebSocketDisconnect
import ast import ast
import json import json
import time
import queue import queue
import os import os
import traceback import traceback
from queue import Queue
from threading import Thread
import threading
import uvicorn
import re import re
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.responses import PlainTextResponse from threading import Thread
from starlette.websockets import WebSocket from starlette.websockets import WebSocket
from stt import stt_bytes from stt import stt_bytes
from tts import tts from tts import tts
@ -16,8 +21,6 @@ import urllib.parse
from utils.kernel import put_kernel_messages_into_queue from utils.kernel import put_kernel_messages_into_queue
from i import configure_interpreter from i import configure_interpreter
from interpreter import interpreter from interpreter import interpreter
import ngrok
import signal
from utils.logs import setup_logging from utils.logs import setup_logging
from utils.logs import logger from utils.logs import logger
@ -87,10 +90,6 @@ if os.getenv('CODE_RUNNER') == "device":
# Configure interpreter # Configure interpreter
interpreter = configure_interpreter(interpreter) interpreter = configure_interpreter(interpreter)
@app.get("/ping")
async def ping():
return PlainTextResponse("pong")
@app.websocket("/") @app.websocket("/")
async def websocket_endpoint(websocket: WebSocket): async def websocket_endpoint(websocket: WebSocket):
await websocket.accept() await websocket.accept()
@ -234,7 +233,6 @@ from uvicorn import Config, Server
if __name__ == "__main__": if __name__ == "__main__":
async def main(): async def main():
# Start listening # Start listening
asyncio.create_task(listener()) asyncio.create_task(listener())
@ -246,19 +244,6 @@ if __name__ == "__main__":
if not server_url: if not server_url:
raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.") raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
parsed_url = urllib.parse.urlparse(server_url) parsed_url = urllib.parse.urlparse(server_url)
# Set up Ngrok
ngrok_auth_token = os.getenv('NGROK_AUTHTOKEN')
if ngrok_auth_token is not None:
logger.info("Setting up Ngrok")
ngrok_listener = await ngrok.forward(f"{parsed_url.hostname}:{parsed_url.port}", authtoken=ngrok_auth_token)
ngrok_parsed_url = urllib.parse.urlparse(ngrok_listener.url())
# Setup SERVER_URL environment variable for device to use
connection_url = f"wss://{ngrok_parsed_url.hostname}/"
logger.info(f"Ngrok established at {ngrok_parsed_url.geturl()}")
logger.info(f"\033[1mSERVER_CONNECTION_URL should be set to \"{connection_url}\"\033[0m")
logger.info("Starting `server.py`...") logger.info("Starting `server.py`...")
config = Config(app, host=parsed_url.hostname, port=parsed_url.port, lifespan='on') config = Config(app, host=parsed_url.hostname, port=parsed_url.port, lifespan='on')

@ -3,16 +3,10 @@
### SETTINGS ### SETTINGS
# If ALL_LOCAL is False, we'll use OpenAI's services # If ALL_LOCAL is False, we'll use OpenAI's services
# else we use whisper.cpp and piper local models # If setting ALL_LOCAL to true, set the path to the WHISPER local model
export ALL_LOCAL=False export ALL_LOCAL=False
export WHISPER_MODEL_NAME="ggml-tiny.en.bin" # export WHISPER_MODEL_PATH=...
# export OPENAI_API_KEY=sk-...
# Uncomment and set the OpenAI API key for OpenInterpreter to work
# export OPENAI_API_KEY="sk-..."
# Expose through Ngrok
# Uncomment following line with your Ngrok auth token (https://dashboard.ngrok.com/get-started/your-authtoken)
# export NGROK_AUTHTOKEN="AUTH_TOKEN"
# For TTS, we use the en_US-lessac-medium voice model by default # For TTS, we use the en_US-lessac-medium voice model by default
# Please change the voice URL and voice name if you wish to use another voice # Please change the voice URL and voice name if you wish to use another voice
@ -22,7 +16,6 @@ export PIPER_VOICE_NAME="en_US-lessac-medium.onnx"
# If SERVER_START, this is where we'll serve the server. # If SERVER_START, this is where we'll serve the server.
# If DEVICE_START, this is where the device expects the server to be. # If DEVICE_START, this is where the device expects the server to be.
export SERVER_URL=ws://localhost:8000/ export SERVER_URL=ws://localhost:8000/
export SERVER_CONNECTION_URL=$SERVER_URL # Comment if setting up through Ngrok
export SERVER_START=True export SERVER_START=True
export DEVICE_START=True export DEVICE_START=True
@ -38,13 +31,9 @@ export SERVER_EXPOSE_PUBLICALLY=False
# export LOG_LEVEL="DEBUG" # export LOG_LEVEL="DEBUG"
export LOG_LEVEL="INFO" export LOG_LEVEL="INFO"
### SETUP ### SETUP
# if using local models, install the models / executables # if using local models, install the models / executables
WHISPER_MODEL_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"
WHISPER_RUST_PATH="`pwd`/local_stt/whisper-rust"
if [[ "$ALL_LOCAL" == "True" ]]; then if [[ "$ALL_LOCAL" == "True" ]]; then
curl -OL "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" --output-dir ${WHISPER_RUST_PATH} curl -OL "${WHISPER_MODEL_URL}${WHISPER_MODEL_NAME}" --output-dir ${WHISPER_RUST_PATH}
@ -97,14 +86,6 @@ fi
start_device() { start_device() {
echo "Starting device..." echo "Starting device..."
if [[ -n $NGROK_AUTHTOKEN ]]; then
echo "Waiting for Ngrok to setup"
sleep 7
read -p "Enter the Ngrok URL: " ngrok_url
export SERVER_CONNECTION_URL=$ngrok_url
echo "SERVER_CONNECTION_URL set to $SERVER_CONNECTION_URL"
fi
python device.py & python device.py &
DEVICE_PID=$! DEVICE_PID=$!
echo "Device started as process $DEVICE_PID" echo "Device started as process $DEVICE_PID"
@ -132,18 +113,18 @@ stop_processes() {
# Trap SIGINT and SIGTERM to stop processes when the script is terminated # Trap SIGINT and SIGTERM to stop processes when the script is terminated
trap stop_processes SIGINT SIGTERM trap stop_processes SIGINT SIGTERM
# SERVER
# Start server if SERVER_START is True
if [[ "$SERVER_START" == "True" ]]; then
start_server
fi
# DEVICE # DEVICE
# Start device if DEVICE_START is True # Start device if DEVICE_START is True
if [[ "$DEVICE_START" == "True" ]]; then if [[ "$DEVICE_START" == "True" ]]; then
start_device start_device
fi fi
# SERVER
# Start server if SERVER_START is True
if [[ "$SERVER_START" == "True" ]]; then
start_server
fi
# Wait for device and server processes to exit # Wait for device and server processes to exit
wait $DEVICE_PID wait $DEVICE_PID
wait $SERVER_PID wait $SERVER_PID

@ -53,14 +53,13 @@ def run_command(command):
return result.stdout, result.stderr return result.stdout, result.stderr
def get_transcription_file(wav_file_path: str): def get_transcription_file(wav_file_path: str):
whisper_rust_path = os.path.join(os.path.dirname(__file__), 'local_stt', 'whisper-rust') model_path = os.getenv("WHISPER_MODEL_PATH")
model_name = os.getenv('WHISPER_MODEL_NAME') if not model_path:
if not model_name: raise EnvironmentError("WHISPER_MODEL_PATH environment variable is not set.")
raise EnvironmentError("WHISPER_MODEL_NAME environment variable is not set.")
output, error = run_command([ output, error = run_command([
os.path.join(whisper_rust_path, 'whisper-rust'), os.path.join(os.path.dirname(__file__), 'local_stt', 'whisper-rust', 'whisper-rust'),
'--model-path', os.path.join(whisper_rust_path, model_name), '--model-path', model_path,
'--file-path', wav_file_path '--file-path', wav_file_path
]) ])

@ -23,6 +23,8 @@ python -m pip install -r requirements.txt
``` ```
NB: Depending on your local Python version, you may run into [this issue↗](https://github.com/TaylorSMarks/playsound/issues/150) installing playsound. Workarounds are provided in the issue. NB: Depending on your local Python version, you may run into [this issue↗](https://github.com/TaylorSMarks/playsound/issues/150) installing playsound. Workarounds are provided in the issue.
If you want to run local speech-to-text with Whisper, download a GGML Whisper model from [Hugging Face](https://huggingface.co/ggerganov/whisper.cpp). Then, in `OS/01/start.sh`, set `ALL_LOCAL=True` (the script compares against the exact string `True`) and set `WHISPER_MODEL_PATH` to the path of the downloaded model file.
## Usage ## Usage
```bash ```bash
@ -30,7 +32,6 @@ cd OS/01
bash start.sh bash start.sh
``` ```
If you want to run local text-to-speech and speech-to-text, set `ALL_LOCAL` in the `start.sh` script to True. This will use the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [Piper](https://github.com/rhasspy/piper) models.
<br> <br>
## Background ## Background

Loading…
Cancel
Save