Merge branch 'main' into u/zabirauf/ngrok

pull/21/head
Zohaib Rauf 12 months ago committed by GitHub
commit cb989eef70
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

2
.gitignore vendored

@ -1,5 +1,5 @@
ggml-*.bin ggml-*.bin
OS/01/local_tts/*
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/
*.py[cod] *.py[cod]

@ -1,7 +1,6 @@
import asyncio import asyncio
import threading import threading
import os import os
import logging
import pyaudio import pyaudio
from starlette.websockets import WebSocket from starlette.websockets import WebSocket
from queue import Queue from queue import Queue
@ -21,8 +20,9 @@ from utils.kernel import put_kernel_messages_into_queue
from stt import stt_wav from stt import stt_wav
import asyncio import asyncio
# Configure logging from utils.logs import setup_logging
logging.basicConfig(format='%(message)s', level=logging.getLevelName(os.getenv('DEBUG_LEVEL', 'INFO').upper())) from utils.logs import logger
setup_logging()
# Configuration for Audio Recording # Configuration for Audio Recording
CHUNK = 1024 # Record in chunks of 1024 samples CHUNK = 1024 # Record in chunks of 1024 samples
@ -49,7 +49,7 @@ def record_audio():
"""Record audio from the microphone and add it to the queue.""" """Record audio from the microphone and add it to the queue."""
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
logging.info("Recording started...") logger.info("Recording started...")
global RECORDING global RECORDING
# Create a temporary WAV file to store the audio data # Create a temporary WAV file to store the audio data
@ -67,7 +67,7 @@ def record_audio():
wav_file.close() wav_file.close()
stream.stop_stream() stream.stop_stream()
stream.close() stream.close()
logging.info("Recording stopped.") logger.info("Recording stopped.")
duration = wav_file.getnframes() / RATE duration = wav_file.getnframes() / RATE
if duration < 0.3: if duration < 0.3:
@ -118,7 +118,7 @@ def on_release(key):
if key == keyboard.Key.space: if key == keyboard.Key.space:
toggle_recording(False) toggle_recording(False)
elif key == keyboard.Key.esc: elif key == keyboard.Key.esc:
logging.info("Exiting...") logger.info("Exiting...")
os._exit(0) os._exit(0)
@ -133,11 +133,12 @@ async def message_sender(websocket):
async def websocket_communication(WS_URL): async def websocket_communication(WS_URL):
while True: while True:
try: try:
logging.info(f"Connecting to `{WS_URL}` ...") logger.info(f"Connecting to `{WS_URL}` ...")
headers = {"ngrok-skip-browser-warning": str(80), "User-Agent": "project01"} if os.getenv('NGROK_AUTHTOKEN') else {} headers = {"ngrok-skip-browser-warning": str(80), "User-Agent": "project01"} if os.getenv('NGROK_AUTHTOKEN') else {}
async with websockets.connect(WS_URL, extra_headers=headers) as websocket: async with websockets.connect(WS_URL, extra_headers=headers) as websocket:
logging.info("Press the spacebar to start/stop recording. Press ESC to exit.") logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
asyncio.create_task(message_sender(websocket)) asyncio.create_task(message_sender(websocket))
initial_message = {"role": None, "type": None, "format": None, "content": None} initial_message = {"role": None, "type": None, "format": None, "content": None}
@ -146,16 +147,18 @@ async def websocket_communication(WS_URL):
while True: while True:
message = await websocket.recv() message = await websocket.recv()
logging.info(f"Got this message from the server: {type(message)} {message}") logger.debug(f"Got this message from the server: {type(message)} {message}")
if type(message) == str: if type(message) == str:
message = json.loads(message) message = json.loads(message)
if message.get("end"): if message.get("end"):
logging.info(f"Complete message from the server: {message_so_far}") logger.debug(f"Complete message from the server: {message_so_far}")
logger.info("\n")
message_so_far = initial_message message_so_far = initial_message
if "content" in message: if "content" in message:
print(message['content'], end="", flush=True)
if any(message_so_far[key] != message[key] for key in message_so_far if key != "content"): if any(message_so_far[key] != message[key] for key in message_so_far if key != "content"):
message_so_far = message message_so_far = message
else: else:
@ -181,6 +184,7 @@ async def websocket_communication(WS_URL):
result = interpreter.computer.run(language, code) result = interpreter.computer.run(language, code)
send_queue.put(result) send_queue.put(result)
except Exception as e: except Exception as e:
logging.exception(f"An error occurred during websocket communication. {e}") logging.exception(f"An error occurred during websocket communication. {e}")
logging.info(f"Connecting to `{WS_URL}`...") logging.info(f"Connecting to `{WS_URL}`...")

@ -3,7 +3,6 @@ import ast
import json import json
import queue import queue
import os import os
import logging
import traceback import traceback
import re import re
from fastapi import FastAPI from fastapi import FastAPI
@ -20,8 +19,10 @@ from interpreter import interpreter
import ngrok import ngrok
import signal import signal
# Configure logging from utils.logs import setup_logging
logging.basicConfig(format='%(message)s', level=logging.getLevelName(os.getenv('DEBUG_LEVEL', 'INFO').upper())) from utils.logs import logger
setup_logging()
app = FastAPI() app = FastAPI()
@ -65,10 +66,10 @@ if os.getenv('CODE_RUNNER') == "device":
to_device.put({"role": "assistant", "type": "code", "format": "python", "end": True}) to_device.put({"role": "assistant", "type": "code", "format": "python", "end": True})
# Stream the response # Stream the response
logging.info("Waiting for the device to respond...") logger.info("Waiting for the device to respond...")
while True: while True:
chunk = from_computer.get() chunk = from_computer.get()
logging.info(f"Server received from device: {chunk}") logger.info(f"Server received from device: {chunk}")
if "end" in chunk: if "end" in chunk:
break break
yield chunk yield chunk
@ -99,7 +100,7 @@ async def websocket_endpoint(websocket: WebSocket):
await asyncio.gather(receive_task, send_task) await asyncio.gather(receive_task, send_task)
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()
logging.info(f"Connection lost. Error: {e}") logger.info(f"Connection lost. Error: {e}")
async def receive_messages(websocket: WebSocket): async def receive_messages(websocket: WebSocket):
while True: while True:
@ -114,7 +115,7 @@ async def receive_messages(websocket: WebSocket):
async def send_messages(websocket: WebSocket): async def send_messages(websocket: WebSocket):
while True: while True:
message = await to_device.get() message = await to_device.get()
logging.debug(f"Sending to the device: {type(message)} {message}") logger.debug(f"Sending to the device: {type(message)} {message}")
await websocket.send_json(message) await websocket.send_json(message)
async def listener(): async def listener():
@ -164,7 +165,7 @@ async def listener():
for chunk in interpreter.chat(messages, stream=True, display=False): for chunk in interpreter.chat(messages, stream=True, display=False):
logging.debug("Got chunk:", chunk) logger.debug("Got chunk:", chunk)
# Send it to the user # Send it to the user
await to_device.put(chunk) await to_device.put(chunk)
@ -200,7 +201,7 @@ async def listener():
with open(conversation_history_path, 'w') as file: with open(conversation_history_path, 'w') as file:
json.dump(interpreter.messages, file, indent=4) json.dump(interpreter.messages, file, indent=4)
logging.info("New user message recieved. Breaking.") logger.info("New user message recieved. Breaking.")
break break
# Also check if there's any new computer messages # Also check if there's any new computer messages
@ -209,7 +210,7 @@ async def listener():
with open(conversation_history_path, 'w') as file: with open(conversation_history_path, 'w') as file:
json.dump(interpreter.messages, file, indent=4) json.dump(interpreter.messages, file, indent=4)
logging.info("New computer message recieved. Breaking.") logger.info("New computer message recieved. Breaking.")
break break
else: else:
with open(conversation_history_path, 'w') as file: with open(conversation_history_path, 'w') as file:
@ -249,16 +250,16 @@ if __name__ == "__main__":
# Set up Ngrok # Set up Ngrok
ngrok_auth_token = os.getenv('NGROK_AUTHTOKEN') ngrok_auth_token = os.getenv('NGROK_AUTHTOKEN')
if ngrok_auth_token is not None: if ngrok_auth_token is not None:
logging.info("Setting up Ngrok") logger.info("Setting up Ngrok")
ngrok_listener = await ngrok.forward(f"{parsed_url.hostname}:{parsed_url.port}", authtoken=ngrok_auth_token) ngrok_listener = await ngrok.forward(f"{parsed_url.hostname}:{parsed_url.port}", authtoken=ngrok_auth_token)
ngrok_parsed_url = urllib.parse.urlparse(ngrok_listener.url()) ngrok_parsed_url = urllib.parse.urlparse(ngrok_listener.url())
# Setup SERVER_URL environment variable for device to use # Setup SERVER_URL environment variable for device to use
connection_url = f"wss://{ngrok_parsed_url.hostname}/" connection_url = f"wss://{ngrok_parsed_url.hostname}/"
logging.info(f"Ngrok established at {ngrok_parsed_url.geturl()}") logger.info(f"Ngrok established at {ngrok_parsed_url.geturl()}")
logging.info(f"\033[1mSERVER_CONNECTION_URL should be set to \"{connection_url}\"\033[0m") logger.info(f"\033[1mSERVER_CONNECTION_URL should be set to \"{connection_url}\"\033[0m")
logging.info("Starting `server.py`...") logger.info("Starting `server.py`...")
config = Config(app, host=parsed_url.hostname, port=parsed_url.port, lifespan='on') config = Config(app, host=parsed_url.hostname, port=parsed_url.port, lifespan='on')
server = Server(config) server = Server(config)

@ -10,6 +10,11 @@ export OPENAI_API_KEY="sk-..."
# Uncomment following line with your Ngrok auth token (https://dashboard.ngrok.com/get-started/your-authtoken) # Uncomment following line with your Ngrok auth token (https://dashboard.ngrok.com/get-started/your-authtoken)
# export NGROK_AUTHTOKEN="AUTH_TOKEN" # export NGROK_AUTHTOKEN="AUTH_TOKEN"
# For TTS, we use the en_US-lessac-medium voice model by default
# Please change the voice URL and voice name if you wish to use another voice
export PIPER_VOICE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/"
export PIPER_VOICE_NAME="en_US-lessac-medium.onnx"
# If SERVER_START, this is where we'll serve the server. # If SERVER_START, this is where we'll serve the server.
# If DEVICE_START, this is where the device expects the server to be. # If DEVICE_START, this is where the device expects the server to be.
export SERVER_URL=ws://localhost:8000/ export SERVER_URL=ws://localhost:8000/
@ -26,12 +31,52 @@ export STT_RUNNER=device # If server, audio will be sent over websocket.
export SERVER_EXPOSE_PUBLICALLY=False export SERVER_EXPOSE_PUBLICALLY=False
# Debug level # Debug level
# export DEBUG_LEVEL=DEBUG # export LOG_LEVEL=DEBUG
export DEBUG_LEVEL="INFO" export LOG_LEVEL="INFO"
### SETUP ### SETUP
# if using local models, install the models / executables
# Downloads the Piper release archive for this OS/architecture into ./local_tts.
# On macOS it additionally fetches piper-phonemize and the configured voice
# (PIPER_VOICE_URL / PIPER_VOICE_NAME) and rewrites the piper binary's @rpath
# library references to point at the unpacked piper-phonemize libraries.
if [[ "$ALL_LOCAL" == "True" ]]; then
    # Remember where we started so we always return here, even if one of the
    # cd calls below fails because a download or extraction went wrong.
    PIPER_START_DIR=$(pwd)

    OS=$(uname -s)
    ARCH=$(uname -m)
    if [ "$OS" = "Darwin" ]; then
        OS="macos"
        if [ "$ARCH" = "arm64" ]; then
            ARCH="aarch64"
        elif [ "$ARCH" = "x86_64" ]; then
            ARCH="x64"
        else
            echo "Piper: unsupported architecture"
        fi
    fi
    # NOTE(review): on non-Darwin hosts OS keeps the raw `uname -s` value
    # (e.g. "Linux"); confirm that matches the published release asset names.

    PIPER_ASSETNAME="piper_${OS}_${ARCH}.tar.gz"
    PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"

    # -p: do not fail when local_tts is left over from a previous run.
    mkdir -p local_tts
    cd local_tts
    curl -OL "${PIPER_URL}${PIPER_ASSETNAME}"
    tar -xvzf "$PIPER_ASSETNAME"
    cd piper

    if [ "$OS" = "macos" ]; then
        if [ "$ARCH" = "x64" ]; then
            softwareupdate --install-rosetta --agree-to-license
        fi

        PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
        PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"
        curl -OL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}"
        tar -xvzf "$PIPER_PHONEMIZE_ASSETNAME"

        # Voice model and its JSON config, as configured near the top of this script.
        curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}"
        curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json"

        # Point the piper binary at the libraries unpacked from piper-phonemize.
        PIPER_DIR=$(pwd)
        install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper"
        install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper"
        install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper"
    fi

    # Equivalent to the original `cd ../..` on success, but still correct when
    # an earlier `cd` failed.
    cd "$PIPER_START_DIR"
fi
# (for dev, reset the ports we were using) # (for dev, reset the ports we were using)
SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+") SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+")

@ -4,7 +4,6 @@ Defines a function which takes a path to an audio file and turns it into text.
from datetime import datetime from datetime import datetime
import os import os
import logging
import contextlib import contextlib
import tempfile import tempfile
import ffmpeg import ffmpeg
@ -12,8 +11,9 @@ import subprocess
import openai import openai
from openai import OpenAI from openai import OpenAI
# Configure logging from utils.logs import setup_logging
logging.basicConfig(format='%(message)s', level=logging.getLevelName(os.getenv('DEBUG_LEVEL', 'INFO').upper())) from utils.logs import logger
setup_logging()
client = OpenAI() client = OpenAI()
@ -85,10 +85,10 @@ def stt_wav(wav_file_path: str):
response_format="text" response_format="text"
) )
except openai.BadRequestError as e: except openai.BadRequestError as e:
logging.info(f"openai.BadRequestError: {e}") logger.info(f"openai.BadRequestError: {e}")
return None return None
logging.info(f"Transcription result: {transcript}") logger.info(f"Transcription result: {transcript}")
return transcript return transcript
else: else:
temp_dir = tempfile.gettempdir() temp_dir = tempfile.gettempdir()

@ -7,20 +7,37 @@ from openai import OpenAI
from pydub import AudioSegment from pydub import AudioSegment
from pydub.playback import play from pydub.playback import play
from playsound import playsound from playsound import playsound
import os
import subprocess
import tempfile
client = OpenAI() client = OpenAI()
def tts(text, play_audio):
    """Convert ``text`` to speech and return the audio file's raw bytes.

    When the ``ALL_LOCAL`` environment variable equals ``'False'`` the OpenAI
    speech API is used and MP3 bytes are returned. For any other value
    (including unset) the local ``piper`` executable under
    ``local_tts/piper`` next to this file is run and WAV bytes are returned.

    Args:
        text: The text to synthesize.
        play_audio: If truthy, play the generated audio before returning.

    Returns:
        The bytes of the generated audio file.

    Raises:
        subprocess.CalledProcessError: If the local piper process exits with
            a non-zero status.
    """
    if os.getenv('ALL_LOCAL') == 'False':
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=text,
            response_format="mp3"
        )
        with tempfile.NamedTemporaryFile(suffix=".mp3") as temp_file:
            response.stream_to_file(temp_file.name)
            if play_audio:
                playsound(temp_file.name)
            return temp_file.read()

    # Local synthesis. Reserve a path only: piper writes the file itself, so
    # the handle is closed immediately and the path is removed in `finally`
    # (delete=False would otherwise leave one WAV behind per call).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        output_file = temp_file.name
    try:
        piper_dir = os.path.join(os.path.dirname(__file__), 'local_tts', 'piper')
        # Fall back to the default voice named in start.sh when unset, instead
        # of passing None to os.path.join.
        voice_name = os.getenv('PIPER_VOICE_NAME', 'en_US-lessac-medium.onnx')
        subprocess.run(
            [
                os.path.join(piper_dir, 'piper'),
                '--model', os.path.join(piper_dir, voice_name),
                '--output_file', output_file
            ],
            input=text,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,  # surface piper failures rather than returning empty audio
        )
        if play_audio:
            playsound(output_file)
        # Read the finished file by path, after piper has exited.
        with open(output_file, 'rb') as audio_file:
            return audio_file.read()
    finally:
        os.remove(output_file)

@ -1,11 +1,10 @@
import asyncio import asyncio
import subprocess import subprocess
import platform import platform
import os
import logging
# Configure logging from utils.logs import setup_logging
logging.basicConfig(format='%(message)s', level=logging.getLevelName(os.getenv('DEBUG_LEVEL', 'INFO').upper())) from utils.logs import logger
setup_logging()
def get_kernel_messages(): def get_kernel_messages():
""" """
@ -21,7 +20,7 @@ def get_kernel_messages():
with open('/var/log/dmesg', 'r') as file: with open('/var/log/dmesg', 'r') as file:
return file.read() return file.read()
else: else:
logging.info("Unsupported platform.") logger.info("Unsupported platform.")
def custom_filter(message): def custom_filter(message):
# Check for {TO_INTERPRETER{ message here }TO_INTERPRETER} pattern # Check for {TO_INTERPRETER{ message here }TO_INTERPRETER} pattern
@ -33,7 +32,7 @@ def custom_filter(message):
elif 'USB' in message: elif 'USB' in message:
return message return message
# Check for network related keywords # Check for network related keywords
elif any(keyword in message for keyword in ['network', 'IP', 'internet', 'LAN', 'WAN', 'router', 'switch']): elif any(keyword in message for keyword in ['network', 'IP', 'internet', 'LAN', 'WAN', 'router', 'switch']) and "networkStatusForFlags" not in message:
return message return message
else: else:
return None return None

@ -0,0 +1,22 @@
import os
import logging
# Application-wide logger named "01", plus a handle on the root logger, which
# setup_logging() lowers to DEBUG when LOG_LEVEL=DEBUG.
logger: logging.Logger = logging.getLogger("01")
root_logger: logging.Logger = logging.getLogger()

# LOG_LEVEL values understood by setup_logging(), mapped to logging constants.
_LOG_LEVELS = {
    "DEBUG": logging.DEBUG,
    "INFO": logging.INFO,
    "WARNING": logging.WARNING,
    "ERROR": logging.ERROR,
    "CRITICAL": logging.CRITICAL,
}


def _basic_config() -> None:
    """Apply the basic logging configuration with a message-only format."""
    logging.basicConfig(
        format="%(message)s"
    )


def setup_logging() -> None:
    """Configure logging from the ``LOG_LEVEL`` environment variable.

    The value is matched case-insensitively, ignoring surrounding whitespace.
    For a recognised level name the basic configuration is applied and the
    ``"01"`` logger is set to that level; ``DEBUG`` additionally sets the
    root logger to ``DEBUG``. An unset or unrecognised value leaves the
    logging configuration untouched.
    """
    env = os.environ.get("LOG_LEVEL", "").strip().upper()
    level = _LOG_LEVELS.get(env)
    if level is None:
        return

    _basic_config()
    logger.setLevel(level)
    if level == logging.DEBUG:
        root_logger.setLevel(logging.DEBUG)

@ -21,6 +21,7 @@ sudo apt-get install portaudio19-dev libav-tools
```bash ```bash
python -m pip install -r requirements.txt python -m pip install -r requirements.txt
``` ```
NB: Depending on your local Python version, you may run into [this issue↗](https://github.com/TaylorSMarks/playsound/issues/150) installing playsound. Workarounds are provided in the issue.
If you want to run local speech-to-text from whisper, download the GGML Whisper model from [Huggingface](https://huggingface.co/ggerganov/whisper.cpp). Then in `OS/01/start.sh`, set `ALL_LOCAL=TRUE` and set `WHISPER_MODEL_PATH` to the path of the model. If you want to run local speech-to-text from whisper, download the GGML Whisper model from [Huggingface](https://huggingface.co/ggerganov/whisper.cpp). Then in `OS/01/start.sh`, set `ALL_LOCAL=TRUE` and set `WHISPER_MODEL_PATH` to the path of the model.

@ -0,0 +1,22 @@
# Development Setup for Jetson Nano
1. Go through the tutorial here: https://developer.nvidia.com/embedded/learn/get-started-jetson-nano-devkit#intro
2. At the end of that guide, you should have a Jetson running off a power supply or micro USB.
3. Get network connectivity. The Jetson does not have a WiFi module, so you will need to plug in an Ethernet cable.
If you have a laptop, you can share internet access over Ethernet.
To do this on a Mac, do the following:
a. Plug a cable from the Jetson Ethernet port to your Mac (you can use an Ethernet -> USB converter for your Mac).
b. Go to General->Sharing, then click the little `(i)` icon next to "Internet Sharing", and check all the options.
![](mac-share-internet.png)
c. Go back to General->Sharing, and turn on "Internet Sharing".
![](mac-share-internet-v2.png)
d. Now the Jetson should have connectivity!

Binary file not shown.

After

Width:  |  Height:  |  Size: 470 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 702 KiB

Loading…
Cancel
Save