Merge branch 'main' into feature/dotenv

pull/27/head
Tom Chapin 11 months ago
commit 3d6123a714

2
.gitignore vendored

@ -1,5 +1,5 @@
ggml-*.bin ggml-*.bin
OS/01/local_tts/*
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/
*.py[cod] *.py[cod]

@ -1,7 +1,6 @@
import asyncio import asyncio
import threading import threading
import os import os
import logging
import pyaudio import pyaudio
from starlette.websockets import WebSocket from starlette.websockets import WebSocket
from queue import Queue from queue import Queue
@ -23,8 +22,9 @@ from interpreter import interpreter # Just for code execution. Maybe we should l
from utils.kernel import put_kernel_messages_into_queue from utils.kernel import put_kernel_messages_into_queue
from stt import stt_wav from stt import stt_wav
# Configure logging from utils.logs import setup_logging
logging.basicConfig(format='%(message)s', level=logging.getLevelName(os.getenv('DEBUG_LEVEL', 'INFO').upper())) from utils.logs import logger
setup_logging()
# Configuration for Audio Recording # Configuration for Audio Recording
CHUNK = 1024 # Record in chunks of 1024 samples CHUNK = 1024 # Record in chunks of 1024 samples
@ -55,7 +55,7 @@ def record_audio():
"""Record audio from the microphone and add it to the queue.""" """Record audio from the microphone and add it to the queue."""
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
logging.info("Recording started...") logger.info("Recording started...")
global RECORDING global RECORDING
# Create a temporary WAV file to store the audio data # Create a temporary WAV file to store the audio data
@ -73,7 +73,7 @@ def record_audio():
wav_file.close() wav_file.close()
stream.stop_stream() stream.stop_stream()
stream.close() stream.close()
logging.info("Recording stopped.") logger.info("Recording stopped.")
duration = wav_file.getnframes() / RATE duration = wav_file.getnframes() / RATE
if duration < 0.3: if duration < 0.3:
@ -124,7 +124,7 @@ def on_release(key):
if key == keyboard.Key.space: if key == keyboard.Key.space:
toggle_recording(False) toggle_recording(False)
elif key == keyboard.Key.esc: elif key == keyboard.Key.esc:
logging.info("Exiting...") logger.info("Exiting...")
os._exit(0) os._exit(0)
import asyncio import asyncio
@ -141,7 +141,7 @@ async def websocket_communication(WS_URL):
while True: while True:
try: try:
async with websockets.connect(WS_URL) as websocket: async with websockets.connect(WS_URL) as websocket:
logging.info("Press the spacebar to start/stop recording. Press ESC to exit.") logger.info("Press the spacebar to start/stop recording. Press ESC to exit.")
asyncio.create_task(message_sender(websocket)) asyncio.create_task(message_sender(websocket))
initial_message = {"role": None, "type": None, "format": None, "content": None} initial_message = {"role": None, "type": None, "format": None, "content": None}
@ -150,16 +150,18 @@ async def websocket_communication(WS_URL):
while True: while True:
message = await websocket.recv() message = await websocket.recv()
logging.info(f"Got this message from the server: {type(message)} {message}") logger.debug(f"Got this message from the server: {type(message)} {message}")
if type(message) == str: if type(message) == str:
message = json.loads(message) message = json.loads(message)
if message.get("end"): if message.get("end"):
logging.info(f"Complete message from the server: {message_so_far}") logger.debug(f"Complete message from the server: {message_so_far}")
logger.info("\n")
message_so_far = initial_message message_so_far = initial_message
if "content" in message: if "content" in message:
print(message['content'], end="", flush=True)
if any(message_so_far[key] != message[key] for key in message_so_far if key != "content"): if any(message_so_far[key] != message[key] for key in message_so_far if key != "content"):
message_so_far = message message_so_far = message
else: else:
@ -187,7 +189,7 @@ async def websocket_communication(WS_URL):
except: except:
# traceback.print_exc() # traceback.print_exc()
logging.info(f"Connecting to `{WS_URL}`...") logger.info(f"Connecting to `{WS_URL}`...")
await asyncio.sleep(2) await asyncio.sleep(2)

@ -4,7 +4,6 @@ import json
import time import time
import queue import queue
import os import os
import logging
import traceback import traceback
from queue import Queue from queue import Queue
from threading import Thread from threading import Thread
@ -23,8 +22,10 @@ from utils.kernel import put_kernel_messages_into_queue
from i import configure_interpreter from i import configure_interpreter
from interpreter import interpreter from interpreter import interpreter
# Configure logging from utils.logs import setup_logging
logging.basicConfig(format='%(message)s', level=logging.getLevelName(os.getenv('DEBUG_LEVEL', 'INFO').upper())) from utils.logs import logger
setup_logging()
app = FastAPI() app = FastAPI()
@ -68,10 +69,10 @@ if os.getenv('CODE_RUNNER') == "device":
to_device.put({"role": "assistant", "type": "code", "format": "python", "end": True}) to_device.put({"role": "assistant", "type": "code", "format": "python", "end": True})
# Stream the response # Stream the response
logging.info("Waiting for the device to respond...") logger.info("Waiting for the device to respond...")
while True: while True:
chunk = from_computer.get() chunk = from_computer.get()
logging.info(f"Server received from device: {chunk}") logger.info(f"Server received from device: {chunk}")
if "end" in chunk: if "end" in chunk:
break break
yield chunk yield chunk
@ -98,7 +99,7 @@ async def websocket_endpoint(websocket: WebSocket):
await asyncio.gather(receive_task, send_task) await asyncio.gather(receive_task, send_task)
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()
logging.info(f"Connection lost. Error: {e}") logger.info(f"Connection lost. Error: {e}")
async def receive_messages(websocket: WebSocket): async def receive_messages(websocket: WebSocket):
while True: while True:
@ -113,7 +114,7 @@ async def receive_messages(websocket: WebSocket):
async def send_messages(websocket: WebSocket): async def send_messages(websocket: WebSocket):
while True: while True:
message = await to_device.get() message = await to_device.get()
logging.debug(f"Sending to the device: {type(message)} {message}") logger.debug(f"Sending to the device: {type(message)} {message}")
await websocket.send_json(message) await websocket.send_json(message)
async def listener(): async def listener():
@ -163,7 +164,7 @@ async def listener():
for chunk in interpreter.chat(messages, stream=True, display=False): for chunk in interpreter.chat(messages, stream=True, display=False):
logging.debug("Got chunk:", chunk) logger.debug("Got chunk:", chunk)
# Send it to the user # Send it to the user
await to_device.put(chunk) await to_device.put(chunk)
@ -199,7 +200,7 @@ async def listener():
with open(conversation_history_path, 'w') as file: with open(conversation_history_path, 'w') as file:
json.dump(interpreter.messages, file, indent=4) json.dump(interpreter.messages, file, indent=4)
logging.info("New user message recieved. Breaking.") logger.info("New user message recieved. Breaking.")
break break
# Also check if there's any new computer messages # Also check if there's any new computer messages
@ -208,7 +209,7 @@ async def listener():
with open(conversation_history_path, 'w') as file: with open(conversation_history_path, 'w') as file:
json.dump(interpreter.messages, file, indent=4) json.dump(interpreter.messages, file, indent=4)
logging.info("New computer message recieved. Breaking.") logger.info("New computer message recieved. Breaking.")
break break
else: else:
with open(conversation_history_path, 'w') as file: with open(conversation_history_path, 'w') as file:
@ -243,7 +244,7 @@ if __name__ == "__main__":
if not server_url: if not server_url:
raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.") raise ValueError("The environment variable SERVER_URL is not set. Please set it to proceed.")
parsed_url = urllib.parse.urlparse(server_url) parsed_url = urllib.parse.urlparse(server_url)
logging.info("Starting `server.py`...") logger.info("Starting `server.py`...")
config = Config(app, host=parsed_url.hostname, port=parsed_url.port, lifespan='on') config = Config(app, host=parsed_url.hostname, port=parsed_url.port, lifespan='on')
server = Server(config) server = Server(config)

@ -5,8 +5,49 @@ if [ ! -f ".env" ]; then
fi fi
set -a; source .env; set +a set -a; source .env; set +a
### SETUP ### SETUP
# if using local models, install the models / executables
# Downloads the Piper TTS release into ./local_tts/piper, fetches the voice
# model named by PIPER_VOICE_NAME from PIPER_VOICE_URL, and on macOS also
# fetches piper-phonemize and rewires the piper binary to its dylibs.
if [[ "$ALL_LOCAL" == "True" ]]; then
    # Remember where we started so a failed `cd` below cannot leave the rest
    # of the script running from the wrong directory.
    PIPER_SETUP_START_DIR=$(pwd)

    OS=$(uname -s)
    ARCH=$(uname -m)
    if [ "$OS" = "Darwin" ]; then
        OS="macos"
        if [ "$ARCH" = "arm64" ]; then
            ARCH="aarch64"
        elif [ "$ARCH" = "x86_64" ]; then
            ARCH="x64"
        else
            echo "Piper: unsupported architecture"
        fi
    elif [ "$OS" = "Linux" ]; then
        # Release assets are named with a lowercase OS (piper_linux_<arch>.tar.gz),
        # while `uname -s` reports "Linux".
        OS="linux"
    fi

    PIPER_ASSETNAME="piper_${OS}_${ARCH}.tar.gz"
    PIPER_URL="https://github.com/rhasspy/piper/releases/latest/download/"

    # -p: do not fail when the directory is left over from a previous run.
    mkdir -p local_tts
    cd local_tts
    curl -OL "${PIPER_URL}${PIPER_ASSETNAME}"
    tar -xvzf "$PIPER_ASSETNAME"
    cd piper

    # The voice model is needed on every platform, not only on macOS.
    curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}"
    curl -OL "${PIPER_VOICE_URL}${PIPER_VOICE_NAME}.json"

    if [ "$OS" = "macos" ]; then
        # NOTE(review): Rosetta is requested for the x64 build; presumably this
        # targets Apple Silicon shells that report x86_64 -- confirm.
        if [ "$ARCH" = "x64" ]; then
            softwareupdate --install-rosetta --agree-to-license
        fi

        PIPER_PHONEMIZE_ASSETNAME="piper-phonemize_${OS}_${ARCH}.tar.gz"
        PIPER_PHONEMIZE_URL="https://github.com/rhasspy/piper-phonemize/releases/latest/download/"

        curl -OL "${PIPER_PHONEMIZE_URL}${PIPER_PHONEMIZE_ASSETNAME}"
        tar -xvzf "$PIPER_PHONEMIZE_ASSETNAME"

        # Point the piper binary at the dylibs shipped with piper-phonemize
        # instead of the unresolved @rpath entries.
        PIPER_DIR=$(pwd)
        install_name_tool -change @rpath/libespeak-ng.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libespeak-ng.1.dylib" "${PIPER_DIR}/piper"
        install_name_tool -change @rpath/libonnxruntime.1.14.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libonnxruntime.1.14.1.dylib" "${PIPER_DIR}/piper"
        install_name_tool -change @rpath/libpiper_phonemize.1.dylib "${PIPER_DIR}/piper-phonemize/lib/libpiper_phonemize.1.dylib" "${PIPER_DIR}/piper"
    fi

    cd "$PIPER_SETUP_START_DIR"
fi
# (for dev, reset the ports we were using) # (for dev, reset the ports we were using)
SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+") SERVER_PORT=$(echo $SERVER_URL | grep -oE "[0-9]+")

@ -4,7 +4,6 @@ Defines a function which takes a path to an audio file and turns it into text.
from datetime import datetime from datetime import datetime
import os import os
import logging
import contextlib import contextlib
import tempfile import tempfile
import ffmpeg import ffmpeg
@ -12,8 +11,9 @@ import subprocess
import openai import openai
from openai import OpenAI from openai import OpenAI
# Configure logging from utils.logs import setup_logging
logging.basicConfig(format='%(message)s', level=logging.getLevelName(os.getenv('DEBUG_LEVEL', 'INFO').upper())) from utils.logs import logger
setup_logging()
client = OpenAI() client = OpenAI()
@ -85,10 +85,10 @@ def stt_wav(wav_file_path: str):
response_format="text" response_format="text"
) )
except openai.BadRequestError as e: except openai.BadRequestError as e:
logging.info(f"openai.BadRequestError: {e}") logger.info(f"openai.BadRequestError: {e}")
return None return None
logging.info(f"Transcription result: {transcript}") logger.info(f"Transcription result: {transcript}")
return transcript return transcript
else: else:
temp_dir = tempfile.gettempdir() temp_dir = tempfile.gettempdir()

@ -7,10 +7,14 @@ from openai import OpenAI
from pydub import AudioSegment from pydub import AudioSegment
from pydub.playback import play from pydub.playback import play
from playsound import playsound from playsound import playsound
import os
import subprocess
import tempfile
client = OpenAI() client = OpenAI()
def tts(text, play_audio): def tts(text, play_audio):
if os.getenv('ALL_LOCAL') == 'False':
response = client.audio.speech.create( response = client.audio.speech.create(
model="tts-1", model="tts-1",
voice="alloy", voice="alloy",
@ -24,3 +28,16 @@ def tts(text, play_audio):
playsound(temp_file.name) playsound(temp_file.name)
return temp_file.read() return temp_file.read()
else:
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
output_file = temp_file.name
piper_dir = os.path.join(os.path.dirname(__file__), 'local_tts', 'piper')
subprocess.run([
os.path.join(piper_dir, 'piper'),
'--model', os.path.join(piper_dir, os.getenv('PIPER_VOICE_NAME')),
'--output_file', output_file
], input=text, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if play_audio:
playsound(temp_file.name)
return temp_file.read()

@ -1,11 +1,10 @@
import asyncio import asyncio
import subprocess import subprocess
import platform import platform
import os
import logging
# Configure logging from utils.logs import setup_logging
logging.basicConfig(format='%(message)s', level=logging.getLevelName(os.getenv('DEBUG_LEVEL', 'INFO').upper())) from utils.logs import logger
setup_logging()
def get_kernel_messages(): def get_kernel_messages():
""" """
@ -21,7 +20,7 @@ def get_kernel_messages():
with open('/var/log/dmesg', 'r') as file: with open('/var/log/dmesg', 'r') as file:
return file.read() return file.read()
else: else:
logging.info("Unsupported platform.") logger.info("Unsupported platform.")
def custom_filter(message): def custom_filter(message):
# Check for {TO_INTERPRETER{ message here }TO_INTERPRETER} pattern # Check for {TO_INTERPRETER{ message here }TO_INTERPRETER} pattern
@ -33,7 +32,7 @@ def custom_filter(message):
elif 'USB' in message: elif 'USB' in message:
return message return message
# Check for network related keywords # Check for network related keywords
elif any(keyword in message for keyword in ['network', 'IP', 'internet', 'LAN', 'WAN', 'router', 'switch']): elif any(keyword in message for keyword in ['network', 'IP', 'internet', 'LAN', 'WAN', 'router', 'switch']) and "networkStatusForFlags" not in message:
return message return message
else: else:
return None return None

@ -0,0 +1,22 @@
import os
import logging
# Application logger shared by every module of the project.
logger: logging.Logger = logging.getLogger("01")
# Root logger; only adjusted when full debug output is requested.
root_logger: logging.Logger = logging.getLogger()

# Values accepted in the LOG_LEVEL environment variable (case-insensitive).
_LEVELS = {
    "DEBUG": logging.DEBUG,
    "INFO": logging.INFO,
    "WARNING": logging.WARNING,
    "ERROR": logging.ERROR,
    "CRITICAL": logging.CRITICAL,
}


def _basic_config() -> None:
    """Install the default root handler so records print as the bare message."""
    logging.basicConfig(
        format="%(message)s"
    )


def setup_logging() -> None:
    """Configure logging from the ``LOG_LEVEL`` environment variable.

    Recognised values are DEBUG, INFO, WARNING, ERROR and CRITICAL, matched
    case-insensitively. The ``"01"`` logger is set to the requested level;
    DEBUG additionally lowers the root logger so third-party debug output is
    shown too. When the variable is unset or holds an unrecognised value,
    nothing is configured and the loggers are left untouched.
    """
    level_name = os.environ.get("LOG_LEVEL", "").strip().upper()
    level = _LEVELS.get(level_name)
    if level is None:
        # Unset or unknown value: keep whatever configuration already exists.
        return

    _basic_config()
    logger.setLevel(level)
    if level == logging.DEBUG:
        root_logger.setLevel(logging.DEBUG)

@ -25,6 +25,7 @@ sudo apt-get install portaudio19-dev libav-tools
```bash ```bash
python -m pip install -r requirements.txt python -m pip install -r requirements.txt
``` ```
NB: Depending on your local Python version, you may run into [this issue↗](https://github.com/TaylorSMarks/playsound/issues/150) when installing playsound. Workarounds are provided in the issue.
If you want to run local speech-to-text from whisper, download the GGML Whisper model from [Huggingface](https://huggingface.co/ggerganov/whisper.cpp). Then in `OS/01/start.sh`, set `ALL_LOCAL=TRUE` and set `WHISPER_MODEL_PATH` to the path of the model. If you want to run local speech-to-text from whisper, download the GGML Whisper model from [Huggingface](https://huggingface.co/ggerganov/whisper.cpp). Then in `OS/01/start.sh`, set `ALL_LOCAL=TRUE` and set `WHISPER_MODEL_PATH` to the path of the model.

@ -0,0 +1,22 @@
# Development Setup for Jetson Nano
1. Go through the tutorial here: https://developer.nvidia.com/embedded/learn/get-started-jetson-nano-devkit#intro
2. At the end of that guide, you should have a Jetson running off a power supply or micro USB.
3. Get network connectivity. The Jetson does not have a WiFi module so you will need to plug in ethernet.
If you have a laptop, you can share internet access over Ethernet.
To do this with Mac, do the following:
a. Plug a cable from the Jetson Ethernet port to your Mac (you can use an Ethernet -> USB converter for your Mac).
b. Go to General->Sharing, then click the little `(i)` icon next to "Internet Sharing", and check all the options.
![](mac-share-internet.png)
c. Go back to General->Sharing, and turn on "Internet Sharing".
![](mac-share-internet-v2.png)
d. Now the Jetson should have connectivity!

Binary file not shown.

After

Width:  |  Height:  |  Size: 470 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 702 KiB

Loading…
Cancel
Save