You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
104 lines
3.3 KiB
104 lines
3.3 KiB
import redis
|
|
import RPi.GPIO as GPIO
|
|
import asyncio
|
|
import websockets
|
|
import sounddevice as sd
|
|
import numpy as np
|
|
import time
|
|
import re
|
|
|
|
def transcribe(audio_chunks):
|
|
pass # (todo)
|
|
|
|
def say(text):
|
|
# This should immediatly stop if button is pressed (if GPIO.input(18))
|
|
pass # (todo)
|
|
|
|
# Connect to button
|
|
GPIO.setmode(GPIO.BCM)
|
|
GPIO.setup(18, GPIO.IN, pull_up_down=GPIO.PUD_UP)
|
|
|
|
# Set the duration and sample rate for the mic
|
|
chunk_duration = 0.5 # seconds
|
|
sample_rate = 44100 # Hz
|
|
|
|
# Set up Redis connection
|
|
r = redis.Redis(host='localhost', port=6379, db=0)
|
|
|
|
# Set up websocket connection
|
|
websocket = websockets.connect('ws://localhost:8765')
|
|
|
|
# This is so we only say() full sentences
|
|
accumulated_text = ""
|
|
def is_full_sentence(text):
|
|
return text.endswith(('.', '!', '?'))
|
|
def split_into_sentences(text):
|
|
return re.split(r'(?<=[.!?])\s+', text)
|
|
|
|
async def send_to_websocket(message):
|
|
async with websocket as ws:
|
|
await ws.send(message)
|
|
|
|
async def check_websocket():
|
|
async with websocket as ws:
|
|
message = await ws.recv()
|
|
return message
|
|
|
|
def main():
|
|
while True:
|
|
|
|
# If the button is pushed down
|
|
if not GPIO.input(18):
|
|
|
|
# Tell websocket and core that the user is speaking
|
|
send_to_websocket({"role": "user", "type": "message", "start": True}) # Standard start flag, required per streaming LMC protocol (https://docs.openinterpreter.com/guides/streaming-response)
|
|
r.rpush('to_core', {"role": "user", "type": "message", "content": "stop"}) # Custom stop message. Core is not streaming LMC (it's static LMC) so doesn't require that ^ flag
|
|
|
|
# Record audio from the microphone in chunks
|
|
audio_chunks = []
|
|
|
|
# Continue recording until the button is released
|
|
while not GPIO.input(18):
|
|
chunk = sd.rec(int(chunk_duration * sample_rate), samplerate=sample_rate, channels=2)
|
|
sd.wait() # Wait until recording is finished
|
|
audio_chunks.append(chunk)
|
|
|
|
# Transcribe
|
|
text = transcribe(audio_chunks)
|
|
|
|
message = {"role": "user", "type": "message", "content": text, "time": time.time()}
|
|
|
|
# Send message to core and websocket
|
|
r.rpush('to_core', message)
|
|
send_to_websocket(message)
|
|
|
|
# Send user message end flag to websocket, required per streaming LMC protocol
|
|
send_to_websocket({"role": "user", "type": "message", "end": True})
|
|
|
|
# Send out anything in the to_interface queue
|
|
chunk = r.lpop('to_interface')
|
|
if chunk:
|
|
send_to_websocket(chunk)
|
|
accumulated_text += chunk["content"]
|
|
|
|
# Speak full sentences out loud
|
|
sentences = split_into_sentences(accumulated_text)
|
|
if is_full_sentence(sentences[-1]):
|
|
for sentence in sentences:
|
|
say(sentence)
|
|
accumulated_text = ""
|
|
else:
|
|
for sentence in sentences[:-1]:
|
|
say(sentence)
|
|
accumulated_text = sentences[-1]
|
|
else:
|
|
say(accumulated_text)
|
|
accumulated_text = ""
|
|
|
|
message = check_websocket()
|
|
if message:
|
|
r.rpush('to_core', message)
|
|
|
|
if __name__ == "__main__":
|
|
main()
|