@ -25,6 +25,7 @@ from datetime import datetime
import cv2
import cv2
import base64
import base64
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
from interpreter import interpreter # Just for code execution. Maybe we should let people do from interpreter.computer import run?
# In the future, I guess kernel watching code should be elsewhere? Somewhere server / client agnostic?
from . . server . utils . kernel import put_kernel_messages_into_queue
from . . server . utils . kernel import put_kernel_messages_into_queue
from . . server . utils . get_system_info import get_system_info
from . . server . utils . get_system_info import get_system_info
from . . server . stt . stt import stt_wav
from . . server . stt . stt import stt_wav
@ -33,6 +34,11 @@ from ..server.utils.logs import setup_logging
from . . server . utils . logs import logger
from . . server . utils . logs import logger
setup_logging ( )
setup_logging ( )
from . . utils . accumulator import Accumulator
accumulator = Accumulator ( )
# Configuration for Audio Recording
# Configuration for Audio Recording
CHUNK = 1024 # Record in chunks of 1024 samples
CHUNK = 1024 # Record in chunks of 1024 samples
FORMAT = pyaudio . paInt16 # 16 bits per sample
FORMAT = pyaudio . paInt16 # 16 bits per sample
@ -52,14 +58,13 @@ current_platform = get_system_info()
# Initialize PyAudio
# Initialize PyAudio
p = pyaudio . PyAudio ( )
p = pyaudio . PyAudio ( )
import asyncio
send_queue = queue . Queue ( )
send_queue = queue . Queue ( )
class Device :
class Device :
def __init__ ( self ) :
def __init__ ( self ) :
self . pressed_keys = set ( )
self . pressed_keys = set ( )
self . captured_images = [ ]
self . captured_images = [ ]
self . audiosegments = [ ]
def fetch_image_from_camera ( self , camera_index = CAMERA_DEVICE_INDEX ) :
def fetch_image_from_camera ( self , camera_index = CAMERA_DEVICE_INDEX ) :
""" Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list. """
""" Captures an image from the specified camera device and saves it to a temporary file. Adds the image to the captured_images list. """
@ -114,12 +119,24 @@ class Device:
self . add_image_to_send_queue ( image_path )
self . add_image_to_send_queue ( image_path )
self . captured_images . clear ( ) # Clear the list after sending
self . captured_images . clear ( ) # Clear the list after sending
async def play_audiosegments(self):
    """Play queued audio segments forever, in the order they were appended.

    Intended to run as a background task. Each pass drains
    ``self.audiosegments`` from the front, then sleeps for 0.1 s before
    checking for newly appended segments.

    The coroutine never returns on its own. An exception raised while
    playing a segment is printed and the loop carries on; cancelling the
    task stops it.
    """
    while True:
        try:
            # Pop from the front rather than removing inside a ``for`` over
            # the same list: mutating a list while iterating it skips
            # elements, which made [a, b, c] play as a, c, b.
            while self.audiosegments:
                audio = self.audiosegments.pop(0)
                # NOTE(review): play() is defined outside this view and looks
                # like a blocking call; if so it stalls the event loop for the
                # duration of the clip - confirm whether that is acceptable.
                play(audio)
        except Exception:
            # ``Exception`` (not a bare ``except``) so that task cancellation
            # is not swallowed. The failing segment was already popped, so it
            # is dropped instead of being retried forever.
            traceback.print_exc()
        # Sleep on every pass, including after an error, so a failure can
        # never degrade into a busy loop.
        await asyncio.sleep(0.1)
def record_audio ( self ) :
def record_audio ( self ) :
if os . getenv ( ' STT_RUNNER ' ) == " server " :
if os . getenv ( ' STT_RUNNER ' ) == " server " :
# STT will happen on the server. we're sending audio.
# STT will happen on the server. we're sending audio.
send_queue . put ( { " role " : " user " , " type " : " audio " , " format " : " audio/wav " , " start " : True } )
send_queue . put ( { " role " : " user " , " type " : " audio " , " format " : " bytes. wav" , " start " : True } )
elif os . getenv ( ' STT_RUNNER ' ) == " client " :
elif os . getenv ( ' STT_RUNNER ' ) == " client " :
# STT will happen here, on the client. we're sending text.
# STT will happen here, on the client. we're sending text.
send_queue . put ( { " role " : " user " , " type " : " message " , " start " : True } )
send_queue . put ( { " role " : " user " , " type " : " message " , " start " : True } )
@ -155,8 +172,8 @@ class Device:
send_queue . put ( { " role " : " user " , " type " : " message " , " content " : " stop " } )
send_queue . put ( { " role " : " user " , " type " : " message " , " content " : " stop " } )
send_queue . put ( { " role " : " user " , " type " : " message " , " end " : True } )
send_queue . put ( { " role " : " user " , " type " : " message " , " end " : True } )
else :
else :
send_queue . put ( { " role " : " user " , " type " : " audio " , " format " : " audio/ wav" , " content " : " " } )
send_queue . put ( { " role " : " user " , " type " : " audio " , " format " : " bytes. wav" , " content " : " " } )
send_queue . put ( { " role " : " user " , " type " : " audio " , " format " : " audio/ wav" , " end " : True } )
send_queue . put ( { " role " : " user " , " type " : " audio " , " format " : " bytes. wav" , " end " : True } )
else :
else :
self . queue_all_captured_images ( )
self . queue_all_captured_images ( )
@ -170,9 +187,9 @@ class Device:
with open ( wav_path , ' rb ' ) as audio_file :
with open ( wav_path , ' rb ' ) as audio_file :
byte_data = audio_file . read ( CHUNK )
byte_data = audio_file . read ( CHUNK )
while byte_data :
while byte_data :
send_queue . put ( { " role " : " user " , " type " : " audio " , " format " : " audio/wav " , " content " : str ( byte_data ) } )
send_queue . put ( byte_data )
byte_data = audio_file . read ( CHUNK )
byte_data = audio_file . read ( CHUNK )
send_queue . put ( { " role " : " user " , " type " : " audio " , " format " : " audio/ wav" , " end " : True } )
send_queue . put ( { " role " : " user " , " type " : " audio " , " format " : " bytes. wav" , " end " : True } )
if os . path . exists ( wav_path ) :
if os . path . exists ( wav_path ) :
os . remove ( wav_path )
os . remove ( wav_path )
@ -215,8 +232,12 @@ class Device:
async def message_sender(self, websocket):
    """Forward every item placed on ``send_queue`` to the server, forever.

    Args:
        websocket: Open connection object exposing an awaitable ``send``.

    ``bytes`` items are sent unchanged; any other item is JSON-encoded
    first. The coroutine never returns on its own; an exception raised
    while encoding or sending propagates to the caller.
    """
    loop = asyncio.get_running_loop()
    while True:
        # send_queue.get blocks, so it runs in the default executor to keep
        # the event loop responsive while waiting for the next item.
        message = await loop.run_in_executor(None, send_queue.get)
        try:
            payload = message if isinstance(message, bytes) else json.dumps(message)
            await websocket.send(payload)
        finally:
            # Balance the get() even when encoding or sending fails, so the
            # queue's unfinished-task count stays accurate.
            send_queue.task_done()
        # Brief pause between sends so other tasks get a turn.
        await asyncio.sleep(0.01)
async def websocket_communication ( self , WS_URL ) :
async def websocket_communication ( self , WS_URL ) :
while True :
while True :
@ -229,52 +250,42 @@ class Device:
asyncio . create_task ( self . message_sender ( websocket ) )
asyncio . create_task ( self . message_sender ( websocket ) )
initial_message = { " role " : None , " type " : None , " format " : None , " content " : None }
message_so_far = initial_message
while True :
while True :
message = await websocket . recv ( )
await asyncio . sleep ( 0.01 )
chunk = await websocket . recv ( )
logger . debug ( f " Got this message from the server: { type ( message) } { message } " )
logger . debug ( f " Got this message from the server: { type ( chunk) } { chunk } " )
if type ( message ) == str :
if type ( chunk ) == str :
message = json . loads ( message )
chunk = json . loads ( chunk )
if message . get ( " end " ) :
message = accumulator . accumulate ( chunk )
logger . debug ( f " Complete message from the server: { message_so_far } " )
if message == None :
logger . info ( " \n " )
# Will be None until we have a full message ready
message_so_far = initial_messag e
continu e
if " content " in message :
# At this point, we have our message
print ( message [ ' content ' ] , end = " " , flush = True )
if any ( message_so_far [ key ] != message [ key ] for key in message_so_far if key != " content " ) :
message_so_far = message
else :
message_so_far [ " content " ] + = message [ " content " ]
if message [ " type " ] == " audio " and " content " in message :
if message [ " type " ] == " audio " and message [ " format " ] . startswith ( " bytes " ) :
audio_bytes = bytes ( ast . literal_eval ( message [ " content " ] ) )
# Convert bytes to audio file
# Convert bytes to audio file
audio_file = io . BytesIO ( audio_bytes )
# Format will be bytes.wav or bytes.opus
audio = AudioSegment . from_mp3 ( audio_file )
audio_bytes = io . BytesIO ( message [ " content " ] )
audio = AudioSegment . from_file ( audio_bytes , codec = message [ " format " ] . split ( " . " ) [ 1 ] )
# Play the audio
play ( audio )
await asyncio . sleep ( 1 )
self . audiosegments . append ( audio )
# Run the code if that's the client's job
# Run the code if that's the client's job
if os . getenv ( ' CODE_RUNNER ' ) == " client " :
if os . getenv ( ' CODE_RUNNER ' ) == " client " :
if message [ " type " ] == " code " and " end " in message :
if message [ " type " ] == " code " and " end " in message :
language = message _so_far [ " format " ]
language = message [ " format " ]
code = message _so_far [ " content " ]
code = message [ " content " ]
result = interpreter . computer . run ( language , code )
result = interpreter . computer . run ( language , code )
send_queue . put ( result )
send_queue . put ( result )
except :
except :
# traceback.print_exc( )
traceback . print_exc ( )
logger . info ( f " Connecting to ` { WS_URL } `... " )
logger . info ( f " Connecting to ` { WS_URL } `... " )
await asyncio . sleep ( 2 )
await asyncio . sleep ( 2 )
@ -291,6 +302,7 @@ class Device:
if os . getenv ( ' CODE_RUNNER ' ) == " client " :
if os . getenv ( ' CODE_RUNNER ' ) == " client " :
asyncio . create_task ( put_kernel_messages_into_queue ( send_queue ) )
asyncio . create_task ( put_kernel_messages_into_queue ( send_queue ) )
asyncio . create_task ( self . play_audiosegments ( ) )
# If Raspberry Pi, add the button listener, otherwise use the spacebar
# If Raspberry Pi, add the button listener, otherwise use the spacebar
if current_platform . startswith ( " raspberry-pi " ) :
if current_platform . startswith ( " raspberry-pi " ) :